/***************************************************************************
    Copyright          : (C) 2002 by Neoworks Limited. All rights reserved
    URL                : http://www.neoworks.com
 ***************************************************************************/
/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

package com.neoworks.rdc;

import java.util.*;

/**
 * A lexer class which will turn any input string into a list of tokens given a symbol table.
 * The lexer is constructed by giving it the input string and the tokens to put into the
 * symbol table. Then the list of lexed tokens can be retrieved. All the lexing is done at one
 * time, not on demand. The list of tokens can then be passed to a parser.
 *
 * @see Token
 * @see Parser
 * 
 * Current properties
 * ------------------
 * 
 * lineCommentDelim: Character for delimiting a line comment. (String)
 * handleLineComments: Boolean.TRUE To handle comments, Boolean.FALSE otherwise. (Boolean)
 */
public class Lexer
{
	/** The dummy token type. Produced internally for unlexable input; filtered out by {@link #lexIt()}. */
	public static final int DUMMY  = 0;
	/** A number token */
	public static final int NUM    = 1;
	/** An identifier token */
	public static final int ID     = 2;
	/** A string token */
	public static final int STRING = 3;
	/** A not real token (end of input) */
	public static final int NONE   = 4;

	/** The END of the input */
	public static final int END = NONE;

	// Names of the properties this lexer consults while lexing.
	private static final String PROP_LINE_COMMENT_DELIM   = "lineCommentDelim";
	private static final String PROP_HANDLE_LINE_COMMENTS = "handleLineComments";
	private static final String PROP_SINGLE_QUOTE_STRINGS = "treatSingleQuoteAsString";

	private final SymbolTable symTable;     // The symbol table to use while lexing
	private final TokenList outputString;   // The output as a list of tokens
	private final String inputString;       // The input string to lex
	private final int stringLength;         // The input string length
	private final Hashtable properties;     // Lexer configuration, keyed by property name
	private int currentChar;                // The current position in the input
	private boolean lexed;                  // True once lexIt() has produced the token list

	/**
	 * Constructor to make a new lexer with the given input string to parse.
	 * Also a vector of tokens can be passed to put into the symbol table
	 * when the lexer is initialized.
	 * <p>The properties start out as: <code>lineCommentDelim</code> = "#",
	 * <code>handleLineComments</code> = TRUE, <code>treatSingleQuoteAsString</code> = FALSE.
	 *
	 * @param inputString  The input string to lex
	 * @param newTokens    The tokens to put into the symbol table
	 * @throws NullPointerException if <code>inputString</code> is null
	 */
	public Lexer(String inputString, Vector newTokens)
	{
		if (inputString == null)
		{
			throw new NullPointerException("inputString must not be null");
		}

		// Strings are immutable, so the reference can be kept without a defensive copy.
		this.inputString = inputString;
		this.stringLength = inputString.length();
		this.currentChar = 0;
		this.outputString = new TokenList();
		this.symTable = new SymbolTable(newTokens);

		this.properties = new Hashtable();
		properties.put(PROP_LINE_COMMENT_DELIM, "#");
		properties.put(PROP_HANDLE_LINE_COMMENTS, Boolean.TRUE);
		properties.put(PROP_SINGLE_QUOTE_STRINGS, Boolean.FALSE);
	}

	/**
	 * Sets whether single-quoted text is lexed as a string token.
	 *
	 * @param b true to treat single-quoted text as a string
	 * @deprecated Use {@link #setProperty(String, Object)} with the
	 *             <code>treatSingleQuoteAsString</code> property instead.
	 */
	public void setTreatSingleQuoteAsString(boolean b)
	{
		properties.put(PROP_SINGLE_QUOTE_STRINGS, Boolean.valueOf(b));
	}

	/**
	 * Tells whether single-quoted text is lexed as a string token.
	 *
	 * @return the current value of the <code>treatSingleQuoteAsString</code> property
	 * @deprecated Use {@link #getProperty(String)} with the
	 *             <code>treatSingleQuoteAsString</code> property instead.
	 */
	public boolean getTreatSingleQuoteAsString()
	{
		return isEnabled(PROP_SINGLE_QUOTE_STRINGS);
	}

	/**
	 * Function to lex the input. Scans the input and turns it into tokens.
	 * Returns the lexed output as a list of tokens, terminated by an END token.
	 * DUMMY tokens (unlexable input) are never part of the result.
	 * <p>The input is lexed only once; further calls return the same list
	 * without appending another END token.
	 *
	 * @return The list of tokens which have been lexed
	 * @throws NumberFormatException if a run of digits does not fit into an int
	 */
	public TokenList lexIt()
	{
		if (lexed)
		{
			return outputString;
		}

		while (currentChar < stringLength)
		{
			Token token = getToken();

			// null means a comment was skipped; DUMMY means nothing usable was found.
			if (token != null && token.getTokenType() != DUMMY)
			{
				outputString.addToken(token);
			}
		}

		outputString.addToken(new Token(NONE, NONE, "END", stringLength + 1));
		lexed = true;
		return outputString;
	}

	/**
	 * Function to get the next token from the input stream.
	 * <p>Heres how we do things:
	 * <ul>
	 * <li>Strip the whitespace from the current position in the
	 *     input stream.
	 * <li>Skip a line comment if comment handling is switched on.
	 * <li>Match quoted strings (single quoted ones only if switched on).
	 * <li>Now look for numbers. NOTE that identifiers cannot start
	 *     with a number as the number is matched first.
	 * <li>Then look for identifiers and check if they are in the
	 *     symbol table. If not add them to the table.
	 * <li>Lastly match non alphanumeric characters against the symbol table.
	 * <li>If no token is found then a DUMMY token is returned.
	 * </ul>
	 *
	 * @return The token found, or null if a line comment was skipped
	 */
	private Token getToken()
	{
		// Strip white space
		while (currentChar < stringLength && Character.isWhitespace(inputString.charAt(currentChar)))
		{
			currentChar++;
		}

		int tokenStart = currentChar;

		if (currentChar >= stringLength)
		{
			// Only trailing whitespace was left.
			return new Token(DUMMY, DUMMY, "", tokenStart);
		}

		if (isEnabled(PROP_HANDLE_LINE_COMMENTS) && skipLineComment())
		{
			return null;
		}

		char first = inputString.charAt(currentChar);

		if (first == '\"' || (first == '\'' && isEnabled(PROP_SINGLE_QUOTE_STRINGS)))
		{
			return lexQuotedString(first, tokenStart);
		}

		if (Character.isDigit(first))
		{
			return lexNumber(tokenStart);
		}

		if (isWordChar(first))
		{
			return lexWord(tokenStart);
		}

		return lexSymbol(tokenStart);
	}

	/**
	 * Reads a Boolean valued property.
	 *
	 * @param propertyName the name of the property
	 * @return the boolean value stored under that name
	 */
	private boolean isEnabled(String propertyName)
	{
		return ((Boolean) properties.get(propertyName)).booleanValue();
	}

	/**
	 * Skips a line comment if one starts at the current position. The position
	 * is left on the terminating newline, or at the end of the input when the
	 * comment is on the last line.
	 *
	 * @return true if a comment was skipped, false if the position is unchanged
	 */
	private boolean skipLineComment()
	{
		String lineCommentDelim = (String) properties.get(PROP_LINE_COMMENT_DELIM);

		// startsWith is safe even when fewer characters remain than the delimiter is long.
		if (!inputString.startsWith(lineCommentDelim, currentChar))
		{
			return false;
		}

		int newline = inputString.indexOf('\n', currentChar);
		currentChar = (newline < 0) ? stringLength : newline;
		return true;
	}

	/**
	 * Lexes the quoted string whose opening quote is at the current position.
	 * If the string is not terminated the rest of the input is abandoned.
	 *
	 * @param quoteChar  the quote character that opened (and must close) the string
	 * @param tokenStart the position to record in the token
	 * @return a STRING token with the unescaped contents, or a DUMMY token
	 *         if the string did not terminate
	 */
	private Token lexQuotedString(char quoteChar, int tokenStart)
	{
		int closingQuote = findEndOfQuotedString(inputString, currentChar + 1, quoteChar);

		if (closingQuote == -1)
		{
			// Throw the pointer off the end of the expression
			currentChar = stringLength + 1;
			return new Token(DUMMY, DUMMY, "", tokenStart);
		}

		String value = unescapeString(inputString.substring(currentChar + 1, closingQuote));
		currentChar = closingQuote + 1;
		return new Token(STRING, NONE, value, tokenStart);
	}

	/**
	 * Lexes the run of digits starting at the current position.
	 *
	 * @param tokenStart the position to record in the token
	 * @return a NUM token carrying the integer value and its text
	 * @throws NumberFormatException if the digits do not fit into an int
	 */
	private Token lexNumber(int tokenStart)
	{
		int end = currentChar;

		while (end < stringLength && Character.isDigit(inputString.charAt(end)))
		{
			++end;
		}

		String digits = inputString.substring(currentChar, end);
		int value = Integer.parseInt(digits);
		currentChar = end;
		return new Token(NUM, value, digits, tokenStart);
	}

	/**
	 * Lexes the keyword or identifier starting at the current position.
	 * A word that is not yet in the symbol table is added to it as an identifier.
	 *
	 * @param tokenStart the position to record in the token
	 * @return a copy of the matching symbol table token, or a new ID token
	 */
	private Token lexWord(int tokenStart)
	{
		int end = currentChar;

		while (end < stringLength && isWordChar(inputString.charAt(end)))
		{
			++end;
		}

		String word = inputString.substring(currentChar, end);
		currentChar = end;

		int tokenLocation = symTable.isInTable(new Token(DUMMY, DUMMY, word));

		if (tokenLocation < 0)
		{
			symTable.addToken(new Token(ID, NONE, word));
			return new Token(ID, NONE, word, tokenStart);
		}

		return new Token(symTable.getTokenAt(tokenLocation), tokenStart);
	}

	/**
	 * Lexes the run of non alphanumeric, non whitespace characters starting at
	 * the current position. The longest prefix of the run that is present in
	 * the symbol table wins. If no prefix matches, the whole run is skipped.
	 *
	 * @param tokenStart the position to record in the token
	 * @return a copy of the matching symbol table token, or a DUMMY token
	 *         carrying the skipped text
	 */
	private Token lexSymbol(int tokenStart)
	{
		// The character at currentChar always belongs to the run, which guarantees progress.
		int runEnd = currentChar + 1;

		while (runEnd < stringLength && isSymbolChar(inputString.charAt(runEnd)))
		{
			++runEnd;
		}

		// Try the longest candidate first, then shorten it one character at a time.
		for (int end = runEnd; end > currentChar; end--)
		{
			String candidate = inputString.substring(currentChar, end);
			int tokenLocation = symTable.isInTable(new Token(DUMMY, DUMMY, candidate));

			if (tokenLocation >= 0)
			{
				currentChar = end;
				return new Token(symTable.getTokenAt(tokenLocation), tokenStart);
			}
		}

		String skipped = inputString.substring(currentChar, runEnd);
		currentChar = runEnd;
		return new Token(DUMMY, DUMMY, skipped, tokenStart);
	}

	/**
	 * @param c the character to test
	 * @return true if the character can be part of a keyword or identifier
	 */
	private static boolean isWordChar(char c)
	{
		return Character.isLetterOrDigit(c) || c == '_';
	}

	/**
	 * @param c the character to test
	 * @return true if the character can continue a run of symbol characters
	 */
	private static boolean isSymbolChar(char c)
	{
		return !Character.isLetterOrDigit(c) && !Character.isWhitespace(c);
	}

	/**
	 * Finds the quote character that terminates a quoted string. A character
	 * preceded by a backslash is skipped, so an escaped quote does not end the string.
	 *
	 * @param data      the text to search
	 * @param start     the index of the first character after the opening quote
	 * @param quoteChar the quote character to look for
	 * @return the index of the closing quote, or -1 if the data ran out first
	 */
	private static int findEndOfQuotedString(String data, int start, char quoteChar)
	{
		int dataLength = data.length();

		for (int pos = start; pos < dataLength; pos++)
		{
			char c = data.charAt(pos);

			if (c == '\\')
			{
				// Seen an escape char: step over the character it escapes
				++pos;
			}
			else if (c == quoteChar)
			{
				// Seen an end quote char
				return pos;
			}
		}

		return -1;	// We ran out of data without an end char.
	}

	/**
	 * Resolves the backslash escapes in the contents of a quoted string.
	 * <code>\n</code> becomes a newline and <code>\t</code> a tab; for any other
	 * escaped character the backslash is simply dropped. A lone backslash
	 * at the very end is kept as it is.
	 *
	 * @param data the raw text between the quotes
	 * @return the text with the escapes resolved
	 */
	private static String unescapeString(String data)
	{
		int dataLength = data.length();
		StringBuffer buf = new StringBuffer(dataLength);

		for (int pos = 0; pos < dataLength; pos++)
		{
			char c = data.charAt(pos);

			if (c == '\\' && pos + 1 < dataLength)
			{
				// Seen an escape char: translate the character that follows it
				c = data.charAt(++pos);

				if (c == 'n')
				{
					c = '\n';
				}
				else if (c == 't')
				{
					c = '\t';
				}
			}

			buf.append(c);
		}

		return buf.toString();
	}

	/**
	 * Sets a lexer property. The properties consulted while lexing are
	 * <code>lineCommentDelim</code> (String), <code>handleLineComments</code> (Boolean)
	 * and <code>treatSingleQuoteAsString</code> (Boolean).
	 *
	 * @param propertyName the name of the property
	 * @param property     the new value
	 * @throws NullPointerException if the name or the value is null
	 */
	public void setProperty(String propertyName, Object property)
	{
		properties.put(propertyName, property);
	}

	/**
	 * Gets a lexer property.
	 *
	 * @param propertyName the name of the property
	 * @return the value stored under that name, or null if there is none
	 */
	public Object getProperty(String propertyName)
	{
		return properties.get(propertyName);
	}
}
