X-Git-Url: http://git.phpeclipse.com

diff --git a/archive/net.sourceforge.phpeclipse.quantum.sql/src/com/quantum/sql/parser/SQLLexx.java b/archive/net.sourceforge.phpeclipse.quantum.sql/src/com/quantum/sql/parser/SQLLexx.java
index e901266..8c83886 100644
--- a/archive/net.sourceforge.phpeclipse.quantum.sql/src/com/quantum/sql/parser/SQLLexx.java
+++ b/archive/net.sourceforge.phpeclipse.quantum.sql/src/com/quantum/sql/parser/SQLLexx.java
@@ -2,10 +2,36 @@ package com.quantum.sql.parser;
 
 import java.util.Vector;
 
+/**
+ * <p>An SQL Lexer.  From 
+ * <a href="http://www.dictionary.com/">dictionary.com</a>:
+ * 
+ * <blockquote>
+ * <p><b>lexer</b>
+ *
+ * <p>/lek'sr/ n. Common hacker shorthand for 'lexical
+ * analyzer', the input-tokenizing stage in the parser for a language
+ * (the part that breaks it into word-like pieces).
+ * </blockquote>
+ * 
+ * <p>Note that this class has nothing to do with the Sci-fi channel's
+ * <a href="http://www.scifi.com/lexx/">Lexx</a> TV series.
+ */
 public class SQLLexx {
-	private static String endline = ";"; //$NON-NLS-1$
-	private static String dash = "-"; //$NON-NLS-1$
-	private static String group = "/"; //$NON-NLS-1$
+	private final static char CHAR_EOL = '\n';	// line break; emitted as its own END_OF_LINE token
+	private final static char CHAR_DASH = '-';	// a single dash is a SYMBOL, two dashes start a COMMENT
+	private final static char CHAR_ESCAPE = '\\';	// a backslash right before ';' keeps that ';' from being a SEPARATOR
+	private final static char CHAR_SEPARATOR = ';';	// emitted as a SEPARATOR token
+	// Selectors for CheckCondition(); each one names a class of characters
+	private final static int CONDITION_WHITESPACE = 1;
+	private final static int CONDITION_IDENTIFIER = 2;	// characters that may continue an identifier
+	private final static int CONDITION_IDENTIFIER_INITIAL = 3;	// characters that may start an identifier
+	private final static int CONDITION_LITERAL_SIMPLE_QUOTE = 4;
+	private final static int CONDITION_LITERAL_DOUBLE_QUOTE = 5;
+	private final static int CONDITION_NUMERIC = 6;	// digits and '.'
+	private final static int CONDITION_EOL = 7;
+	
+	
 	/**
 	 * Parses a SQL text into tokens. 
 	 * @param text
@@ -19,134 +45,75 @@ public class SQLLexx {
 				int offset = p.getOffset();
 				char c = p.getNext();
 				// Adds END_OF_LINE token
-				if (c == '\n') {
-					tokens.addElement(new Token(Token.END_OF_LINE, "\n", offset, offset + 1));	
+				if (c == CHAR_EOL) {
+					tokens.addElement(new Token(Token.END_OF_LINE, CHAR_EOL, offset));	
 				}
 				// Adds WHITESPACE token;
-				else if (Character.isWhitespace(c)) {
-					StringBuffer value = new StringBuffer();
-					while (Character.isWhitespace(c) && !p.isDone()) {
-						value.append(c);
-						c = p.getNext();
-					}
-					// done because of is done
-					if (Character.isWhitespace(c)) {
-						value.append(c);
-					} else if (!p.isDone()){
-						p.back();
-					}
+				else if (CheckCondition( c, CONDITION_WHITESPACE)) 
+				{
+					StringBuffer value = AddTokenWhile(p, c, CONDITION_WHITESPACE);
 					tokens.addElement(new Token(Token.WHITESPACE, value.toString(), offset, offset + value.length()));
 				// Adds IDENTIFIER token (can be reserved SQL word or not);
-				} else if (Character.isLetter(c) || c == '_' || c == '$') {
-					StringBuffer value = new StringBuffer();
-					while ((Character.isLetterOrDigit(c) || c == '_'  || c == '$') && !p.isDone()) {
-						value.append(c);
-						c = p.getNext();
-					}
-					if ((Character.isLetterOrDigit(c) || c == '_')) {
-						value.append(c);
-					} else if (!p.isDone()){
-						p.back();
-					}
+				} else if (CheckCondition( c , CONDITION_IDENTIFIER_INITIAL)) 
+				{
+					StringBuffer value = AddTokenWhile(p, c, CONDITION_IDENTIFIER);
 					tokens.addElement(new Token(Token.IDENTIFIER, value.toString(), offset, offset + value.length()));
 				// Adds LITERAL token;
-				} else if (c == '\'') {
-					StringBuffer value = new StringBuffer();
-					value.append(c);
-					if (!p.isDone()) {
-						c = p.getNext();
-						while (c != '\'' && c != '\n' && !p.isDone()) {
-							value.append(c);
-							c = p.getNext();
-						}
-						if (c == '\'' || p.isDone()) {
-							value.append(c);
-						} else if (!p.isDone()){
-							p.back();
-						}
-					}
+				} else if (CheckCondition(c, CONDITION_LITERAL_SIMPLE_QUOTE)) {
+					StringBuffer value = AddTokenUntil(p, c, CONDITION_LITERAL_SIMPLE_QUOTE);
 					tokens.addElement(new Token(Token.LITERAL, value.toString(), offset, offset + value.length()));
-				// Adds COMMENT token (or SYMBOL (dash) if only one dash);
-				} else if (c == '-') {
-					p.mark();
+				// Adds LITERAL token (double-quoted); it ends at the next double quote, not at a simple quote
+				} else if (CheckCondition(c, CONDITION_LITERAL_DOUBLE_QUOTE)) {
+					StringBuffer value = AddTokenUntil(p, c, CONDITION_LITERAL_DOUBLE_QUOTE);
+					tokens.addElement(new Token(Token.LITERAL, value.toString(), offset, offset + value.length()));
+				// Adds NUMERIC token;
+				} else if (Character.isDigit(c)) {
+					StringBuffer value = AddTokenWhile(p, c, CONDITION_NUMERIC);
+					tokens.addElement(new Token(Token.NUMERIC, value.toString(), offset, offset + value.length()));
+				// Adds COMMENT token if two dashes (or SYMBOL (dash) if only one dash);
+				} else if (c == CHAR_DASH) {
 					if (p.isDone()) {
-						tokens.addElement(new Token(Token.SYMBOL, dash, offset, offset + 1));
+						tokens.addElement(new Token(Token.SYMBOL, new Character(CHAR_DASH).toString(), offset, offset + 1));
 					} else {
-						char next = p.getNext();
-						if (next == '-') {
-							StringBuffer value = new StringBuffer("--"); //$NON-NLS-1$
-							if (!p.isDone()) {
-								c = p.getNext();
-								while (c != '\n' && !p.isDone()) {
-									value.append(c);
-									c = p.getNext();
-								}
-								if (p.isDone()) {
-									value.append(c);
-								} else {
-									p.back();
-								}
-							}
+						char next = p.peek();
+						if (next == CHAR_DASH) {
+							StringBuffer value = AddTokenUntil(p, CHAR_DASH, CONDITION_EOL);
 							tokens.addElement(new Token(Token.COMMENT, value.toString(), offset, offset + value.length()));
 						} else {
-							tokens.addElement(new Token(Token.SYMBOL, dash, offset, offset + 1));
-							p.reset();
+							tokens.addElement(new Token(Token.SYMBOL, new Character(CHAR_DASH).toString(), offset, offset + 1));
 						}
+					}	
+                 //	Determine if the ';' is escaped or not
+				} else if (c == CHAR_ESCAPE) {
+					if (p.peek() == CHAR_SEPARATOR) {
+						p.getNext();	// We advance the pointer so the separator is not marked again
+						// We DON'T SAVE the escape character in the tokens.
+						// For correct syntax highlighting the token still spans both characters (offset to offset + 2).
+						// This is so far the only case when a character is eliminated and not saved to the tokens.
+						// That means it won't be sent to the database when executed.
+						// This is to allow definitions of procedures with ';' as an end-of-statement,
+						//  not as an execution symbol for SQL.
+						tokens.addElement(new Token(Token.SYMBOL, new Character(CHAR_SEPARATOR).toString() , offset, offset + 2));
+					}	else {
+						tokens.addElement(new Token(Token.SYMBOL, new Character(CHAR_ESCAPE).toString() , offset, offset + 1));
 					}
 				// Adds SEPARATOR token (;),  considers the rest of the line as COMMENT token;
-				} else if (c == ';') {
-					tokens.addElement(new Token(Token.SEPARATOR, endline, offset, offset + 1));
-					StringBuffer value = new StringBuffer();
+				} else if (c == CHAR_SEPARATOR) {
+					tokens.addElement(new Token(Token.SEPARATOR, new Character(CHAR_SEPARATOR).toString(), offset, offset + 1));
+					// The rest of the line will be a comment
 					if (!p.isDone()) {
-						c = p.getNext();
-						while (c != '\n' && !p.isDone()) {
-							value.append(c);
-							c = p.getNext();
-						}
-						if (p.isDone()) {
-							value.append(c);
-						} else {
-							p.back();
-						}
-						// We add to the offset so as to skip the initial ';'
+						StringBuffer value = AddTokenUntil(p, "", CONDITION_EOL);
+						// 	We add to the offset so as to skip the initial ';'
 						offset++;
 						tokens.addElement(new Token(Token.COMMENT, value.toString(), offset, offset + value.length()));
 					}
-				// Adds NUMERIC token;
-				} else if (Character.isDigit(c)) {
-					StringBuffer value = new StringBuffer();
-					while ((Character.isDigit(c) || c == '.') && !p.isDone()) {
-						value.append(c);
-						c = p.getNext();
-					}
-					if ((Character.isDigit(c) || c == '.')) {
-						value.append(c);
-					} else {
-						p.back();
-					}
-					tokens.addElement(new Token(Token.NUMERIC, value.toString(), offset, offset + value.length()));
-				// Adds COMMENT token (or GROUP (slash) if only one slash);
+				// Adds COMMENT token, for several lines;
 				} else if (c == '/') {
-					p.mark();
 					// If we have '/*', it's a comment till '*/' found or eof
 					if (p.peek() == '*') {
-						StringBuffer value = new StringBuffer();
-						c = p.getNext();
-						value.append('/');
-						while (!( c == '*' && p.peek() == '/' ) && !p.isDone()) {
-							value.append(c);
-							c = p.getNext();
-						}
-						if (!p.isDone()){
-							value.append(c);
-							c = p.getNext();
-							value.append(c);	
-						}
-						tokens.addElement(new Token(Token.COMMENT, value.toString(), offset, offset + value.length()));
+						tokens.addElement(tokenizeComment(p, offset));
 					} else {
-						// It's not '/*' , so it's a group token
-						tokens.addElement(new Token(Token.GROUP, group, offset, offset + 1));
-						p.reset();
+						tokens.addElement(new Token(Token.SYMBOL, new String(new char[] {c}) , offset, offset + 1));
 					}
 				// Adds SYMBOL token;
 				} else {
@@ -163,4 +130,103 @@ public class SQLLexx {
 //		}
 		return tokens;
 	}
+	/**
+	 * Searches for a token end, reading UNTIL the condition is true, a newline is read, or the StringPointer is done.
+	 * The character that meets the condition is also added to the StringBuffer; a newline is read but not added.
+	 * @param p The StringPointer to read characters from
+	 * @param s A string with the start of the token, already extracted from the StringPointer (may be empty)
+	 * @param condition The CONDITION_* constant that marks the end of the token
+	 * @return a StringBuffer with the complete token (just s if the StringPointer was already done)
+	 */
+	private static StringBuffer AddTokenUntil(StringPointer p, String s, int condition) {
+		StringBuffer value = new StringBuffer(s);
+		if (p.isDone()) return value;
+		for(;;) {
+			char c = p.getNext();
+			if (c != CHAR_EOL) value.append(c);	// a newline is read here but never stored in the token
+			// NOTE(review): no p.back() follows a terminating newline, unlike AddTokenWhile - confirm callers need no END_OF_LINE token for it
+			if (CheckCondition (c, condition) || c == CHAR_EOL || p.isDone()) {
+				break; 
+			} 
+		}	
+		return value;
+	}
+	private static StringBuffer AddTokenUntil(StringPointer p, char c, int condition) {	// overload: the token starts with the single character c
+		return AddTokenUntil(p, String.valueOf(c), condition);	// one-character string, built without boxing c
+	}
+	/**
+	 * Searches for a token end, reading WHILE the condition is true or until the end of the StringPointer.
+	 * @param p		The StringPointer where the original stream is; p.back() is called after the first non-matching character
+	 * @param s		A string with the first character from the token, already extracted from the StringPointer
+	 * @param condition	The CONDITION_* constant that every further character of the token must meet
+	 * @return a StringBuffer with the complete token (just s if the StringPointer was already done)
+	 */
+	private static StringBuffer AddTokenWhile(StringPointer p, String s, int condition) {
+		StringBuffer value = new StringBuffer(s);
+		if (p.isDone()) return value;
+		for(;;) {
+			char c = p.getNext();
+			if (CheckCondition (c, condition)) {
+				value.append(c);
+				if (p.isDone()) break; 
+			} 
+			else
+			{
+				p.back();	// c is not part of this token; presumably this un-reads it for the caller - confirm in StringPointer
+				break;
+			}
+		}	
+		return value;
+	}
+	private static StringBuffer AddTokenWhile(StringPointer p, char c, int condition) {	// overload: the token starts with the single character c
+		return AddTokenWhile(p, String.valueOf(c), condition);	// one-character string, built without boxing c
+	}
+	/**
+	 * Returns true if the character meets the condition, and false if not (also false for an unknown condition).
+	 * New conditions should be defined in this function.
+	 * @param c	The character to check the condition against
+	 * @param condition One of the CONDITION_* constants
+	 * @return true if c meets the condition, false otherwise
+	 */
+	private static boolean CheckCondition(char c, int condition) {
+		switch (condition) {
+		case CONDITION_WHITESPACE:
+			return Character.isWhitespace(c);
+		case CONDITION_IDENTIFIER_INITIAL:
+			return (Character.isLetter(c) || c == '$' || c == '#'); // NOTE(review): '_' may continue an identifier but not start one - confirm this is intended
+		case CONDITION_IDENTIFIER:
+			return (Character.isLetter(c) || Character.isDigit(c) || c == '_' || c == '$' || c == '#'); 
+		case CONDITION_LITERAL_SIMPLE_QUOTE:
+			return (c == '\''); 
+		case CONDITION_LITERAL_DOUBLE_QUOTE:
+			return (c == '\"');
+		case CONDITION_NUMERIC:
+			return (Character.isDigit(c) || c == '.'); 
+		case CONDITION_EOL:
+			return (c == CHAR_EOL); 
+		default:
+			break;
+		}
+		return false;
+	}
+	/**
+	 * Reads a block comment (slash-star up to star-slash, or up to the end of p) into a COMMENT token.
+	 * @param p The StringPointer to read from; the caller has read the '/' and peeked at the '*' that follows
+	 * @param offset The offset of the opening '/', used as the start offset of the token
+	 */
+	private static Token tokenizeComment(StringPointer p, int offset) {
+		char c;
+		StringBuffer value = new StringBuffer();
+		c = p.getNext();	// the '*' the caller peeked at
+		value.append('/');	// the '/' the caller already read
+		while (!( c == '*' && p.peek() == '/' ) && !p.isDone()) {
+			value.append(c);
+			c = p.getNext();
+		}
+		if (!p.isDone()){	// the loop stopped at the closing pair: add its '*' and '/'
+			value.append(c);
+			c = p.getNext();
+			value.append(c);	
+		}	// NOTE(review): when p is already done here, the character last read into c is not appended - confirm
+		return new Token(Token.COMMENT, value.toString(), offset, offset + value.length());
+	}
 }