X-Git-Url: http://git.phpeclipse.com diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java index 0826535..f870539 100644 --- a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java +++ b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java @@ -14,24 +14,24 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import net.sourceforge.phpdt.core.compiler.*; +import net.sourceforge.phpdt.core.compiler.CharOperation; +import net.sourceforge.phpdt.core.compiler.IScanner; +import net.sourceforge.phpdt.core.compiler.ITerminalSymbols; +import net.sourceforge.phpdt.core.compiler.InvalidInputException; import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral; public class Scanner implements IScanner, ITerminalSymbols { - /* APIs ares - - getNextToken() which return the current type of the token - (this value is not memorized by the scanner) - - getCurrentTokenSource() which provides with the token "REAL" source - (aka all unicode have been transformed into a correct char) - - sourceStart gives the position into the stream - - currentPosition-1 gives the sourceEnd position into the stream - */ + /* + * APIs ares - getNextToken() which return the current type of the token (this value is not memorized by the scanner) - + * getCurrentTokenSource() which provides with the token "REAL" source (aka all unicode have been transformed into a correct + * char) - sourceStart gives the position into the stream - currentPosition-1 gives the sourceEnd position into the stream + */ - // 1.4 feature + // 1.4 feature private boolean assertMode; public boolean useAssertAsAnIndentifier = false; - //flag indicating if processed source contains occurrences of keyword assert + //flag indicating if processed source contains occurrences of keyword assert public boolean containsAssertKeyword = false; public boolean recordLineSeparator; @@ -67,7 +67,7 @@ public class Scanner implements IScanner, ITerminalSymbols { //diet parsing support - jump over some method body when requested public boolean diet = false; - //support for the poor-line-debuggers .... + //support for the poor-line-debuggers .... //remember the position of the cr/lf public int[] lineEnds = new int[250]; public int linePtr = -1; @@ -116,14 +116,12 @@ public class Scanner implements IScanner, ITerminalSymbols { charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' }; - static final char[] initCharArray = - new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' }; + static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' }; static final int TableSize = 30, InternalTableSize = 6; //30*6 = 180 entries public static final int OptimizedLength = 6; - public /*static*/ - final char[][][][] charArray_length = - new char[OptimizedLength][TableSize][InternalTableSize][]; + public /* static */ + final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][]; // support for detecting non-externalized string literals int currentLineNr = -1; int previousLineNr = -1; @@ -137,7 +135,7 @@ public class Scanner implements IScanner, ITerminalSymbols { public boolean checkNonExternalizedStringLiterals = true; public boolean wasNonExternalizedStringLiteral = false; - /*static*/ { + /* static */ { for (int i = 0; i < 6; i++) { for (int j = 0; j < TableSize; j++) { for (int k = 0; k < InternalTableSize; k++) { @@ -146,18 +144,24 @@ public class Scanner implements IScanner, ITerminalSymbols { } } } - static int newEntry2 = 0, - newEntry3 = 0, - newEntry4 = 0, - newEntry5 = 0, - newEntry6 = 0; + static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0; public static final int RoundBracket = 0; public static final int SquareBracket = 1; public static final int CurlyBracket = 2; public static final int BracketKinds = 3; - public static final boolean DEBUG = false; + // task tag support + public char[][] foundTaskTags = null; + public char[][] foundTaskMessages; + public char[][] foundTaskPriorities = null; + public int[][] foundTaskPositions; + public int foundTaskCount = 0; + public char[][] taskTags = null; + public char[][] taskPriorities = null; + + public static final boolean DEBUG = true; + public Scanner() { this(false, false); } @@ -166,23 +170,21 @@ public class Scanner implements IScanner, ITerminalSymbols { } /** - * Determines if the specified character is - * permissible as the first character in a PHP identifier + * Determines if the specified character is permissible as the first character in a PHP identifier */ public static boolean isPHPIdentifierStart(char ch) { - return Character.isLetter(ch) || (ch == '_'); + return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF); } /** - * Determines if the specified character may be part of a PHP identifier as - * other than the first character + * Determines if the specified character may be part of a PHP identifier as other than the first character */ public static boolean isPHPIdentifierPart(char ch) { - return Character.isLetterOrDigit(ch) || (ch == '_'); + return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF); } public final boolean atEnd() { - // This code is not relevant if source is + // This code is not relevant if source is // Only a part of the real stream input return source.length == currentPosition; @@ -191,64 +193,55 @@ public class Scanner implements IScanner, ITerminalSymbols { //return the token REAL source (aka unicodes are precomputed) char[] result; - if (withoutUnicodePtr != 0) - //0 is used as a fast test flag so the real first char is in position 1 - System.arraycopy( - withoutUnicodeBuffer, - 1, - result = new char[withoutUnicodePtr], - 0, - withoutUnicodePtr); - else { - int length = currentPosition - startPosition; - switch (length) { // see OptimizedLength - case 1 : - return optimizedCurrentTokenSource1(); - case 2 : - return optimizedCurrentTokenSource2(); - case 3 : - return optimizedCurrentTokenSource3(); - case 4 : - return optimizedCurrentTokenSource4(); - case 5 : - return optimizedCurrentTokenSource5(); - case 6 : - return optimizedCurrentTokenSource6(); - } - //no optimization - System.arraycopy( - source, - startPosition, - result = new char[length], - 0, - length); + // if (withoutUnicodePtr != 0) + // //0 is used as a fast test flag so the real first char is in position 1 + // System.arraycopy( + // withoutUnicodeBuffer, + // 1, + // result = new char[withoutUnicodePtr], + // 0, + // withoutUnicodePtr); + // else { + int length = currentPosition - startPosition; + switch (length) { // see OptimizedLength + case 1 : + return optimizedCurrentTokenSource1(); + case 2 : + return optimizedCurrentTokenSource2(); + case 3 : + return optimizedCurrentTokenSource3(); + case 4 : + return optimizedCurrentTokenSource4(); + case 5 : + return optimizedCurrentTokenSource5(); + case 6 : + return optimizedCurrentTokenSource6(); } + //no optimization + System.arraycopy(source, startPosition, result = new char[length], 0, length); + // } return result; } public int getCurrentTokenEndPosition() { return this.currentPosition - 1; } + public final char[] getCurrentTokenSource() { // Return the token REAL source (aka unicodes are precomputed) char[] result; - if (withoutUnicodePtr != 0) - // 0 is used as a fast test flag so the real first char is in position 1 - System.arraycopy( - withoutUnicodeBuffer, - 1, - result = new char[withoutUnicodePtr], - 0, - withoutUnicodePtr); - else { - int length; - System.arraycopy( - source, - startPosition, - result = new char[length = currentPosition - startPosition], - 0, - length); - } + // if (withoutUnicodePtr != 0) + // // 0 is used as a fast test flag so the real first char is in position 1 + // System.arraycopy( + // withoutUnicodeBuffer, + // 1, + // result = new char[withoutUnicodePtr], + // 0, + // withoutUnicodePtr); + // else { + int length; + System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length); + // } return result; } @@ -256,23 +249,18 @@ public class Scanner implements IScanner, ITerminalSymbols { // Return the token REAL source (aka unicodes are precomputed) char[] result; - if (withoutUnicodePtr != 0) - // 0 is used as a fast test flag so the real first char is in position 1 - System.arraycopy( - withoutUnicodeBuffer, - 1, - result = new char[withoutUnicodePtr], - 0, - withoutUnicodePtr); - else { - int length; - System.arraycopy( - source, - startPos, - result = new char[length = currentPosition - startPos], - 0, - length); - } + // if (withoutUnicodePtr != 0) + // // 0 is used as a fast test flag so the real first char is in position 1 + // System.arraycopy( + // withoutUnicodeBuffer, + // 1, + // result = new char[withoutUnicodePtr], + // 0, + // withoutUnicodePtr); + // else { + int length; + System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length); + // } return result; } @@ -288,24 +276,30 @@ public class Scanner implements IScanner, ITerminalSymbols { result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2); else { int length; - System.arraycopy( - source, - startPosition + 1, - result = new char[length = currentPosition - startPosition - 2], - 0, - length); + System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length); } return result; } public int getCurrentTokenStartPosition() { return this.startPosition; } + + public final char[] getCurrentStringLiteralSource() { + // Return the token REAL source (aka unicodes are precomputed) + + char[] result; + + int length; + System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length); + // } + return result; + } + /* * Search the source position corresponding to the end of a given line number - * - * Line numbers are 1-based, and relative to the scanner initialPosition. - * Character positions are 0-based. - * + * + * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based. + * * In case the given line number is inconsistent, answers -1. */ public final int getLineEnd(int lineNumber) { @@ -324,12 +318,11 @@ public class Scanner implements IScanner, ITerminalSymbols { } /** * Search the source position corresponding to the beginning of a given line number - * - * Line numbers are 1-based, and relative to the scanner initialPosition. - * Character positions are 0-based. - * - * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0. - * + * + * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based. + * + * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0. + * * In case the given line number is inconsistent, answers -1. */ public final int getLineStart(int lineNumber) { @@ -355,66 +348,67 @@ public class Scanner implements IScanner, ITerminalSymbols { //Both previous lines are true if the currentCharacter is == to the testedChar //On false, no side effect has occured. - //ALL getNextChar.... ARE OPTIMIZED COPIES + //ALL getNextChar.... ARE OPTIMIZED COPIES int temp = currentPosition; try { - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1, c2, c3, c4; - int unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0) - || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0) - || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0) - || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0)) { - currentPosition = temp; - return false; - } - - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (currentCharacter != testedChar) { - currentPosition = temp; - return false; - } - unicodeAsBackSlash = currentCharacter == '\\'; - - //need the unicode buffer - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - unicodeSize - startPosition; - System.arraycopy( - source, - startPosition, - withoutUnicodeBuffer, - 1, - withoutUnicodePtr); - } - //fill the buffer with the char - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return true; - - } //-------------end unicode traitement-------------- - else { - if (currentCharacter != testedChar) { - currentPosition = temp; - return false; - } - unicodeAsBackSlash = false; - if (withoutUnicodePtr != 0) - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return true; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // //-------------unicode traitement ------------ + // int c1, c2, c3, c4; + // int unicodeSize = 6; + // currentPosition++; + // while (source[currentPosition] == 'u') { + // currentPosition++; + // unicodeSize++; + // } + // + // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c1 < 0) + // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c2 < 0) + // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c3 < 0) + // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c4 < 0)) { + // currentPosition = temp; + // return false; + // } + // + // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); + // if (currentCharacter != testedChar) { + // currentPosition = temp; + // return false; + // } + // unicodeAsBackSlash = currentCharacter == '\\'; + // + // //need the unicode buffer + // if (withoutUnicodePtr == 0) { + // //buffer all the entries that have been left aside.... + // withoutUnicodePtr = currentPosition - unicodeSize - startPosition; + // System.arraycopy( + // source, + // startPosition, + // withoutUnicodeBuffer, + // 1, + // withoutUnicodePtr); + // } + // //fill the buffer with the char + // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + // return true; + // + // } //-------------end unicode traitement-------------- + // else { + if (currentCharacter != testedChar) { + currentPosition = temp; + return false; } + unicodeAsBackSlash = false; + // if (withoutUnicodePtr != 0) + // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + return true; + // } } catch (IndexOutOfBoundsException e) { unicodeAsBackSlash = false; currentPosition = temp; @@ -431,73 +425,74 @@ public class Scanner implements IScanner, ITerminalSymbols { //Both previous lines are true if the currentCharacter is == to the testedChar1/2 //On false, no side effect has occured. - //ALL getNextChar.... ARE OPTIMIZED COPIES + //ALL getNextChar.... ARE OPTIMIZED COPIES int temp = currentPosition; try { int result; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1, c2, c3, c4; - int unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0) - || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0) - || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0) - || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0)) { - currentPosition = temp; - return 2; - } - - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (currentCharacter == testedChar1) - result = 0; - else if (currentCharacter == testedChar2) - result = 1; - else { - currentPosition = temp; - return -1; - } - - //need the unicode buffer - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - unicodeSize - startPosition; - System.arraycopy( - source, - startPosition, - withoutUnicodeBuffer, - 1, - withoutUnicodePtr); - } - //fill the buffer with the char - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return result; - } //-------------end unicode traitement-------------- + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // //-------------unicode traitement ------------ + // int c1, c2, c3, c4; + // int unicodeSize = 6; + // currentPosition++; + // while (source[currentPosition] == 'u') { + // currentPosition++; + // unicodeSize++; + // } + // + // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c1 < 0) + // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c2 < 0) + // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c3 < 0) + // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c4 < 0)) { + // currentPosition = temp; + // return 2; + // } + // + // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); + // if (currentCharacter == testedChar1) + // result = 0; + // else if (currentCharacter == testedChar2) + // result = 1; + // else { + // currentPosition = temp; + // return -1; + // } + // + // //need the unicode buffer + // if (withoutUnicodePtr == 0) { + // //buffer all the entries that have been left aside.... + // withoutUnicodePtr = currentPosition - unicodeSize - startPosition; + // System.arraycopy( + // source, + // startPosition, + // withoutUnicodeBuffer, + // 1, + // withoutUnicodePtr); + // } + // //fill the buffer with the char + // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + // return result; + // } //-------------end unicode traitement-------------- + // else { + if (currentCharacter == testedChar1) + result = 0; + else if (currentCharacter == testedChar2) + result = 1; else { - if (currentCharacter == testedChar1) - result = 0; - else if (currentCharacter == testedChar2) - result = 1; - else { - currentPosition = temp; - return -1; - } - - if (withoutUnicodePtr != 0) - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return result; + currentPosition = temp; + return -1; } + + // if (withoutUnicodePtr != 0) + // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + return result; + // } } catch (IndexOutOfBoundsException e) { currentPosition = temp; return -1; @@ -512,63 +507,64 @@ public class Scanner implements IScanner, ITerminalSymbols { //Both previous lines are true if the currentCharacter is a digit //On false, no side effect has occured. - //ALL getNextChar.... ARE OPTIMIZED COPIES + //ALL getNextChar.... ARE OPTIMIZED COPIES int temp = currentPosition; try { - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1, c2, c3, c4; - int unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0) - || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0) - || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0) - || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0)) { - currentPosition = temp; - return false; - } - - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (!Character.isDigit(currentCharacter)) { - currentPosition = temp; - return false; - } - - //need the unicode buffer - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - unicodeSize - startPosition; - System.arraycopy( - source, - startPosition, - withoutUnicodeBuffer, - 1, - withoutUnicodePtr); - } - //fill the buffer with the char - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return true; - } //-------------end unicode traitement-------------- - else { - if (!Character.isDigit(currentCharacter)) { - currentPosition = temp; - return false; - } - if (withoutUnicodePtr != 0) - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return true; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // //-------------unicode traitement ------------ + // int c1, c2, c3, c4; + // int unicodeSize = 6; + // currentPosition++; + // while (source[currentPosition] == 'u') { + // currentPosition++; + // unicodeSize++; + // } + // + // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c1 < 0) + // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c2 < 0) + // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c3 < 0) + // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c4 < 0)) { + // currentPosition = temp; + // return false; + // } + // + // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); + // if (!Character.isDigit(currentCharacter)) { + // currentPosition = temp; + // return false; + // } + // + // //need the unicode buffer + // if (withoutUnicodePtr == 0) { + // //buffer all the entries that have been left aside.... + // withoutUnicodePtr = currentPosition - unicodeSize - startPosition; + // System.arraycopy( + // source, + // startPosition, + // withoutUnicodeBuffer, + // 1, + // withoutUnicodePtr); + // } + // //fill the buffer with the char + // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + // return true; + // } //-------------end unicode traitement-------------- + // else { + if (!Character.isDigit(currentCharacter)) { + currentPosition = temp; + return false; } + // if (withoutUnicodePtr != 0) + // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + return true; + // } } catch (IndexOutOfBoundsException e) { currentPosition = temp; return false; @@ -583,63 +579,64 @@ public class Scanner implements IScanner, ITerminalSymbols { //Both previous lines are true if the currentCharacter is a digit base on radix //On false, no side effect has occured. - //ALL getNextChar.... ARE OPTIMIZED COPIES + //ALL getNextChar.... ARE OPTIMIZED COPIES int temp = currentPosition; try { - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1, c2, c3, c4; - int unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0) - || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0) - || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0) - || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0)) { - currentPosition = temp; - return false; - } - - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (Character.digit(currentCharacter, radix) == -1) { - currentPosition = temp; - return false; - } - - //need the unicode buffer - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - unicodeSize - startPosition; - System.arraycopy( - source, - startPosition, - withoutUnicodeBuffer, - 1, - withoutUnicodePtr); - } - //fill the buffer with the char - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return true; - } //-------------end unicode traitement-------------- - else { - if (Character.digit(currentCharacter, radix) == -1) { - currentPosition = temp; - return false; - } - if (withoutUnicodePtr != 0) - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return true; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // //-------------unicode traitement ------------ + // int c1, c2, c3, c4; + // int unicodeSize = 6; + // currentPosition++; + // while (source[currentPosition] == 'u') { + // currentPosition++; + // unicodeSize++; + // } + // + // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c1 < 0) + // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c2 < 0) + // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c3 < 0) + // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c4 < 0)) { + // currentPosition = temp; + // return false; + // } + // + // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); + // if (Character.digit(currentCharacter, radix) == -1) { + // currentPosition = temp; + // return false; + // } + // + // //need the unicode buffer + // if (withoutUnicodePtr == 0) { + // //buffer all the entries that have been left aside.... + // withoutUnicodePtr = currentPosition - unicodeSize - startPosition; + // System.arraycopy( + // source, + // startPosition, + // withoutUnicodeBuffer, + // 1, + // withoutUnicodePtr); + // } + // //fill the buffer with the char + // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + // return true; + // } //-------------end unicode traitement-------------- + // else { + if (Character.digit(currentCharacter, radix) == -1) { + currentPosition = temp; + return false; } + // if (withoutUnicodePtr != 0) + // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + return true; + // } } catch (IndexOutOfBoundsException e) { currentPosition = temp; return false; @@ -654,64 +651,65 @@ public class Scanner implements IScanner, ITerminalSymbols { //Both previous lines are true if the currentCharacter is a JavaIdentifierPart //On false, no side effect has occured. - //ALL getNextChar.... ARE OPTIMIZED COPIES + //ALL getNextChar.... ARE OPTIMIZED COPIES int temp = currentPosition; try { - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1, c2, c3, c4; - int unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0) - || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0) - || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0) - || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0)) { - currentPosition = temp; - return false; - } - - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (!isPHPIdentifierPart(currentCharacter)) { - currentPosition = temp; - return false; - } - - //need the unicode buffer - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - unicodeSize - startPosition; - System.arraycopy( - source, - startPosition, - withoutUnicodeBuffer, - 1, - withoutUnicodePtr); - } - //fill the buffer with the char - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return true; - } //-------------end unicode traitement-------------- - else { - if (!isPHPIdentifierPart(currentCharacter)) { - currentPosition = temp; - return false; - } - - if (withoutUnicodePtr != 0) - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return true; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // //-------------unicode traitement ------------ + // int c1, c2, c3, c4; + // int unicodeSize = 6; + // currentPosition++; + // while (source[currentPosition] == 'u') { + // currentPosition++; + // unicodeSize++; + // } + // + // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c1 < 0) + // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c2 < 0) + // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c3 < 0) + // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c4 < 0)) { + // currentPosition = temp; + // return false; + // } + // + // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); + // if (!isPHPIdentifierPart(currentCharacter)) { + // currentPosition = temp; + // return false; + // } + // + // //need the unicode buffer + // if (withoutUnicodePtr == 0) { + // //buffer all the entries that have been left aside.... + // withoutUnicodePtr = currentPosition - unicodeSize - startPosition; + // System.arraycopy( + // source, + // startPosition, + // withoutUnicodeBuffer, + // 1, + // withoutUnicodePtr); + // } + // //fill the buffer with the char + // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + // return true; + // } //-------------end unicode traitement-------------- + // else { + if (!isPHPIdentifierPart(currentCharacter)) { + currentPosition = temp; + return false; } + + // if (withoutUnicodePtr != 0) + // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + return true; + // } } catch (IndexOutOfBoundsException e) { currentPosition = temp; return false; @@ -719,29 +717,40 @@ public class Scanner implements IScanner, ITerminalSymbols { } public int getNextToken() throws InvalidInputException { + int htmlPosition = currentPosition; try { while (!phpMode) { - startPosition = currentPosition; currentCharacter = source[currentPosition++]; if (currentCharacter == '<') { if (getNextChar('?')) { currentCharacter = source[currentPosition++]; - if ((currentCharacter == ' ') - || Character.isWhitespace(currentCharacter)) { + if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) { // = 0) { test = getNextChar('P', 'p'); if (test >= 0) { - // ')) return TokenNameEQUAL_GREATER; return TokenNameEQUAL; @@ -1037,8 +1057,8 @@ public class Scanner implements IScanner, ITerminalSymbols { // } // } // } - // // if (getNextChar('\'')) - // // return TokenNameCharacterLiteral; + // // if (getNextChar('\'')) + // // return TokenNameCharacterLiteral; // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed // for (int lookAhead = 0; lookAhead < 20; lookAhead++) { // if (currentPosition + lookAhead == source.length) @@ -1055,19 +1075,20 @@ public class Scanner implements IScanner, ITerminalSymbols { try { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; - } - } + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = + // currentCharacter; + // } + // } while (currentCharacter != '\'') { - /**** in PHP \r and \n are valid in string literals ****/ + /** ** in PHP \r and \n are valid in string literals *** */ // if ((currentCharacter == '\n') // || (currentCharacter == '\r')) { // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed @@ -1091,19 +1112,11 @@ public class Scanner implements IScanner, ITerminalSymbols { escapeSize = currentPosition - escapeSize; if (withoutUnicodePtr == 0) { //buffer all the entries that have been left aside.... - withoutUnicodePtr = - currentPosition - escapeSize - 1 - startPosition; - System.arraycopy( - source, - startPosition, - withoutUnicodeBuffer, - 1, - withoutUnicodePtr); - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; + withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition; + System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr); + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; } else { //overwrite the / in the buffer - withoutUnicodeBuffer[withoutUnicodePtr] = - currentCharacter; + withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter; if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct withoutUnicodePtr--; } @@ -1111,15 +1124,15 @@ public class Scanner implements IScanner, ITerminalSymbols { } // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; - } + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; } + // } } } catch (IndexOutOfBoundsException e) { @@ -1141,35 +1154,32 @@ public class Scanner implements IScanner, ITerminalSymbols { } throw e; // rethrow } - if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int. + if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int. if (currentLine == null) { currentLine = new NLSLine(); lines.add(currentLine); } - currentLine.add( - new StringLiteral( - getCurrentTokenSourceString(), - startPosition, - currentPosition - 1)); + currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1)); } return TokenNameStringConstant; case '"' : try { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; - } - } + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = + // currentCharacter; + // } + // } while (currentCharacter != '"') { - /**** in PHP \r and \n are valid in string literals ****/ + /** ** in PHP \r and \n are valid in string literals *** */ // if ((currentCharacter == '\n') // || (currentCharacter == '\r')) { // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed @@ -1193,19 +1203,11 @@ public class Scanner implements IScanner, ITerminalSymbols { escapeSize = currentPosition - escapeSize; if (withoutUnicodePtr == 0) { //buffer all the entries that have been left aside.... - withoutUnicodePtr = - currentPosition - escapeSize - 1 - startPosition; - System.arraycopy( - source, - startPosition, - withoutUnicodeBuffer, - 1, - withoutUnicodePtr); - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; + withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition; + System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr); + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; } else { //overwrite the / in the buffer - withoutUnicodeBuffer[withoutUnicodePtr] = - currentCharacter; + withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter; if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct withoutUnicodePtr--; } @@ -1213,15 +1215,15 @@ public class Scanner implements IScanner, ITerminalSymbols { } // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; - } + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; } + // } } } catch (IndexOutOfBoundsException e) { @@ -1243,35 +1245,32 @@ public class Scanner implements IScanner, ITerminalSymbols { } throw e; // rethrow } - if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int. + if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int. if (currentLine == null) { currentLine = new NLSLine(); lines.add(currentLine); } - currentLine.add( - new StringLiteral( - getCurrentTokenSourceString(), - startPosition, - currentPosition - 1)); + currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1)); } return TokenNameStringLiteral; case '`' : try { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; - } - } + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = + // currentCharacter; + // } + // } while (currentCharacter != '`') { - /**** in PHP \r and \n are valid in string literals ****/ + /** ** in PHP \r and \n are valid in string literals *** */ // if ((currentCharacter == '\n') // || (currentCharacter == '\r')) { // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed @@ -1295,19 +1294,11 @@ public class Scanner implements IScanner, ITerminalSymbols { escapeSize = currentPosition - escapeSize; if (withoutUnicodePtr == 0) { //buffer all the entries that have been left aside.... - withoutUnicodePtr = - currentPosition - escapeSize - 1 - startPosition; - System.arraycopy( - source, - startPosition, - withoutUnicodeBuffer, - 1, - withoutUnicodePtr); - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; + withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition; + System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr); + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; } else { //overwrite the / in the buffer - withoutUnicodeBuffer[withoutUnicodePtr] = - currentCharacter; + withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter; if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct withoutUnicodePtr--; } @@ -1315,15 +1306,15 @@ public class Scanner implements IScanner, ITerminalSymbols { } // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; - } + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; } + // } } } catch (IndexOutOfBoundsException e) { @@ -1345,67 +1336,62 @@ public class Scanner implements IScanner, ITerminalSymbols { } throw e; // rethrow } - if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int. + if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int. if (currentLine == null) { currentLine = new NLSLine(); lines.add(currentLine); } - currentLine.add( - new StringLiteral( - getCurrentTokenSourceString(), - startPosition, - currentPosition - 1)); + currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1)); } return TokenNameStringInterpolated; case '#' : case '/' : { int test; - if ((currentCharacter == '#') - || (test = getNextChar('/', '*')) == 0) { - //line comment + if ((currentCharacter == '#') || (test = getNextChar('/', '*')) == 0) { + //line comment int endPositionForLineComment = 0; - try { //get the next char - if (((currentCharacter = source[currentPosition++]) - == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - } - if ((c1 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c1 < 0 - || (c2 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c2 < 0 - || (c3 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c3 < 0 - || (c4 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c4 < 0) { - throw new InvalidInputException(INVALID_UNICODE_ESCAPE); - } else { - currentCharacter = - (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - } - } + try { //get the next char + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) + // == '\\') + // && (source[currentPosition] == 'u')) { + // //-------------unicode traitement ------------ + // int c1 = 0, c2 = 0, c3 = 0, c4 = 0; + // currentPosition++; + // while (source[currentPosition] == 'u') { + // currentPosition++; + // } + // if ((c1 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c1 < 0 + // || (c2 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c2 < 0 + // || (c3 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c3 < 0 + // || (c4 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c4 < 0) { + // throw new InvalidInputException(INVALID_UNICODE_ESCAPE); + // } else { + // currentCharacter = + // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); + // } + // } //handle the \\u case manually into comment - if (currentCharacter == '\\') { - if (source[currentPosition] == '\\') - currentPosition++; - } //jump over the \\ + // if (currentCharacter == '\\') { + // if (source[currentPosition] == '\\') + // currentPosition++; + // } //jump over the \\ boolean isUnicode = false; - while (currentCharacter != '\r' - && currentCharacter != '\n') { + while (currentCharacter != '\r' && currentCharacter != '\n') { if (currentCharacter == '?') { if (getNextChar('>')) { startPosition = currentPosition - 2; @@ -1416,46 +1402,47 @@ public class Scanner implements IScanner, ITerminalSymbols { //get the next char isUnicode = false; - if (((currentCharacter = source[currentPosition++]) - == '\\') - && (source[currentPosition] == 'u')) { - isUnicode = true; - //-------------unicode traitement ------------ - int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - } - if ((c1 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c1 < 0 - || (c2 = - Character.getNumericValue( - source[currentPosition++])) - > 15 - || c2 < 0 - || (c3 = - Character.getNumericValue( - source[currentPosition++])) - > 15 - || c3 < 0 - || (c4 = - Character.getNumericValue( - source[currentPosition++])) - > 15 - || c4 < 0) { - throw new InvalidInputException(INVALID_UNICODE_ESCAPE); - } else { - currentCharacter = - (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - } - } + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) + // == '\\') + // && (source[currentPosition] == 'u')) { + // isUnicode = true; + // //-------------unicode traitement ------------ + // int c1 = 0, c2 = 0, c3 = 0, c4 = 0; + // currentPosition++; + // while (source[currentPosition] == 'u') { + // currentPosition++; + // } + // if ((c1 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c1 < 0 + // || (c2 = + // Character.getNumericValue( + // source[currentPosition++])) + // > 15 + // || c2 < 0 + // || (c3 = + // Character.getNumericValue( + // source[currentPosition++])) + // > 15 + // || c3 < 0 + // || (c4 = + // Character.getNumericValue( + // source[currentPosition++])) + // > 15 + // || c4 < 0) { + // throw new InvalidInputException(INVALID_UNICODE_ESCAPE); + // } else { + // currentCharacter = + // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); + // } + // } //handle the \\u case manually into comment - if (currentCharacter == '\\') { - if (source[currentPosition] == '\\') - currentPosition++; - } //jump over the \\ + // if (currentCharacter == '\\') { + // if (source[currentPosition] == '\\') + // currentPosition++; + // } //jump over the \\ } if (isUnicode) { endPositionForLineComment = currentPosition - 6; @@ -1463,8 +1450,7 @@ public class Scanner implements IScanner, ITerminalSymbols { endPositionForLineComment = currentPosition - 1; } recordComment(false); - if ((currentCharacter == '\r') - || (currentCharacter == '\n')) { + if ((currentCharacter == '\r') || (currentCharacter == '\n')) { checkNonExternalizeString(); if (recordLineSeparator) { if (isUnicode) { @@ -1497,22 +1483,22 @@ public class Scanner implements IScanner, ITerminalSymbols { boolean isJavadoc = false, star = false; // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; - } - } + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = + // currentCharacter; + // } + // } if (currentCharacter == '*') { isJavadoc = true; star = true; } - if ((currentCharacter == '\r') - || (currentCharacter == '\n')) { + if ((currentCharacter == '\r') || (currentCharacter == '\n')) { checkNonExternalizeString(); if (recordLineSeparator) { pushLineSeparator(); @@ -1520,27 +1506,27 @@ public class Scanner implements IScanner, ITerminalSymbols { currentLine = null; } } - try { //get the next char - if (((currentCharacter = source[currentPosition++]) - == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - getNextUnicodeChar(); - } + try { //get the next char + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) + // == '\\') + // && (source[currentPosition] == 'u')) { + // //-------------unicode traitement ------------ + // getNextUnicodeChar(); + // } //handle the \\u case manually into comment - if (currentCharacter == '\\') { - if (source[currentPosition] == '\\') - currentPosition++; - //jump over the \\ - } + // if (currentCharacter == '\\') { + // if (source[currentPosition] == '\\') + // currentPosition++; + // //jump over the \\ + // } // empty comment is not a javadoc /**/ if (currentCharacter == '/') { isJavadoc = false; } //loop until end of comment */ while ((currentCharacter != '/') || (!star)) { - if ((currentCharacter == '\r') - || (currentCharacter == '\n')) { + if ((currentCharacter == '\r') || (currentCharacter == '\n')) { checkNonExternalizeString(); if (recordLineSeparator) { pushLineSeparator(); @@ -1550,17 +1536,18 @@ public class Scanner implements IScanner, ITerminalSymbols { } star = currentCharacter == '*'; //get next char - if (((currentCharacter = source[currentPosition++]) - == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - getNextUnicodeChar(); - } + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) + // == '\\') + // && (source[currentPosition] == 'u')) { + // //-------------unicode traitement ------------ + // getNextUnicodeChar(); + // } //handle the \\u case manually into comment - if (currentCharacter == '\\') { - if (source[currentPosition] == '\\') - currentPosition++; - } //jump over the \\ + // if (currentCharacter == '\\') { + // if (source[currentPosition] == '\\') + // currentPosition++; + // } //jump over the \\ } recordComment(isJavadoc); if (tokenizeComments) { @@ -1585,7 +1572,7 @@ public class Scanner implements IScanner, ITerminalSymbols { default : if (currentCharacter == '$') { - while ( (currentCharacter = source[currentPosition++])=='$') { + while ((currentCharacter = source[currentPosition++]) == '$') { } if (currentCharacter == '{') return TokenNameDOLLAR_LBRACE; @@ -1607,51 +1594,52 @@ public class Scanner implements IScanner, ITerminalSymbols { return TokenNameEOF; } - public final void getNextUnicodeChar() - throws IndexOutOfBoundsException, InvalidInputException { - //VOID - //handle the case of unicode. - //when a unicode appears then we must use a buffer that holds char internal values - //At the end of this method currentCharacter holds the new visited char - //and currentPosition points right next after it - - //ALL getNextChar.... ARE OPTIMIZED COPIES - - int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0 - || (c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0 - || (c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0 - || (c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0) { - throw new InvalidInputException(INVALID_UNICODE_ESCAPE); - } else { - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - //need the unicode buffer - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - unicodeSize - startPosition; - System.arraycopy( - source, - startPosition, - withoutUnicodeBuffer, - 1, - withoutUnicodePtr); - } - //fill the buffer with the char - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - unicodeAsBackSlash = currentCharacter == '\\'; - } - /* Tokenize a method body, assuming that curly brackets are properly balanced. + // public final void getNextUnicodeChar() + // throws IndexOutOfBoundsException, InvalidInputException { + // //VOID + // //handle the case of unicode. + // //when a unicode appears then we must use a buffer that holds char internal values + // //At the end of this method currentCharacter holds the new visited char + // //and currentPosition points right next after it + // + // //ALL getNextChar.... ARE OPTIMIZED COPIES + // + // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6; + // currentPosition++; + // while (source[currentPosition] == 'u') { + // currentPosition++; + // unicodeSize++; + // } + // + // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c1 < 0 + // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c2 < 0 + // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c3 < 0 + // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c4 < 0) { + // throw new InvalidInputException(INVALID_UNICODE_ESCAPE); + // } else { + // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); + // //need the unicode buffer + // if (withoutUnicodePtr == 0) { + // //buffer all the entries that have been left aside.... + // withoutUnicodePtr = currentPosition - unicodeSize - startPosition; + // System.arraycopy( + // source, + // startPosition, + // withoutUnicodeBuffer, + // 1, + // withoutUnicodePtr); + // } + // //fill the buffer with the char + // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + // } + // unicodeAsBackSlash = currentCharacter == '\\'; + // } + /* + * Tokenize a method body, assuming that curly brackets are properly balanced. */ public final void jumpOverMethodBody() { @@ -1663,15 +1651,15 @@ public class Scanner implements IScanner, ITerminalSymbols { boolean isWhiteSpace; do { startPosition = currentPosition; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - isWhiteSpace = jumpOverUnicodeWhiteSpace(); - } else { - if (recordLineSeparator - && ((currentCharacter == '\r') || (currentCharacter == '\n'))) - pushLineSeparator(); - isWhiteSpace = Character.isWhitespace(currentCharacter); - } + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // isWhiteSpace = jumpOverUnicodeWhiteSpace(); + // } else { + if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n'))) + pushLineSeparator(); + isWhiteSpace = Character.isWhitespace(currentCharacter); + // } } while (isWhiteSpace); // -------consume token until } is found--------- @@ -1694,38 +1682,38 @@ public class Scanner implements IScanner, ITerminalSymbols { } catch (InvalidInputException ex) { }; } else { - try { // consume next character - unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; - } - } - } catch (InvalidInputException ex) { - }; + // try { // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } + // } + // } catch (InvalidInputException ex) { + // }; } getNextChar('\''); break; } case '"' : try { - try { // consume next character - unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; - } - } - } catch (InvalidInputException ex) { - }; + // try { // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } + // } + // } catch (InvalidInputException ex) { + // }; while (currentCharacter != '"') { if (currentCharacter == '\r') { if (source[currentPosition] == '\n') @@ -1743,19 +1731,19 @@ public class Scanner implements IScanner, ITerminalSymbols { } catch (InvalidInputException ex) { }; } - try { // consume next character - unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; - } - } - } catch (InvalidInputException ex) { - }; + // try { // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } + // } + // } catch (InvalidInputException ex) { + // }; } } catch (IndexOutOfBoundsException e) { return; @@ -1765,81 +1753,81 @@ public class Scanner implements IScanner, ITerminalSymbols { { int test; if ((test = getNextChar('/', '*')) == 0) { - //line comment + //line comment try { - //get the next char - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - } - if ((c1 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c1 < 0 - || (c2 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c2 < 0 - || (c3 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c3 < 0 - || (c4 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c4 < 0) { - //error don't care of the value - currentCharacter = 'A'; - } //something different from \n and \r - else { - currentCharacter = - (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - } - } + //get the next char + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // //-------------unicode traitement ------------ + // int c1 = 0, c2 = 0, c3 = 0, c4 = 0; + // currentPosition++; + // while (source[currentPosition] == 'u') { + // currentPosition++; + // } + // if ((c1 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c1 < 0 + // || (c2 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c2 < 0 + // || (c3 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c3 < 0 + // || (c4 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c4 < 0) { + // //error don't care of the value + // currentCharacter = 'A'; + // } //something different from \n and \r + // else { + // currentCharacter = + // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); + // } + // } - while (currentCharacter != '\r' - && currentCharacter != '\n') { - //get the next char - if (((currentCharacter = source[currentPosition++]) - == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - } - if ((c1 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c1 < 0 - || (c2 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c2 < 0 - || (c3 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c3 < 0 - || (c4 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c4 < 0) { - //error don't care of the value - currentCharacter = 'A'; - } //something different from \n and \r - else { - currentCharacter = - (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - } - } + while (currentCharacter != '\r' && currentCharacter != '\n') { + //get the next char + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) + // == '\\') + // && (source[currentPosition] == 'u')) { + // //-------------unicode traitement ------------ + // int c1 = 0, c2 = 0, c3 = 0, c4 = 0; + // currentPosition++; + // while (source[currentPosition] == 'u') { + // currentPosition++; + // } + // if ((c1 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c1 < 0 + // || (c2 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c2 < 0 + // || (c3 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c3 < 0 + // || (c4 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c4 < 0) { + // //error don't care of the value + // currentCharacter = 'A'; + // } //something different from \n and \r + // else { + // currentCharacter = + // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); + // } + // } } - if (recordLineSeparator - && ((currentCharacter == '\r') || (currentCharacter == '\n'))) + if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n'))) pushLineSeparator(); } catch (IndexOutOfBoundsException e) { } //an eof will them be generated @@ -1848,99 +1836,98 @@ public class Scanner implements IScanner, ITerminalSymbols { if (test > 0) { //traditional and annotation comment boolean star = false; - try { // consume next character - unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; - } - }; - } catch (InvalidInputException ex) { - }; + // try { // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + } + // }; + // } catch (InvalidInputException ex) { + // }; if (currentCharacter == '*') { star = true; } - if (recordLineSeparator - && ((currentCharacter == '\r') || (currentCharacter == '\n'))) + if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n'))) pushLineSeparator(); - try { //get the next char - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - } - if ((c1 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c1 < 0 - || (c2 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c2 < 0 - || (c3 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c3 < 0 - || (c4 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c4 < 0) { - //error don't care of the value - currentCharacter = 'A'; - } //something different from * and / - else { - currentCharacter = - (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - } - } - //loop until end of comment */ + try { //get the next char + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // //-------------unicode traitement ------------ + // int c1 = 0, c2 = 0, c3 = 0, c4 = 0; + // currentPosition++; + // while (source[currentPosition] == 'u') { + // currentPosition++; + // } + // if ((c1 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c1 < 0 + // || (c2 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c2 < 0 + // || (c3 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c3 < 0 + // || (c4 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c4 < 0) { + // //error don't care of the value + // currentCharacter = 'A'; + // } //something different from * and / + // else { + // currentCharacter = + // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); + // } + // } + //loop until end of comment */ while ((currentCharacter != '/') || (!star)) { - if (recordLineSeparator - && ((currentCharacter == '\r') - || (currentCharacter == '\n'))) + if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n'))) pushLineSeparator(); star = currentCharacter == '*'; //get next char - if (((currentCharacter = source[currentPosition++]) - == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - } - if ((c1 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c1 < 0 - || (c2 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c2 < 0 - || (c3 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c3 < 0 - || (c4 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c4 < 0) { - //error don't care of the value - currentCharacter = 'A'; - } //something different from * and / - else { - currentCharacter = - (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - } - } + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) + // == '\\') + // && (source[currentPosition] == 'u')) { + // //-------------unicode traitement ------------ + // int c1 = 0, c2 = 0, c3 = 0, c4 = 0; + // currentPosition++; + // while (source[currentPosition] == 'u') { + // currentPosition++; + // } + // if ((c1 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c1 < 0 + // || (c2 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c2 < 0 + // || (c3 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c3 < 0 + // || (c4 = + // Character.getNumericValue(source[currentPosition++])) + // > 15 + // || c4 < 0) { + // //error don't care of the value + // currentCharacter = 'A'; + // } //something different from * and / + // else { + // currentCharacter = + // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); + // } + // } } } catch (IndexOutOfBoundsException e) { return; @@ -1951,8 +1938,7 @@ public class Scanner implements IScanner, ITerminalSymbols { } default : - if (isPHPIdentifierStart(currentCharacter) - || currentCharacter == '$') { + if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') { try { scanIdentifierOrKeyword((currentCharacter == '$')); } catch (InvalidInputException ex) { @@ -1974,52 +1960,52 @@ public class Scanner implements IScanner, ITerminalSymbols { } return; } - public final boolean jumpOverUnicodeWhiteSpace() - throws InvalidInputException { - //BOOLEAN - //handle the case of unicode. Jump over the next whiteSpace - //making startPosition pointing on the next available char - //On false, the currentCharacter is filled up with a potential - //correct char - - try { - this.wasAcr = false; - int c1, c2, c3, c4; - int unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0) - || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0) - || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0) - || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0)) { - throw new InvalidInputException(INVALID_UNICODE_ESCAPE); - } - - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (recordLineSeparator - && ((currentCharacter == '\r') || (currentCharacter == '\n'))) - pushLineSeparator(); - if (Character.isWhitespace(currentCharacter)) - return true; - - //buffer the new char which is not a white space - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - //withoutUnicodePtr == 1 is true here - return false; - } catch (IndexOutOfBoundsException e) { - throw new InvalidInputException(INVALID_UNICODE_ESCAPE); - } - } + // public final boolean jumpOverUnicodeWhiteSpace() + // throws InvalidInputException { + // //BOOLEAN + // //handle the case of unicode. Jump over the next whiteSpace + // //making startPosition pointing on the next available char + // //On false, the currentCharacter is filled up with a potential + // //correct char + // + // try { + // this.wasAcr = false; + // int c1, c2, c3, c4; + // int unicodeSize = 6; + // currentPosition++; + // while (source[currentPosition] == 'u') { + // currentPosition++; + // unicodeSize++; + // } + // + // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c1 < 0) + // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c2 < 0) + // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c3 < 0) + // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 + // || c4 < 0)) { + // throw new InvalidInputException(INVALID_UNICODE_ESCAPE); + // } + // + // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); + // if (recordLineSeparator + // && ((currentCharacter == '\r') || (currentCharacter == '\n'))) + // pushLineSeparator(); + // if (Character.isWhitespace(currentCharacter)) + // return true; + // + // //buffer the new char which is not a white space + // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + // //withoutUnicodePtr == 1 is true here + // return false; + // } catch (IndexOutOfBoundsException e) { + // throw new InvalidInputException(INVALID_UNICODE_ESCAPE); + // } + // } public final int[] getLineEnds() { - //return a bounded copy of this.lineEnds + //return a bounded copy of this.lineEnds int[] copy; System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1); @@ -2091,13 +2077,12 @@ public class Scanner implements IScanner, ITerminalSymbols { return new char[] { charOne }; } } + final char[] optimizedCurrentTokenSource2() { //try to return the same char[] build only once char c0, c1; - int hash = - (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) - % TableSize; + int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize; char[][] table = charArray_length[0][hash]; int i = newEntry2; while (++i < InternalTableSize) { @@ -2121,14 +2106,13 @@ public class Scanner implements IScanner, ITerminalSymbols { newEntry2 = max; return r; } + final char[] optimizedCurrentTokenSource3() { //try to return the same char[] build only once char c0, c1, c2; int hash = - (((c0 = source[startPosition]) << 12) - + ((c1 = source[startPosition + 1]) << 6) - + (c2 = source[startPosition + 2])) + (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2])) % TableSize; char[][] table = charArray_length[1][hash]; int i = newEntry3; @@ -2153,6 +2137,7 @@ public class Scanner implements IScanner, ITerminalSymbols { newEntry3 = max; return r; } + final char[] optimizedCurrentTokenSource4() { //try to return the same char[] build only once @@ -2167,10 +2152,7 @@ public class Scanner implements IScanner, ITerminalSymbols { int i = newEntry4; while (++i < InternalTableSize) { char[] charArray = table[i]; - if ((c0 == charArray[0]) - && (c1 == charArray[1]) - && (c2 == charArray[2]) - && (c3 == charArray[3])) + if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3])) return charArray; } //---------other side--------- @@ -2178,10 +2160,7 @@ public class Scanner implements IScanner, ITerminalSymbols { int max = newEntry4; while (++i <= max) { char[] charArray = table[i]; - if ((c0 == charArray[0]) - && (c1 == charArray[1]) - && (c2 == charArray[2]) - && (c3 == charArray[3])) + if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3])) return charArray; } //--------add the entry------- @@ -2193,6 +2172,7 @@ public class Scanner implements IScanner, ITerminalSymbols { return r; } + final char[] optimizedCurrentTokenSource5() { //try to return the same char[] build only once @@ -2208,11 +2188,7 @@ public class Scanner implements IScanner, ITerminalSymbols { int i = newEntry5; while (++i < InternalTableSize) { char[] charArray = table[i]; - if ((c0 == charArray[0]) - && (c1 == charArray[1]) - && (c2 == charArray[2]) - && (c3 == charArray[3]) - && (c4 == charArray[4])) + if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])) return charArray; } //---------other side--------- @@ -2220,11 +2196,7 @@ public class Scanner implements IScanner, ITerminalSymbols { int max = newEntry5; while (++i <= max) { char[] charArray = table[i]; - if ((c0 == charArray[0]) - && (c1 == charArray[1]) - && (c2 == charArray[2]) - && (c3 == charArray[3]) - && (c4 == charArray[4])) + if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])) return charArray; } //--------add the entry------- @@ -2236,6 +2208,7 @@ public class Scanner implements IScanner, ITerminalSymbols { return r; } + final char[] optimizedCurrentTokenSource6() { //try to return the same char[] build only once @@ -2281,6 +2254,7 @@ public class Scanner implements IScanner, ITerminalSymbols { newEntry6 = max; return r; } + public final void pushLineSeparator() throws InvalidInputException { //see comment on isLineDelimiter(char) for the use of '\n' and '\r' final int INCREMENT = 250; @@ -2310,7 +2284,7 @@ public class Scanner implements IScanner, ITerminalSymbols { // look-ahead for merged cr+lf try { if (source[currentPosition] == '\n') { - //System.out.println("look-ahead LF-" + currentPosition); + //System.out.println("look-ahead LF-" + currentPosition); lineEnds[linePtr] = currentPosition; currentPosition++; wasAcr = false; @@ -2325,13 +2299,13 @@ public class Scanner implements IScanner, ITerminalSymbols { if (currentCharacter == '\n') { //must merge eventual cr followed by lf if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) { - //System.out.println("merge LF-" + (currentPosition - 1)); + //System.out.println("merge LF-" + (currentPosition - 1)); lineEnds[linePtr] = currentPosition - 1; } else { int separatorPos = currentPosition - 1; if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos)) return; - // System.out.println("LF-" + separatorPos); + // System.out.println("LF-" + separatorPos); try { lineEnds[++linePtr] = separatorPos; } catch (IndexOutOfBoundsException e) { @@ -2378,7 +2352,7 @@ public class Scanner implements IScanner, ITerminalSymbols { } // look-ahead for merged cr+lf if (source[currentPosition] == '\n') { - //System.out.println("look-ahead LF-" + currentPosition); + //System.out.println("look-ahead LF-" + currentPosition); lineEnds[linePtr] = currentPosition; currentPosition++; wasAcr = false; @@ -2390,13 +2364,13 @@ public class Scanner implements IScanner, ITerminalSymbols { if (currentCharacter == '\n') { //must merge eventual cr followed by lf if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) { - //System.out.println("merge LF-" + (currentPosition - 1)); + //System.out.println("merge LF-" + (currentPosition - 1)); lineEnds[linePtr] = currentPosition - 6; } else { int separatorPos = currentPosition - 6; if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos)) return; - // System.out.println("LF-" + separatorPos); + // System.out.println("LF-" + separatorPos); try { lineEnds[++linePtr] = separatorPos; } catch (IndexOutOfBoundsException e) { @@ -2416,8 +2390,7 @@ public class Scanner implements IScanner, ITerminalSymbols { // a new annotation comment is recorded try { - commentStops[++commentPtr] = - isJavadoc ? currentPosition : -currentPosition; + commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition; } catch (IndexOutOfBoundsException e) { int oldStackLength = commentStops.length; int[] oldStack = commentStops; @@ -2442,24 +2415,23 @@ public class Scanner implements IScanner, ITerminalSymbols { commentPtr = -1; // reset comment stack } - public final void scanSingleQuotedEscapeCharacter() - throws InvalidInputException { + public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException { // the string with "\\u" is a legal string of two chars \ and u //thus we use a direct access to the source (for regular cases). - if (unicodeAsBackSlash) { - // consume next character - unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - } - } else - currentCharacter = source[currentPosition++]; + // if (unicodeAsBackSlash) { + // // consume next character + // unicodeAsBackSlash = false; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + // } + // } + // } else + currentCharacter = source[currentPosition++]; switch (currentCharacter) { case '\'' : currentCharacter = '\''; @@ -2473,24 +2445,23 @@ public class Scanner implements IScanner, ITerminalSymbols { } } - public final void scanDoubleQuotedEscapeCharacter() - throws InvalidInputException { + public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException { // the string with "\\u" is a legal string of two chars \ and u //thus we use a direct access to the source (for regular cases). - if (unicodeAsBackSlash) { - // consume next character - unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - } - } else - currentCharacter = source[currentPosition++]; + // if (unicodeAsBackSlash) { + // // consume next character + // unicodeAsBackSlash = false; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + // } + // } + // } else + currentCharacter = source[currentPosition++]; switch (currentCharacter) { // case 'b' : // currentCharacter = '\b'; @@ -2528,13 +2499,11 @@ public class Scanner implements IScanner, ITerminalSymbols { int number = Character.getNumericValue(currentCharacter); if (number >= 0 && number <= 7) { boolean zeroToThreeNot = number > 3; - if (Character - .isDigit(currentCharacter = source[currentPosition++])) { + if (Character.isDigit(currentCharacter = source[currentPosition++])) { int digit = Character.getNumericValue(currentCharacter); if (digit >= 0 && digit <= 7) { number = (number * 8) + digit; - if (Character - .isDigit(currentCharacter = source[currentPosition++])) { + if (Character.isDigit(currentCharacter = source[currentPosition++])) { if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character currentPosition--; } else { @@ -2549,7 +2518,7 @@ public class Scanner implements IScanner, ITerminalSymbols { } else { // has read \OctalDigit NonDigit--> ignore last character currentPosition--; } - } else { // has read \OctalDigit NonOctalDigit--> ignore last character + } else { // has read \OctalDigit NonOctalDigit--> ignore last character currentPosition--; } } else { // has read \OctalDigit --> ignore last character @@ -2568,8 +2537,7 @@ public class Scanner implements IScanner, ITerminalSymbols { // return scanIdentifierOrKeyword( false ); // } - public int scanIdentifierOrKeyword(boolean isVariable) - throws InvalidInputException { + public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException { //test keywords //first dispatch on the first char. @@ -2578,47 +2546,50 @@ public class Scanner implements IScanner, ITerminalSymbols { //disptach on the second char :-)...cool....but fast ! useAssertAsAnIndentifier = false; - + while (getNextCharAsJavaIdentifierPart()) { }; if (isVariable) { + if (new String(getCurrentTokenSource()).equals("$this")) { + return TokenNamethis; + } return TokenNameVariable; } int index, length; char[] data; char firstLetter; - if (withoutUnicodePtr == 0) - - //quick test on length == 1 but not on length > 12 while most identifier - //have a length which is <= 12...but there are lots of identifier with - //only one char.... - - { - if ((length = currentPosition - startPosition) == 1) - return TokenNameIdentifier; - // data = source; - data = new char[length]; - index = startPosition; - for (int i = 0; i < length; i++) { - data[i] = Character.toLowerCase(source[index + i]); - } - index = 0; - } else { - if ((length = withoutUnicodePtr) == 1) - return TokenNameIdentifier; - // data = withoutUnicodeBuffer; - data = new char[withoutUnicodeBuffer.length]; - for (int i = 0; i < withoutUnicodeBuffer.length; i++) { - data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]); - } - index = 1; + // if (withoutUnicodePtr == 0) + + //quick test on length == 1 but not on length > 12 while most identifier + //have a length which is <= 12...but there are lots of identifier with + //only one char.... + + // { + if ((length = currentPosition - startPosition) == 1) + return TokenNameIdentifier; + // data = source; + data = new char[length]; + index = startPosition; + for (int i = 0; i < length; i++) { + data[i] = Character.toLowerCase(source[index + i]); } + index = 0; + // } else { + // if ((length = withoutUnicodePtr) == 1) + // return TokenNameIdentifier; + // // data = withoutUnicodeBuffer; + // data = new char[withoutUnicodeBuffer.length]; + // for (int i = 0; i < withoutUnicodeBuffer.length; i++) { + // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]); + // } + // index = 1; + // } firstLetter = data[index]; switch (firstLetter) { - case 'a' : // as and array + case 'a' : // as and array abstract switch (length) { case 2 : //as if ((data[++index] == 's')) { @@ -2632,21 +2603,29 @@ public class Scanner implements IScanner, ITerminalSymbols { } else { return TokenNameIdentifier; } - // case 5 : - // if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y')) - // return TokenNamearray; - // else - // return TokenNameIdentifier; + case 5 : // array + if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y')) + return TokenNamearray; + else + return TokenNameIdentifier; + case 8 : + if ((data[++index] == 'b') + && (data[++index] == 's') + && (data[++index] == 't') + && (data[++index] == 'r') + && (data[++index] == 'a') + && (data[++index] == 'c') + && (data[++index] == 't')) + return TokenNameabstract; + else + return TokenNameIdentifier; default : return TokenNameIdentifier; } case 'b' : //break switch (length) { case 5 : - if ((data[++index] == 'r') - && (data[++index] == 'e') - && (data[++index] == 'a') - && (data[++index] == 'k')) + if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k')) return TokenNamebreak; else return TokenNameIdentifier; @@ -2654,21 +2633,20 @@ public class Scanner implements IScanner, ITerminalSymbols { return TokenNameIdentifier; } - case 'c' : //case class continue + case 'c' : //case catch class const continue switch (length) { case 4 : - if ((data[++index] == 'a') - && (data[++index] == 's') - && (data[++index] == 'e')) + if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e')) return TokenNamecase; else return TokenNameIdentifier; case 5 : - if ((data[++index] == 'l') - && (data[++index] == 'a') - && (data[++index] == 's') - && (data[++index] == 's')) + if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h')) + return TokenNamecatch; + if ((data[index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's')) return TokenNameclass; + if ((data[index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't')) + return TokenNameconst; else return TokenNameIdentifier; case 8 : @@ -2686,7 +2664,8 @@ public class Scanner implements IScanner, ITerminalSymbols { return TokenNameIdentifier; } - case 'd' : //define default do + case 'd' : //define declare default do die + // TODO delete define ==> no keyword ! switch (length) { case 2 : if ((data[++index] == 'o')) @@ -2704,6 +2683,14 @@ public class Scanner implements IScanner, ITerminalSymbols { return TokenNameIdentifier; case 7 : if ((data[++index] == 'e') + && (data[++index] == 'c') + && (data[++index] == 'l') + && (data[++index] == 'a') + && (data[++index] == 'r') + && (data[++index] == 'e')) + return TokenNamedeclare; + index = 0; + if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u') @@ -2715,26 +2702,24 @@ public class Scanner implements IScanner, ITerminalSymbols { default : return TokenNameIdentifier; } - case 'e' : //echo else elseif extends + case 'e' : //echo else exit elseif extends eval switch (length) { case 4 : - if ((data[++index] == 'c') - && (data[++index] == 'h') - && (data[++index] == 'o')) + if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o')) return TokenNameecho; - else if ( - (data[index] == 'l') - && (data[++index] == 's') - && (data[++index] == 'e')) + else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e')) return TokenNameelse; + else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't')) + return TokenNameexit; + else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l')) + return TokenNameeval; else return TokenNameIdentifier; - case 5 : // endif - if ((data[++index] == 'n') - && (data[++index] == 'd') - && (data[++index] == 'i') - && (data[++index] == 'f')) + case 5 : // endif empty + if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f')) return TokenNameendif; + if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y')) + return TokenNameempty; else return TokenNameIdentifier; case 6 : // endfor @@ -2786,9 +2771,20 @@ public class Scanner implements IScanner, ITerminalSymbols { return TokenNameendswitch; else return TokenNameIdentifier; - case 10 : // endforeach + case 10 : // enddeclare if ((data[++index] == 'n') && (data[++index] == 'd') + && (data[++index] == 'd') + && (data[++index] == 'e') + && (data[++index] == 'c') + && (data[++index] == 'l') + && (data[++index] == 'a') + && (data[++index] == 'r') + && (data[++index] == 'e')) + return TokenNameendforeach; + index = 0; + if ((data[++index] == 'n') // endforeach + && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r') @@ -2804,7 +2800,7 @@ public class Scanner implements IScanner, ITerminalSymbols { return TokenNameIdentifier; } - case 'f' : //for false function + case 'f' : //for false final function switch (length) { case 3 : if ((data[++index] == 'o') && (data[++index] == 'r')) @@ -2812,14 +2808,13 @@ public class Scanner implements IScanner, ITerminalSymbols { else return TokenNameIdentifier; case 5 : - if ((data[++index] == 'a') - && (data[++index] == 'l') - && (data[++index] == 's') - && (data[++index] == 'e')) + if ((data[++index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e')) return TokenNamefalse; + if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l')) + return TokenNamefinal; else return TokenNameIdentifier; - case 7 : // function + case 7 : // foreach if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') @@ -2855,7 +2850,7 @@ public class Scanner implements IScanner, ITerminalSymbols { } return TokenNameIdentifier; - case 'i' : //if int + case 'i' : //if int isset include include_once instanceof interface implements switch (length) { case 2 : if (data[++index] == 'f') @@ -2867,6 +2862,11 @@ public class Scanner implements IScanner, ITerminalSymbols { // return TokenNameint; // else // return TokenNameIdentifier; + case 5 : + if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't')) + return TokenNameisset; + else + return TokenNameIdentifier; case 7 : if ((data[++index] == 'n') && (data[++index] == 'c') @@ -2877,6 +2877,41 @@ public class Scanner implements IScanner, ITerminalSymbols { return TokenNameinclude; else return TokenNameIdentifier; + case 9 : // interface + if ((data[++index] == 'n') + && (data[++index] == 't') + && (data[++index] == 'e') + && (data[++index] == 'r') + && (data[++index] == 'f') + && (data[++index] == 'a') + && (data[++index] == 'c') + && (data[++index] == 'e')) + return TokenNameinterface; + else + return TokenNameIdentifier; + case 10 : // instanceof + if ((data[++index] == 'n') + && (data[++index] == 's') + && (data[++index] == 't') + && (data[++index] == 'a') + && (data[++index] == 'n') + && (data[++index] == 'c') + && (data[++index] == 'e') + && (data[++index] == 'o') + && (data[++index] == 'f')) + return TokenNameinstanceof; + if ((data[index] == 'm') + && (data[++index] == 'p') + && (data[++index] == 'l') + && (data[++index] == 'e') + && (data[++index] == 'm') + && (data[++index] == 'e') + && (data[++index] == 'n') + && (data[++index] == 't') + && (data[++index] == 's')) + return TokenNameimplements; + else + return TokenNameIdentifier; case 12 : if ((data[++index] == 'n') && (data[++index] == 'c') @@ -2898,9 +2933,7 @@ public class Scanner implements IScanner, ITerminalSymbols { case 'l' : //list if (length == 4) { - if ((data[++index] == 'i') - && (data[++index] == 's') - && (data[++index] == 't')) { + if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) { return TokenNamelist; } } @@ -2914,9 +2947,7 @@ public class Scanner implements IScanner, ITerminalSymbols { else return TokenNameIdentifier; case 4 : - if ((data[++index] == 'u') - && (data[++index] == 'l') - && (data[++index] == 'l')) + if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l')) return TokenNamenull; else return TokenNameIdentifier; @@ -2947,14 +2978,44 @@ public class Scanner implements IScanner, ITerminalSymbols { // } return TokenNameIdentifier; - case 'p' : // print - if (length == 5) { - if ((data[++index] == 'r') - && (data[++index] == 'i') - && (data[++index] == 'n') - && (data[++index] == 't')) { - return TokenNameprint; - } + case 'p' : // print public private protected + switch (length) { + case 5 : + if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) { + return TokenNameprint; + } else + return TokenNameIdentifier; + case 6 : + if ((data[++index] == 'u') + && (data[++index] == 'b') + && (data[++index] == 'l') + && (data[++index] == 'i') + && (data[++index] == 'c')) { + return TokenNamepublic; + } else + return TokenNameIdentifier; + case 7 : + if ((data[++index] == 'r') + && (data[++index] == 'i') + && (data[++index] == 'v') + && (data[++index] == 'a') + && (data[++index] == 't') + && (data[++index] == 'e')) { + return TokenNameprivate; + } else + return TokenNameIdentifier; + case 9 : + if ((data[++index] == 'r') + && (data[++index] == 'o') + && (data[++index] == 't') + && (data[++index] == 'e') + && (data[++index] == 'c') + && (data[++index] == 't') + && (data[++index] == 'e') + && (data[++index] == 'd')) { + return TokenNameprotected; + } else + return TokenNameIdentifier; } return TokenNameIdentifier; case 'r' : //return require require_once @@ -2992,14 +3053,11 @@ public class Scanner implements IScanner, ITerminalSymbols { } else return TokenNameIdentifier; - case 's' : //static switch + case 's' : //static switch switch (length) { case 6 : if (data[++index] == 't') - if ((data[++index] == 'a') - && (data[++index] == 't') - && (data[++index] == 'i') - && (data[++index] == 'c')) { + if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) { return TokenNamestatic; } else return TokenNameIdentifier; @@ -3016,24 +3074,43 @@ public class Scanner implements IScanner, ITerminalSymbols { return TokenNameIdentifier; } - case 't' : // true + case 't' : // try true throw switch (length) { - + case 3 : + if ((data[++index] == 'r') && (data[++index] == 'y')) + return TokenNametry; + else + return TokenNameIdentifier; case 4 : - if ((data[++index] == 'r') - && (data[++index] == 'u') - && (data[++index] == 'e')) + if ((data[++index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e')) return TokenNametrue; else return TokenNameIdentifier; - // if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's')) - // return TokenNamethis; + case 5 : + if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w')) + return TokenNamethrow; + else + return TokenNameIdentifier; default : return TokenNameIdentifier; } - - case 'v' : //var + case 'u' : //use unset + switch (length) { + case 3 : + if ((data[++index] == 's') && (data[++index] == 'e')) + return TokenNameuse; + else + return TokenNameIdentifier; + case 5 : + if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't')) + return TokenNameunset; + else + return TokenNameIdentifier; + default : + return TokenNameIdentifier; + } + case 'v' : //var switch (length) { case 3 : if ((data[++index] == 'a') && (data[++index] == 'r')) @@ -3045,17 +3122,15 @@ public class Scanner implements IScanner, ITerminalSymbols { return TokenNameIdentifier; } - case 'w' : //while + case 'w' : //while switch (length) { case 5 : - if ((data[++index] == 'h') - && (data[++index] == 'i') - && (data[++index] == 'l') - && (data[++index] == 'e')) + if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e')) return TokenNamewhile; else return TokenNameIdentifier; - //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p')) + //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& + // (data[++index]=='p')) //return TokenNamewidefp ; //else //return TokenNameIdentifier; @@ -3090,14 +3165,15 @@ public class Scanner implements IScanner, ITerminalSymbols { //force the first char of the hexa number do exist... // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - } + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + // } + // } if (Character.digit(currentCharacter, 16) == -1) throw new InvalidInputException(INVALID_HEXA); //---end forcing-- @@ -3133,27 +3209,29 @@ public class Scanner implements IScanner, ITerminalSymbols { if (getNextChar('e', 'E') >= 0) { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - } + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + // } + // } if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; - } - } + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = + // currentCharacter; + // } + // } } if (!Character.isDigit(currentCharacter)) throw new InvalidInputException(INVALID_FLOAT); @@ -3191,26 +3269,27 @@ public class Scanner implements IScanner, ITerminalSymbols { floating = true; // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - } - - if ((currentCharacter == '-') - || (currentCharacter == '+')) { // consume next character + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + // } + // } + + if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - } + currentCharacter = source[currentPosition++]; + // if (((currentCharacter = source[currentPosition++]) == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + // } + // } } if (!Character.isDigit(currentCharacter)) throw new InvalidInputException(INVALID_FLOAT); @@ -3229,7 +3308,7 @@ public class Scanner implements IScanner, ITerminalSymbols { } /** * Search the line number corresponding to a specific position - * + * */ public final int getLineNumber(int position) { @@ -3294,27 +3373,27 @@ public class Scanner implements IScanner, ITerminalSymbols { } char end[] = new char[source.length - (currentPosition - 1)]; - System.arraycopy( - source, - (currentPosition - 1) + 1, - end, - 0, - source.length - (currentPosition - 1) - 1); + System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1); return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$ + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$ + new String(end); } public final String toStringAction(int act) { + switch (act) { case TokenNameERROR : - return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ + return "ScannerError"; // + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ case TokenNameStopPHP : return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ case TokenNameIdentifier : return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ case TokenNameVariable : return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ + case TokenNameabstract : + return "abstract"; //$NON-NLS-1$ + case TokenNamearray : + return "array"; //$NON-NLS-1$ case TokenNameas : return "as"; //$NON-NLS-1$ case TokenNamebreak : @@ -3351,6 +3430,8 @@ public class Scanner implements IScanner, ITerminalSymbols { return "extends"; //$NON-NLS-1$ case TokenNamefalse : return "false"; //$NON-NLS-1$ + case TokenNamefinal : + return "final"; //$NON-NLS-1$ case TokenNamefor : return "for"; //$NON-NLS-1$ case TokenNameforeach : @@ -3361,10 +3442,14 @@ public class Scanner implements IScanner, ITerminalSymbols { return "global"; //$NON-NLS-1$ case TokenNameif : return "if"; //$NON-NLS-1$ + case TokenNameimplements : + return "implements"; //$NON-NLS-1$ case TokenNameinclude : return "include"; //$NON-NLS-1$ case TokenNameinclude_once : return "include_once"; //$NON-NLS-1$ + case TokenNameinterface : + return "interface"; //$NON-NLS-1$ case TokenNamelist : return "list"; //$NON-NLS-1$ case TokenNamenew : @@ -3373,6 +3458,12 @@ public class Scanner implements IScanner, ITerminalSymbols { return "null"; //$NON-NLS-1$ case TokenNameprint : return "print"; //$NON-NLS-1$ + case TokenNameprivate : + return "private"; //$NON-NLS-1$ + case TokenNameprotected : + return "protected"; //$NON-NLS-1$ + case TokenNamepublic : + return "public"; //$NON-NLS-1$ case TokenNamerequire : return "require"; //$NON-NLS-1$ case TokenNamerequire_once : @@ -3385,10 +3476,14 @@ public class Scanner implements IScanner, ITerminalSymbols { return "switch"; //$NON-NLS-1$ case TokenNametrue : return "true"; //$NON-NLS-1$ + case TokenNameunset : + return "unset"; //$NON-NLS-1$ case TokenNamevar : return "var"; //$NON-NLS-1$ case TokenNamewhile : return "while"; //$NON-NLS-1$ + case TokenNamethis : + return "$this"; //$NON-NLS-1$ case TokenNameIntegerLiteral : return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ case TokenNameDoubleLiteral : @@ -3408,6 +3503,8 @@ public class Scanner implements IScanner, ITerminalSymbols { return "--"; //$NON-NLS-1$ case TokenNameEQUAL_EQUAL : return "=="; //$NON-NLS-1$ + case TokenNameEQUAL_EQUAL_EQUAL : + return "==="; //$NON-NLS-1$ case TokenNameEQUAL_GREATER : return "=>"; //$NON-NLS-1$ case TokenNameLESS_EQUAL : @@ -3416,6 +3513,8 @@ public class Scanner implements IScanner, ITerminalSymbols { return ">="; //$NON-NLS-1$ case TokenNameNOT_EQUAL : return "!="; //$NON-NLS-1$ + case TokenNameNOT_EQUAL_EQUAL : + return "!=="; //$NON-NLS-1$ case TokenNameLEFT_SHIFT : return "<<"; //$NON-NLS-1$ case TokenNameRIGHT_SHIFT : @@ -3502,20 +3601,23 @@ public class Scanner implements IScanner, ITerminalSymbols { return "${"; case TokenNameEOF : return "EOF"; //$NON-NLS-1$ + case TokenNameWHITESPACE : + return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ + case TokenNameCOMMENT_LINE : + return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ + case TokenNameCOMMENT_BLOCK : + return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ + case TokenNameCOMMENT_PHPDOC : + return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ + case TokenNameHTML : + return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ default : return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$ } } - public Scanner( - boolean tokenizeComments, - boolean tokenizeWhiteSpace, - boolean checkNonExternalizedStringLiterals) { - this( - tokenizeComments, - tokenizeWhiteSpace, - checkNonExternalizedStringLiterals, - false); + public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) { + this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false); } public Scanner( @@ -3526,8 +3628,7 @@ public class Scanner implements IScanner, ITerminalSymbols { this.eofPosition = Integer.MAX_VALUE; this.tokenizeComments = tokenizeComments; this.tokenizeWhiteSpace = tokenizeWhiteSpace; - this.checkNonExternalizedStringLiterals = - checkNonExternalizedStringLiterals; + this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals; this.assertMode = assertMode; } @@ -3573,13 +3674,199 @@ public class Scanner implements IScanner, ITerminalSymbols { } this.wasNonExternalizedStringLiteral = true; if (nonNLSCounter != lineLength) { - System.arraycopy( - this.nonNLSStrings, - 0, - (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), - 0, - nonNLSCounter); + System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter); } currentLine = null; } + + public final void scanEscapeCharacter() throws InvalidInputException { + // the string with "\\u" is a legal string of two chars \ and u + //thus we use a direct access to the source (for regular cases). + + if (unicodeAsBackSlash) { + // consume next character + unicodeAsBackSlash = false; + // if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + // } + } + } else + currentCharacter = source[currentPosition++]; + switch (currentCharacter) { + case 'b' : + currentCharacter = '\b'; + break; + case 't' : + currentCharacter = '\t'; + break; + case 'n' : + currentCharacter = '\n'; + break; + case 'f' : + currentCharacter = '\f'; + break; + case 'r' : + currentCharacter = '\r'; + break; + case '\"' : + currentCharacter = '\"'; + break; + case '\'' : + currentCharacter = '\''; + break; + case '\\' : + currentCharacter = '\\'; + break; + default : + // -----------octal escape-------------- + // OctalDigit + // OctalDigit OctalDigit + // ZeroToThree OctalDigit OctalDigit + + int number = Character.getNumericValue(currentCharacter); + if (number >= 0 && number <= 7) { + boolean zeroToThreeNot = number > 3; + if (Character.isDigit(currentCharacter = source[currentPosition++])) { + int digit = Character.getNumericValue(currentCharacter); + if (digit >= 0 && digit <= 7) { + number = (number * 8) + digit; + if (Character.isDigit(currentCharacter = source[currentPosition++])) { + if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character + currentPosition--; + } else { + digit = Character.getNumericValue(currentCharacter); + if (digit >= 0 && digit <= 7) { // has read \ZeroToThree OctalDigit OctalDigit + number = (number * 8) + digit; + } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character + currentPosition--; + } + } + } else { // has read \OctalDigit NonDigit--> ignore last character + currentPosition--; + } + } else { // has read \OctalDigit NonOctalDigit--> ignore last character + currentPosition--; + } + } else { // has read \OctalDigit --> ignore last character + currentPosition--; + } + if (number > 255) + throw new InvalidInputException(INVALID_ESCAPE); + currentCharacter = (char) number; + } else + throw new InvalidInputException(INVALID_ESCAPE); + } + } + + // chech presence of task: tags + public void checkTaskTag(int commentStart, int commentEnd) { + + // only look for newer task: tags + if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) { + return; + } + int foundTaskIndex = this.foundTaskCount; + nextChar : for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) { + + char[] tag = null; + char[] priority = null; + + // check for tag occurrence + nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) { + tag = this.taskTags[itag]; + priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null; + int tagLength = tag.length; + for (int t = 0; t < tagLength; t++) { + if (this.source[i + t] != tag[t]) + continue nextTag; + } + + if (this.foundTaskTags == null) { + this.foundTaskTags = new char[5][]; + this.foundTaskMessages = new char[5][]; + this.foundTaskPriorities = new char[5][]; + this.foundTaskPositions = new int[5][]; + } else if (this.foundTaskCount == this.foundTaskTags.length) { + System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount); + System.arraycopy( + this.foundTaskMessages, + 0, + this.foundTaskMessages = new char[this.foundTaskCount * 2][], + 0, + this.foundTaskCount); + System.arraycopy( + this.foundTaskPriorities, + 0, + this.foundTaskPriorities = new char[this.foundTaskCount * 2][], + 0, + this.foundTaskCount); + System.arraycopy( + this.foundTaskPositions, + 0, + this.foundTaskPositions = new int[this.foundTaskCount * 2][], + 0, + this.foundTaskCount); + } + this.foundTaskTags[this.foundTaskCount] = tag; + this.foundTaskPriorities[this.foundTaskCount] = priority; + this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 }; + this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR; + this.foundTaskCount++; + + i += tagLength - 1; // will be incremented when looping + } + } + + for (int i = foundTaskIndex; i < this.foundTaskCount; i++) { + // retrieve message start and end positions + int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length; + int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1; + // at most beginning of next task + if (max_value < msgStart) + max_value = msgStart; // would only occur if tag is before EOF. + int end = -1; + char c; + + for (int j = msgStart; j < max_value; j++) { + if ((c = this.source[j]) == '\n' || c == '\r') { + end = j - 1; + break; + } + } + + if (end == -1) { + for (int j = max_value; j > msgStart; j--) { + if ((c = this.source[j]) == '*') { + end = j - 1; + break; + } + } + if (end == -1) + end = max_value; + } + + if (msgStart == end) + continue; // empty + + // trim the message + while (CharOperation.isWhitespace(source[end]) && msgStart <= end) + end--; + while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end) + msgStart++; + + // update the end position of the task + this.foundTaskPositions[i][1] = end; + + // get the message source + final int messageLength = end - msgStart + 1; + char[] message = new char[messageLength]; + + System.arraycopy(source, msgStart, message, 0, messageLength); + this.foundTaskMessages[i] = message; + } + } + }