X-Git-Url: http://git.phpeclipse.com diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java index 7fc7e5c..ac3546e 100644 --- a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java +++ b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java @@ -14,9 +14,7 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import net.sourceforge.phpdt.core.compiler.IScanner; -import net.sourceforge.phpdt.core.compiler.ITerminalSymbols; -import net.sourceforge.phpdt.core.compiler.InvalidInputException; +import net.sourceforge.phpdt.core.compiler.*; import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral; public class Scanner implements IScanner, ITerminalSymbols { @@ -37,6 +35,8 @@ public class Scanner implements IScanner, ITerminalSymbols { public boolean containsAssertKeyword = false; public boolean recordLineSeparator; + public boolean phpMode = false; + public char currentCharacter; public int startPosition; public int currentPosition; @@ -52,7 +52,8 @@ public class Scanner implements IScanner, ITerminalSymbols { //unicode support public char[] withoutUnicodeBuffer; - public int withoutUnicodePtr; //when == 0 ==> no unicode in the current token + public int withoutUnicodePtr; + //when == 0 ==> no unicode in the current token public boolean unicodeAsBackSlash = false; public boolean scanningFloatLiteral = false; @@ -115,11 +116,14 @@ public class Scanner implements IScanner, ITerminalSymbols { charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' }; - static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' }; - static final int TableSize = 30, InternalTableSize = 6; //30*6 = 180 entries + static final char[] initCharArray = + new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' }; + static final int TableSize = 30, InternalTableSize = 6; + //30*6 = 180 entries public static final int OptimizedLength = 6; public /*static*/ - final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][]; + final char[][][][] charArray_length = + new char[OptimizedLength][TableSize][InternalTableSize][]; // support for detecting non-externalized string literals int currentLineNr = -1; int previousLineNr = -1; @@ -142,18 +146,46 @@ public class Scanner implements IScanner, ITerminalSymbols { } } } - static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0; + static int newEntry2 = 0, + newEntry3 = 0, + newEntry4 = 0, + newEntry5 = 0, + newEntry6 = 0; public static final int RoundBracket = 0; public static final int SquareBracket = 1; public static final int CurlyBracket = 2; public static final int BracketKinds = 3; + + public static final boolean DEBUG = false; + public Scanner() { this(false, false); } public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) { this(tokenizeComments, tokenizeWhiteSpace, false); } + + /** + * Determines if the specified character is + * permissible as the first character in a PHP identifier + */ + public static boolean isPHPIdentifierStart(char ch) { + return Character.isLetter(ch) + || (ch == '_') + || (0x7F <= ch && ch <= 0xFF); + } + + /** + * Determines if the specified character may be part of a PHP identifier as + * other than the first character + */ + public static boolean isPHPIdentifierPart(char ch) { + return Character.isLetterOrDigit(ch) + || (ch == '_') + || (0x7F <= ch && ch <= 0xFF); + } + public final boolean atEnd() { // This code is not relevant if source is // Only a part of the real stream input @@ -164,10 +196,15 @@ public class Scanner implements IScanner, ITerminalSymbols { //return the token REAL source (aka unicodes are precomputed) char[] result; - if (withoutUnicodePtr != 0) - //0 is used as a fast test flag so the real first char is in position 1 - System.arraycopy(withoutUnicodeBuffer, 1, result = new char[withoutUnicodePtr], 0, withoutUnicodePtr); - else { +// if (withoutUnicodePtr != 0) +// //0 is used as a fast test flag so the real first char is in position 1 +// System.arraycopy( +// withoutUnicodeBuffer, +// 1, +// result = new char[withoutUnicodePtr], +// 0, +// withoutUnicodePtr); +// else { int length = currentPosition - startPosition; switch (length) { // see OptimizedLength case 1 : @@ -184,8 +221,13 @@ public class Scanner implements IScanner, ITerminalSymbols { return optimizedCurrentTokenSource6(); } //no optimization - System.arraycopy(source, startPosition, result = new char[length], 0, length); - } + System.arraycopy( + source, + startPosition, + result = new char[length], + 0, + length); + // } return result; } public int getCurrentTokenEndPosition() { @@ -195,15 +237,50 @@ public class Scanner implements IScanner, ITerminalSymbols { // Return the token REAL source (aka unicodes are precomputed) char[] result; - if (withoutUnicodePtr != 0) - // 0 is used as a fast test flag so the real first char is in position 1 - System.arraycopy(withoutUnicodeBuffer, 1, result = new char[withoutUnicodePtr], 0, withoutUnicodePtr); - else { +// if (withoutUnicodePtr != 0) +// // 0 is used as a fast test flag so the real first char is in position 1 +// System.arraycopy( +// withoutUnicodeBuffer, +// 1, +// result = new char[withoutUnicodePtr], +// 0, +// withoutUnicodePtr); +// else { int length; - System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length); - } + System.arraycopy( + source, + startPosition, + result = new char[length = currentPosition - startPosition], + 0, + length); +// } return result; } + + public final char[] getCurrentTokenSource(int startPos) { + // Return the token REAL source (aka unicodes are precomputed) + + char[] result; +// if (withoutUnicodePtr != 0) +// // 0 is used as a fast test flag so the real first char is in position 1 +// System.arraycopy( +// withoutUnicodeBuffer, +// 1, +// result = new char[withoutUnicodePtr], +// 0, +// withoutUnicodePtr); +// else { + int length; + System.arraycopy( + source, + startPos, + result = new char[length = currentPosition - startPos], + 0, + length); + // } + return result; + } + public final char[] getCurrentTokenSourceString() { //return the token REAL source (aka unicodes are precomputed). //REMOVE the two " that are at the beginning and the end. @@ -216,7 +293,12 @@ public class Scanner implements IScanner, ITerminalSymbols { result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2); else { int length; - System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length); + System.arraycopy( + source, + startPosition + 1, + result = new char[length = currentPosition - startPosition - 2], + 0, + length); } return result; } @@ -242,7 +324,8 @@ public class Scanner implements IScanner, ITerminalSymbols { if (lineNumber == lineEnds.length - 1) return eofPosition; - return lineEnds[lineNumber - 1]; // next line start one character behind the lineEnd of the previous line + return lineEnds[lineNumber - 1]; + // next line start one character behind the lineEnd of the previous line } /** * Search the source position corresponding to the beginning of a given line number @@ -265,7 +348,8 @@ public class Scanner implements IScanner, ITerminalSymbols { if (lineNumber == 1) return initialPosition; - return lineEnds[lineNumber - 2] + 1; // next line start one character behind the lineEnd of the previous line + return lineEnds[lineNumber - 2] + 1; + // next line start one character behind the lineEnd of the previous line } public final boolean getNextChar(char testedChar) { //BOOLEAN @@ -280,52 +364,63 @@ public class Scanner implements IScanner, ITerminalSymbols { int temp = currentPosition; try { - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1, c2, c3, c4; - int unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0) - || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0) - || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0) - || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) { - currentPosition = temp; - return false; - } - - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (currentCharacter != testedChar) { - currentPosition = temp; - return false; - } - unicodeAsBackSlash = currentCharacter == '\\'; - - //need the unicode buffer - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - unicodeSize - startPosition; - System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr); - } - //fill the buffer with the char - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return true; - - } //-------------end unicode traitement-------------- - else { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1, c2, c3, c4; +// int unicodeSize = 6; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// unicodeSize++; +// } +// +// if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c1 < 0) +// || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c2 < 0) +// || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c3 < 0) +// || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c4 < 0)) { +// currentPosition = temp; +// return false; +// } +// +// currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// if (currentCharacter != testedChar) { +// currentPosition = temp; +// return false; +// } +// unicodeAsBackSlash = currentCharacter == '\\'; +// +// //need the unicode buffer +// if (withoutUnicodePtr == 0) { +// //buffer all the entries that have been left aside.... +// withoutUnicodePtr = currentPosition - unicodeSize - startPosition; +// System.arraycopy( +// source, +// startPosition, +// withoutUnicodeBuffer, +// 1, +// withoutUnicodePtr); +// } +// //fill the buffer with the char +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// return true; +// +// } //-------------end unicode traitement-------------- +// else { if (currentCharacter != testedChar) { currentPosition = temp; return false; } unicodeAsBackSlash = false; - if (withoutUnicodePtr != 0) - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// if (withoutUnicodePtr != 0) +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; return true; - } +// } } catch (IndexOutOfBoundsException e) { unicodeAsBackSlash = false; currentPosition = temp; @@ -347,45 +442,56 @@ public class Scanner implements IScanner, ITerminalSymbols { int temp = currentPosition; try { int result; - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1, c2, c3, c4; - int unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0) - || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0) - || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0) - || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) { - currentPosition = temp; - return 2; - } - - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (currentCharacter == testedChar1) - result = 0; - else if (currentCharacter == testedChar2) - result = 1; - else { - currentPosition = temp; - return -1; - } - - //need the unicode buffer - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - unicodeSize - startPosition; - System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr); - } - //fill the buffer with the char - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return result; - } //-------------end unicode traitement-------------- - else { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1, c2, c3, c4; +// int unicodeSize = 6; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// unicodeSize++; +// } +// +// if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c1 < 0) +// || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c2 < 0) +// || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c3 < 0) +// || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c4 < 0)) { +// currentPosition = temp; +// return 2; +// } +// +// currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// if (currentCharacter == testedChar1) +// result = 0; +// else if (currentCharacter == testedChar2) +// result = 1; +// else { +// currentPosition = temp; +// return -1; +// } +// +// //need the unicode buffer +// if (withoutUnicodePtr == 0) { +// //buffer all the entries that have been left aside.... +// withoutUnicodePtr = currentPosition - unicodeSize - startPosition; +// System.arraycopy( +// source, +// startPosition, +// withoutUnicodeBuffer, +// 1, +// withoutUnicodePtr); +// } +// //fill the buffer with the char +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// return result; +// } //-------------end unicode traitement-------------- +// else { if (currentCharacter == testedChar1) result = 0; else if (currentCharacter == testedChar2) @@ -395,10 +501,10 @@ public class Scanner implements IScanner, ITerminalSymbols { return -1; } - if (withoutUnicodePtr != 0) - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// if (withoutUnicodePtr != 0) +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; return result; - } + // } } catch (IndexOutOfBoundsException e) { currentPosition = temp; return -1; @@ -417,49 +523,60 @@ public class Scanner implements IScanner, ITerminalSymbols { int temp = currentPosition; try { - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1, c2, c3, c4; - int unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0) - || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0) - || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0) - || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) { - currentPosition = temp; - return false; - } - - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (!Character.isDigit(currentCharacter)) { - currentPosition = temp; - return false; - } - - //need the unicode buffer - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - unicodeSize - startPosition; - System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr); - } - //fill the buffer with the char - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return true; - } //-------------end unicode traitement-------------- - else { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1, c2, c3, c4; +// int unicodeSize = 6; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// unicodeSize++; +// } +// +// if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c1 < 0) +// || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c2 < 0) +// || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c3 < 0) +// || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c4 < 0)) { +// currentPosition = temp; +// return false; +// } +// +// currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// if (!Character.isDigit(currentCharacter)) { +// currentPosition = temp; +// return false; +// } +// +// //need the unicode buffer +// if (withoutUnicodePtr == 0) { +// //buffer all the entries that have been left aside.... +// withoutUnicodePtr = currentPosition - unicodeSize - startPosition; +// System.arraycopy( +// source, +// startPosition, +// withoutUnicodeBuffer, +// 1, +// withoutUnicodePtr); +// } +// //fill the buffer with the char +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// return true; +// } //-------------end unicode traitement-------------- +// else { if (!Character.isDigit(currentCharacter)) { currentPosition = temp; return false; } - if (withoutUnicodePtr != 0) - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// if (withoutUnicodePtr != 0) +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; return true; - } +// } } catch (IndexOutOfBoundsException e) { currentPosition = temp; return false; @@ -478,49 +595,60 @@ public class Scanner implements IScanner, ITerminalSymbols { int temp = currentPosition; try { - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1, c2, c3, c4; - int unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0) - || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0) - || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0) - || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) { - currentPosition = temp; - return false; - } - - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (Character.digit(currentCharacter, radix) == -1) { - currentPosition = temp; - return false; - } - - //need the unicode buffer - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - unicodeSize - startPosition; - System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr); - } - //fill the buffer with the char - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return true; - } //-------------end unicode traitement-------------- - else { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1, c2, c3, c4; +// int unicodeSize = 6; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// unicodeSize++; +// } +// +// if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c1 < 0) +// || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c2 < 0) +// || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c3 < 0) +// || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c4 < 0)) { +// currentPosition = temp; +// return false; +// } +// +// currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// if (Character.digit(currentCharacter, radix) == -1) { +// currentPosition = temp; +// return false; +// } +// +// //need the unicode buffer +// if (withoutUnicodePtr == 0) { +// //buffer all the entries that have been left aside.... +// withoutUnicodePtr = currentPosition - unicodeSize - startPosition; +// System.arraycopy( +// source, +// startPosition, +// withoutUnicodeBuffer, +// 1, +// withoutUnicodePtr); +// } +// //fill the buffer with the char +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// return true; +// } //-------------end unicode traitement-------------- +// else { if (Character.digit(currentCharacter, radix) == -1) { currentPosition = temp; return false; } - if (withoutUnicodePtr != 0) - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// if (withoutUnicodePtr != 0) +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; return true; - } +// } } catch (IndexOutOfBoundsException e) { currentPosition = temp; return false; @@ -539,583 +667,1030 @@ public class Scanner implements IScanner, ITerminalSymbols { int temp = currentPosition; try { - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1, c2, c3, c4; - int unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0) - || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0) - || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0) - || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) { - currentPosition = temp; - return false; - } - - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (!Character.isJavaIdentifierPart(currentCharacter)) { - currentPosition = temp; - return false; - } - - //need the unicode buffer - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - unicodeSize - startPosition; - System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr); - } - //fill the buffer with the char - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return true; - } //-------------end unicode traitement-------------- - else { - if (!Character.isJavaIdentifierPart(currentCharacter)) { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1, c2, c3, c4; +// int unicodeSize = 6; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// unicodeSize++; +// } +// +// if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c1 < 0) +// || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c2 < 0) +// || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c3 < 0) +// || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c4 < 0)) { +// currentPosition = temp; +// return false; +// } +// +// currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// if (!isPHPIdentifierPart(currentCharacter)) { +// currentPosition = temp; +// return false; +// } +// +// //need the unicode buffer +// if (withoutUnicodePtr == 0) { +// //buffer all the entries that have been left aside.... +// withoutUnicodePtr = currentPosition - unicodeSize - startPosition; +// System.arraycopy( +// source, +// startPosition, +// withoutUnicodeBuffer, +// 1, +// withoutUnicodePtr); +// } +// //fill the buffer with the char +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// return true; +// } //-------------end unicode traitement-------------- +// else { + if (!isPHPIdentifierPart(currentCharacter)) { currentPosition = temp; return false; } - if (withoutUnicodePtr != 0) - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// if (withoutUnicodePtr != 0) +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; return true; - } +// } } catch (IndexOutOfBoundsException e) { currentPosition = temp; return false; } } - public int getNextToken() throws InvalidInputException { - this.wasAcr = false; - if (diet) { - jumpOverMethodBody(); - diet = false; - return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE; - } + public int getNextToken() throws InvalidInputException { + int htmlPosition = currentPosition; try { - while (true) { //loop for jumping over comments - withoutUnicodePtr = 0; - //start with a new token (even comment written with unicode ) - - // ---------Consume white space and handles startPosition--------- - int whiteStart = currentPosition; - boolean isWhiteSpace; - do { - startPosition = currentPosition; - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - isWhiteSpace = jumpOverUnicodeWhiteSpace(); - } else { - if ((currentCharacter == '\r') || (currentCharacter == '\n')) { - checkNonExternalizeString(); - if (recordLineSeparator) { - pushLineSeparator(); - } else { - currentLine = null; + while (!phpMode) { + currentCharacter = source[currentPosition++]; + if (currentCharacter == '<') { + if (getNextChar('?')) { + currentCharacter = source[currentPosition++]; + if ((currentCharacter == ' ') + || Character.isWhitespace(currentCharacter)) { + // = 0) { + test = getNextChar('P', 'p'); + if (test >= 0) { + // eofPosition) - return TokenNameEOF; - // ---------Identify the next token------------- + if ((currentCharacter == '\r') || (currentCharacter == '\n')) { + if (recordLineSeparator) { + pushLineSeparator(); + } else { + currentLine = null; + } + } + } + } //-----------------end switch while try-------------------- + catch (IndexOutOfBoundsException e) { + if (tokenizeWhiteSpace) { + // && (whiteStart != currentPosition - 1)) { + // reposition scanner in case we are interested by spaces as tokens + startPosition = htmlPosition; + } + return TokenNameEOF; + } - switch (currentCharacter) { - case '(' : - return TokenNameLPAREN; - case ')' : - return TokenNameRPAREN; - case '{' : - return TokenNameLBRACE; - case '}' : - return TokenNameRBRACE; - case '[' : - return TokenNameLBRACKET; - case ']' : - return TokenNameRBRACKET; - case ';' : - return TokenNameSEMICOLON; - case ',' : - return TokenNameCOMMA; - case '.' : - if (getNextCharAsDigit()) - return scanNumber(true); - return TokenNameDOT; - case '+' : - { - int test; - if ((test = getNextChar('+', '=')) == 0) - return TokenNamePLUS_PLUS; - if (test > 0) - return TokenNamePLUS_EQUAL; - return TokenNamePLUS; - } - case '-' : - { - int test; - if ((test = getNextChar('-', '=')) == 0) - return TokenNameMINUS_MINUS; - if (test > 0) - return TokenNameMINUS_EQUAL; - return TokenNameMINUS; - } - case '~' : - return TokenNameTWIDDLE; - case '!' : - if (getNextChar('=')) - return TokenNameNOT_EQUAL; - return TokenNameNOT; - case '*' : - if (getNextChar('=')) - return TokenNameMULTIPLY_EQUAL; - return TokenNameMULTIPLY; - case '%' : - if (getNextChar('=')) - return TokenNameREMAINDER_EQUAL; - return TokenNameREMAINDER; - case '<' : - { - int test; - if ((test = getNextChar('=', '<')) == 0) - return TokenNameLESS_EQUAL; - if (test > 0) { - if (getNextChar('=')) - return TokenNameLEFT_SHIFT_EQUAL; - return TokenNameLEFT_SHIFT; - } - return TokenNameLESS; - } - case '>' : - { - int test; - if ((test = getNextChar('=', '>')) == 0) - return TokenNameGREATER_EQUAL; - if (test > 0) { - if ((test = getNextChar('=', '>')) == 0) - return TokenNameRIGHT_SHIFT_EQUAL; - if (test > 0) { - if (getNextChar('=')) - return TokenNameUNSIGNED_RIGHT_SHIFT_EQUAL; - return TokenNameUNSIGNED_RIGHT_SHIFT; + if (phpMode) { + this.wasAcr = false; + if (diet) { + jumpOverMethodBody(); + diet = false; + return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE; + } + try { + while (true) { //loop for jumping over comments + withoutUnicodePtr = 0; + //start with a new token (even comment written with unicode ) + + // ---------Consume white space and handles startPosition--------- + int whiteStart = currentPosition; + boolean isWhiteSpace; + do { + startPosition = currentPosition; + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// isWhiteSpace = jumpOverUnicodeWhiteSpace(); +// } else { + if ((currentCharacter == '\r') || (currentCharacter == '\n')) { + checkNonExternalizeString(); + if (recordLineSeparator) { + pushLineSeparator(); + } else { + currentLine = null; } - return TokenNameRIGHT_SHIFT; } - return TokenNameGREATER; - } - case '=' : - if (getNextChar('=')) - return TokenNameEQUAL_EQUAL; - return TokenNameEQUAL; - case '&' : - { - int test; - if ((test = getNextChar('&', '=')) == 0) - return TokenNameAND_AND; - if (test > 0) - return TokenNameAND_EQUAL; - return TokenNameAND; - } - case '|' : - { - int test; - if ((test = getNextChar('|', '=')) == 0) - return TokenNameOR_OR; - if (test > 0) - return TokenNameOR_EQUAL; - return TokenNameOR; - } - case '^' : - if (getNextChar('=')) - return TokenNameXOR_EQUAL; - return TokenNameXOR; - case '?' : - return TokenNameQUESTION; - case ':' : - return TokenNameCOLON; - case '\'' : - { - int test; - if ((test = getNextChar('\n', '\r')) == 0) { - throw new InvalidInputException(INVALID_CHARACTER_CONSTANT); + isWhiteSpace = + (currentCharacter == ' ') + || Character.isWhitespace(currentCharacter); +// } + } while (isWhiteSpace); + if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) { + // reposition scanner in case we are interested by spaces as tokens + currentPosition--; + startPosition = whiteStart; + return TokenNameWHITESPACE; + } + //little trick to get out in the middle of a source compuation + if (currentPosition > eofPosition) + return TokenNameEOF; + + // ---------Identify the next token------------- + + switch (currentCharacter) { + case '(' : + return TokenNameLPAREN; + case ')' : + return TokenNameRPAREN; + case '{' : + return TokenNameLBRACE; + case '}' : + return TokenNameRBRACE; + case '[' : + return TokenNameLBRACKET; + case ']' : + return TokenNameRBRACKET; + case ';' : + return TokenNameSEMICOLON; + case ',' : + return TokenNameCOMMA; + + case '.' : + if (getNextCharAsDigit()) + return scanNumber(true); + return TokenNameDOT; + case '+' : + { + int test; + if ((test = getNextChar('+', '=')) == 0) + return TokenNamePLUS_PLUS; + if (test > 0) + return TokenNamePLUS_EQUAL; + return TokenNamePLUS; } - if (test > 0) { - // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed - for (int lookAhead = 0; lookAhead < 3; lookAhead++) { - if (currentPosition + lookAhead == source.length) - break; - if (source[currentPosition + lookAhead] == '\n') - break; - if (source[currentPosition + lookAhead] == '\'') { - currentPosition += lookAhead + 1; - break; + case '-' : + { + int test; + if ((test = getNextChar('-', '=')) == 0) + return TokenNameMINUS_MINUS; + if (test > 0) + return TokenNameMINUS_EQUAL; + if (getNextChar('>')) + return TokenNameMINUS_GREATER; + + return TokenNameMINUS; + } + case '~' : + if (getNextChar('=')) + return TokenNameTWIDDLE_EQUAL; + return TokenNameTWIDDLE; + case '!' : + if (getNextChar('=')) + return TokenNameNOT_EQUAL; + return TokenNameNOT; + case '*' : + if (getNextChar('=')) + return TokenNameMULTIPLY_EQUAL; + return TokenNameMULTIPLY; + case '%' : + if (getNextChar('=')) + return TokenNameREMAINDER_EQUAL; + return TokenNameREMAINDER; + case '<' : + { + int test; + if ((test = getNextChar('=', '<')) == 0) + return TokenNameLESS_EQUAL; + if (test > 0) { + if (getNextChar('=')) + return TokenNameLEFT_SHIFT_EQUAL; + if (getNextChar('<')) { + int heredocStart = currentPosition; + int heredocLength = 0; + currentCharacter = source[currentPosition++]; + if (isPHPIdentifierStart(currentCharacter)) { + currentCharacter = source[currentPosition++]; + } else { + return TokenNameERROR; + } + while (isPHPIdentifierPart(currentCharacter)) { + currentCharacter = source[currentPosition++]; + } + + heredocLength = currentPosition - heredocStart - 1; + + // heredoc end-tag determination + boolean endTag = true; + char ch; + do { + ch = source[currentPosition++]; + if (ch == '\r' || ch == '\n') { + if (recordLineSeparator) { + pushLineSeparator(); + } else { + currentLine = null; + } + for (int i = 0; i < heredocLength; i++) { + if (source[currentPosition + i] + != source[heredocStart + i]) { + endTag = false; + break; + } + } + if (endTag) { + currentPosition += heredocLength - 1; + currentCharacter = source[currentPosition++]; + break; // do...while loop + } else { + endTag = true; + } + } + + } while (true); + + return TokenNameHEREDOC; } + return TokenNameLEFT_SHIFT; } - throw new InvalidInputException(INVALID_CHARACTER_CONSTANT); + return TokenNameLESS; } - } - if (getNextChar('\'')) { - // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed - for (int lookAhead = 0; lookAhead < 3; lookAhead++) { - if (currentPosition + lookAhead == source.length) - break; - if (source[currentPosition + lookAhead] == '\n') - break; - if (source[currentPosition + lookAhead] == '\'') { - currentPosition += lookAhead + 1; - break; + case '>' : + { + int test; + if ((test = getNextChar('=', '>')) == 0) + return TokenNameGREATER_EQUAL; + if (test > 0) { + if ((test = getNextChar('=', '>')) == 0) + return TokenNameRIGHT_SHIFT_EQUAL; + return TokenNameRIGHT_SHIFT; } + return TokenNameGREATER; } - throw new InvalidInputException(INVALID_CHARACTER_CONSTANT); - } - if (getNextChar('\\')) - scanEscapeCharacter(); - else { // consume next character - unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } + case '=' : + if (getNextChar('=')) + return TokenNameEQUAL_EQUAL; + if (getNextChar('>')) + return TokenNameEQUAL_GREATER; + return TokenNameEQUAL; + case '&' : + { + int test; + if ((test = getNextChar('&', '=')) == 0) + return TokenNameAND_AND; + if (test > 0) + return TokenNameAND_EQUAL; + return TokenNameAND; } - } - if (getNextChar('\'')) - return TokenNameCharacterLiteral; - // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed - for (int lookAhead = 0; lookAhead < 20; lookAhead++) { - if (currentPosition + lookAhead == source.length) - break; - if (source[currentPosition + lookAhead] == '\n') - break; - if (source[currentPosition + lookAhead] == '\'') { - currentPosition += lookAhead + 1; - break; + case '|' : + { + int test; + if ((test = getNextChar('|', '=')) == 0) + return TokenNameOR_OR; + if (test > 0) + return TokenNameOR_EQUAL; + return TokenNameOR; } - } - throw new InvalidInputException(INVALID_CHARACTER_CONSTANT); - case '"' : - try { - // consume next character - unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } + case '^' : + if (getNextChar('=')) + return TokenNameXOR_EQUAL; + return TokenNameXOR; + case '?' : + if (getNextChar('>')) { + phpMode = false; + return TokenNameStopPHP; } + return TokenNameQUESTION; + case ':' : + if (getNextChar(':')) + return TokenNameCOLON_COLON; + return TokenNameCOLON; + case '@' : + return TokenNameAT; + // case '\'' : + // { + // int test; + // if ((test = getNextChar('\n', '\r')) == 0) { + // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT); + // } + // if (test > 0) { + // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed + // for (int lookAhead = 0; + // lookAhead < 3; + // lookAhead++) { + // if (currentPosition + lookAhead + // == source.length) + // break; + // if (source[currentPosition + lookAhead] + // == '\n') + // break; + // if (source[currentPosition + lookAhead] + // == '\'') { + // currentPosition += lookAhead + 1; + // break; + // } + // } + // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT); + // } + // } + // if (getNextChar('\'')) { + // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed + // for (int lookAhead = 0; + // lookAhead < 3; + // lookAhead++) { + // if (currentPosition + lookAhead + // == source.length) + // break; + // if (source[currentPosition + lookAhead] + // == '\n') + // break; + // if (source[currentPosition + lookAhead] + // == '\'') { + // currentPosition += lookAhead + 1; + // break; + // } + // } + // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT); + // } + // if (getNextChar('\\')) + // scanEscapeCharacter(); + // else { // consume next character + // unicodeAsBackSlash = false; + // if (((currentCharacter = source[currentPosition++]) + // == '\\') + // && (source[currentPosition] == 'u')) { + // getNextUnicodeChar(); + // } else { + // if (withoutUnicodePtr != 0) { + // withoutUnicodeBuffer[++withoutUnicodePtr] = + // currentCharacter; + // } + // } + // } + // // if (getNextChar('\'')) + // // return TokenNameCharacterLiteral; + // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed + // for (int lookAhead = 0; lookAhead < 20; lookAhead++) { + // if (currentPosition + lookAhead == source.length) + // break; + // if (source[currentPosition + lookAhead] == '\n') + // break; + // if (source[currentPosition + lookAhead] == '\'') { + // currentPosition += lookAhead + 1; + // break; + // } + // } + // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT); + case '\'' : + try { + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = +// currentCharacter; +// } +// } + + while (currentCharacter != '\'') { + + /**** in PHP \r and \n are valid in string literals ****/ + // if ((currentCharacter == '\n') + // || (currentCharacter == '\r')) { + // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed + // for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + // if (currentPosition + lookAhead == source.length) + // break; + // if (source[currentPosition + lookAhead] == '\n') + // break; + // if (source[currentPosition + lookAhead] == '\"') { + // currentPosition += lookAhead + 1; + // break; + // } + // } + // throw new InvalidInputException(INVALID_CHAR_IN_STRING); + // } + if (currentCharacter == '\\') { + int escapeSize = currentPosition; + boolean backSlashAsUnicodeInString = unicodeAsBackSlash; + //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one + scanSingleQuotedEscapeCharacter(); + escapeSize = currentPosition - escapeSize; + if (withoutUnicodePtr == 0) { + //buffer all the entries that have been left aside.... + withoutUnicodePtr = + currentPosition - escapeSize - 1 - startPosition; + System.arraycopy( + source, + startPosition, + withoutUnicodeBuffer, + 1, + withoutUnicodePtr); + withoutUnicodeBuffer[++withoutUnicodePtr] = + currentCharacter; + } else { //overwrite the / in the buffer + withoutUnicodeBuffer[withoutUnicodePtr] = + currentCharacter; + if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct + withoutUnicodePtr--; + } + } + } + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = + currentCharacter; + } +// } - while (currentCharacter != '"') { - /**** \r and \n are not valid in string literals ****/ - if ((currentCharacter == '\n') || (currentCharacter == '\r')) { + } + } catch (IndexOutOfBoundsException e) { + throw new InvalidInputException(UNTERMINATED_STRING); + } catch (InvalidInputException e) { + if (e.getMessage().equals(INVALID_ESCAPE)) { // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed for (int lookAhead = 0; lookAhead < 50; lookAhead++) { if (currentPosition + lookAhead == source.length) break; if (source[currentPosition + lookAhead] == '\n') break; - if (source[currentPosition + lookAhead] == '\"') { + if (source[currentPosition + lookAhead] == '\'') { currentPosition += lookAhead + 1; break; } } - throw new InvalidInputException(INVALID_CHAR_IN_STRING); + } - if (currentCharacter == '\\') { - int escapeSize = currentPosition; - boolean backSlashAsUnicodeInString = unicodeAsBackSlash; - //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one - scanEscapeCharacter(); - escapeSize = currentPosition - escapeSize; - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition; - System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr); - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } else { //overwrite the / in the buffer - withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter; - if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct - withoutUnicodePtr--; - } - } + throw e; // rethrow + } + if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int. + if (currentLine == null) { + currentLine = new NLSLine(); + lines.add(currentLine); } + currentLine.add( + new StringLiteral( + getCurrentTokenSourceString(), + startPosition, + currentPosition - 1)); + } + return TokenNameStringConstant; + case '"' : + try { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = +// currentCharacter; +// } +// } + + while (currentCharacter != '"') { + + /**** in PHP \r and \n are valid in string literals ****/ + // if ((currentCharacter == '\n') + // || (currentCharacter == '\r')) { + // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed + // for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + // if (currentPosition + lookAhead == source.length) + // break; + // if (source[currentPosition + lookAhead] == '\n') + // break; + // if (source[currentPosition + lookAhead] == '\"') { + // currentPosition += lookAhead + 1; + // break; + // } + // } + // throw new InvalidInputException(INVALID_CHAR_IN_STRING); + // } + if (currentCharacter == '\\') { + int escapeSize = currentPosition; + boolean backSlashAsUnicodeInString = unicodeAsBackSlash; + //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one + scanDoubleQuotedEscapeCharacter(); + escapeSize = currentPosition - escapeSize; + if (withoutUnicodePtr == 0) { + //buffer all the entries that have been left aside.... + withoutUnicodePtr = + currentPosition - escapeSize - 1 - startPosition; + System.arraycopy( + source, + startPosition, + withoutUnicodeBuffer, + 1, + withoutUnicodePtr); + withoutUnicodeBuffer[++withoutUnicodePtr] = + currentCharacter; + } else { //overwrite the / in the buffer + withoutUnicodeBuffer[withoutUnicodePtr] = + currentCharacter; + if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct + withoutUnicodePtr--; + } + } } + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = + currentCharacter; + } +// } + } + } catch (IndexOutOfBoundsException e) { + throw new InvalidInputException(UNTERMINATED_STRING); + } catch (InvalidInputException e) { + if (e.getMessage().equals(INVALID_ESCAPE)) { + // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed + for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + if (currentPosition + lookAhead == source.length) + break; + if (source[currentPosition + lookAhead] == '\n') + break; + if (source[currentPosition + lookAhead] == '\"') { + currentPosition += lookAhead + 1; + break; + } + } + } + throw e; // rethrow } - } catch (IndexOutOfBoundsException e) { - throw new InvalidInputException(UNTERMINATED_STRING); - } catch (InvalidInputException e) { - if (e.getMessage().equals(INVALID_ESCAPE)) { - // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed - for (int lookAhead = 0; lookAhead < 50; lookAhead++) { - if (currentPosition + lookAhead == source.length) - break; - if (source[currentPosition + lookAhead] == '\n') - break; - if (source[currentPosition + lookAhead] == '\"') { - currentPosition += lookAhead + 1; - break; + if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int. + if (currentLine == null) { + currentLine = new NLSLine(); + lines.add(currentLine); + } + currentLine.add( + new StringLiteral( + getCurrentTokenSourceString(), + startPosition, + currentPosition - 1)); + } + return TokenNameStringLiteral; + case '`' : + try { + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = +// currentCharacter; +// } +// } + + while (currentCharacter != '`') { + + /**** in PHP \r and \n are valid in string literals ****/ + // if ((currentCharacter == '\n') + // || (currentCharacter == '\r')) { + // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed + // for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + // if (currentPosition + lookAhead == source.length) + // break; + // if (source[currentPosition + lookAhead] == '\n') + // break; + // if (source[currentPosition + lookAhead] == '\"') { + // currentPosition += lookAhead + 1; + // break; + // } + // } + // throw new InvalidInputException(INVALID_CHAR_IN_STRING); + // } + if (currentCharacter == '\\') { + int escapeSize = currentPosition; + boolean backSlashAsUnicodeInString = unicodeAsBackSlash; + //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one + scanDoubleQuotedEscapeCharacter(); + escapeSize = currentPosition - escapeSize; + if (withoutUnicodePtr == 0) { + //buffer all the entries that have been left aside.... + withoutUnicodePtr = + currentPosition - escapeSize - 1 - startPosition; + System.arraycopy( + source, + startPosition, + withoutUnicodeBuffer, + 1, + withoutUnicodePtr); + withoutUnicodeBuffer[++withoutUnicodePtr] = + currentCharacter; + } else { //overwrite the / in the buffer + withoutUnicodeBuffer[withoutUnicodePtr] = + currentCharacter; + if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct + withoutUnicodePtr--; + } + } } + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { + if (withoutUnicodePtr != 0) { + withoutUnicodeBuffer[++withoutUnicodePtr] = + currentCharacter; + } +// } + } + } catch (IndexOutOfBoundsException e) { + throw new InvalidInputException(UNTERMINATED_STRING); + } catch (InvalidInputException e) { + if (e.getMessage().equals(INVALID_ESCAPE)) { + // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed + for (int lookAhead = 0; lookAhead < 50; lookAhead++) { + if (currentPosition + lookAhead == source.length) + break; + if (source[currentPosition + lookAhead] == '\n') + break; + if (source[currentPosition + lookAhead] == '`') { + currentPosition += lookAhead + 1; + break; + } + } + } + throw e; // rethrow } - throw e; // rethrow - } - if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int. - if (currentLine == null) { - currentLine = new NLSLine(); - lines.add(currentLine); + if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int. + if (currentLine == null) { + currentLine = new NLSLine(); + lines.add(currentLine); + } + currentLine.add( + new StringLiteral( + getCurrentTokenSourceString(), + startPosition, + currentPosition - 1)); } - currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1)); - } - return TokenNameStringLiteral; - case '/' : - { - int test; - if ((test = getNextChar('/', '*')) == 0) { //line comment - int endPositionForLineComment = 0; - try { //get the next char - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; + return TokenNameStringInterpolated; + case '#' : + case '/' : + { + int test; + if ((currentCharacter == '#') + || (test = getNextChar('/', '*')) == 0) { + //line comment + int endPositionForLineComment = 0; + try { //get the next char + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) +// == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1 = 0, c2 = 0, c3 = 0, c4 = 0; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// } +// if ((c1 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c1 < 0 +// || (c2 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c2 < 0 +// || (c3 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c3 < 0 +// || (c4 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c4 < 0) { +// throw new InvalidInputException(INVALID_UNICODE_ESCAPE); +// } else { +// currentCharacter = +// (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// } +// } + + //handle the \\u case manually into comment +// if (currentCharacter == '\\') { +// if (source[currentPosition] == '\\') +// currentPosition++; +// } //jump over the \\ + boolean isUnicode = false; + while (currentCharacter != '\r' + && currentCharacter != '\n') { + if (currentCharacter == '?') { + if (getNextChar('>')) { + startPosition = currentPosition - 2; + phpMode = false; + return TokenNameStopPHP; + } + } + + //get the next char + isUnicode = false; + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) +// == '\\') +// && (source[currentPosition] == 'u')) { +// isUnicode = true; +// //-------------unicode traitement ------------ +// int c1 = 0, c2 = 0, c3 = 0, c4 = 0; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// } +// if ((c1 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c1 < 0 +// || (c2 = +// Character.getNumericValue( +// source[currentPosition++])) +// > 15 +// || c2 < 0 +// || (c3 = +// Character.getNumericValue( +// source[currentPosition++])) +// > 15 +// || c3 < 0 +// || (c4 = +// Character.getNumericValue( +// source[currentPosition++])) +// > 15 +// || c4 < 0) { +// throw new InvalidInputException(INVALID_UNICODE_ESCAPE); +// } else { +// currentCharacter = +// (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// } +// } + //handle the \\u case manually into comment +// if (currentCharacter == '\\') { +// if (source[currentPosition] == '\\') +// currentPosition++; +// } //jump over the \\ } - if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0 - || (c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0 - || (c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0 - || (c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0) { - throw new InvalidInputException(INVALID_UNICODE_ESCAPE); + if (isUnicode) { + endPositionForLineComment = currentPosition - 6; } else { - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); + endPositionForLineComment = currentPosition - 1; } - } - - //handle the \\u case manually into comment - if (currentCharacter == '\\') { - if (source[currentPosition] == '\\') - currentPosition++; - } //jump over the \\ - boolean isUnicode = false; - while (currentCharacter != '\r' && currentCharacter != '\n') { - //get the next char - isUnicode = false; - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - isUnicode = true; - //-------------unicode traitement ------------ - int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - } - if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0 - || (c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0 - || (c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0 - || (c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0) { - throw new InvalidInputException(INVALID_UNICODE_ESCAPE); + recordComment(false); + if ((currentCharacter == '\r') + || (currentCharacter == '\n')) { + checkNonExternalizeString(); + if (recordLineSeparator) { + if (isUnicode) { + pushUnicodeLineSeparator(); + } else { + pushLineSeparator(); + } } else { - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); + currentLine = null; } } - //handle the \\u case manually into comment - if (currentCharacter == '\\') { - if (source[currentPosition] == '\\') - currentPosition++; - } //jump over the \\ + if (tokenizeComments) { + if (!isUnicode) { + currentPosition = endPositionForLineComment; + // reset one character behind + } + return TokenNameCOMMENT_LINE; + } + } catch (IndexOutOfBoundsException e) { //an eof will them be generated + if (tokenizeComments) { + currentPosition--; + // reset one character behind + return TokenNameCOMMENT_LINE; + } } - if (isUnicode) { - endPositionForLineComment = currentPosition - 6; - } else { - endPositionForLineComment = currentPosition - 1; + break; + } + if (test > 0) { + //traditional and annotation comment + boolean isJavadoc = false, star = false; + // consume next character + unicodeAsBackSlash = false; + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = +// currentCharacter; +// } +// } + + if (currentCharacter == '*') { + isJavadoc = true; + star = true; } - recordComment(false); - if ((currentCharacter == '\r') || (currentCharacter == '\n')) { + if ((currentCharacter == '\r') + || (currentCharacter == '\n')) { checkNonExternalizeString(); if (recordLineSeparator) { - if (isUnicode) { - pushUnicodeLineSeparator(); - } else { - pushLineSeparator(); - } + pushLineSeparator(); } else { currentLine = null; } } - if (tokenizeComments) { - if (!isUnicode) { - currentPosition = endPositionForLineComment; // reset one character behind + try { //get the next char + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) +// == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// getNextUnicodeChar(); +// } + //handle the \\u case manually into comment +// if (currentCharacter == '\\') { +// if (source[currentPosition] == '\\') +// currentPosition++; +// //jump over the \\ +// } + // empty comment is not a javadoc /**/ + if (currentCharacter == '/') { + isJavadoc = false; } - return TokenNameCOMMENT_LINE; - } - } catch (IndexOutOfBoundsException e) { //an eof will them be generated - if (tokenizeComments) { - currentPosition--; // reset one character behind - return TokenNameCOMMENT_LINE; - } - } - break; - } - if (test > 0) { //traditional and annotation comment - boolean isJavadoc = false, star = false; - // consume next character - unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - } - - if (currentCharacter == '*') { - isJavadoc = true; - star = true; - } - if ((currentCharacter == '\r') || (currentCharacter == '\n')) { - checkNonExternalizeString(); - if (recordLineSeparator) { - pushLineSeparator(); - } else { - currentLine = null; - } - } - try { //get the next char - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - getNextUnicodeChar(); - } - //handle the \\u case manually into comment - if (currentCharacter == '\\') { - if (source[currentPosition] == '\\') - currentPosition++; //jump over the \\ - } - // empty comment is not a javadoc /**/ - if (currentCharacter == '/') { - isJavadoc = false; - } - //loop until end of comment */ - while ((currentCharacter != '/') || (!star)) { - if ((currentCharacter == '\r') || (currentCharacter == '\n')) { - checkNonExternalizeString(); - if (recordLineSeparator) { - pushLineSeparator(); - } else { - currentLine = null; + //loop until end of comment */ + while ((currentCharacter != '/') || (!star)) { + if ((currentCharacter == '\r') + || (currentCharacter == '\n')) { + checkNonExternalizeString(); + if (recordLineSeparator) { + pushLineSeparator(); + } else { + currentLine = null; + } } + star = currentCharacter == '*'; + //get next char + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) +// == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// getNextUnicodeChar(); +// } + //handle the \\u case manually into comment +// if (currentCharacter == '\\') { +// if (source[currentPosition] == '\\') +// currentPosition++; +// } //jump over the \\ } - star = currentCharacter == '*'; - //get next char - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - getNextUnicodeChar(); + recordComment(isJavadoc); + if (tokenizeComments) { + if (isJavadoc) + return TokenNameCOMMENT_PHPDOC; + return TokenNameCOMMENT_BLOCK; } - //handle the \\u case manually into comment - if (currentCharacter == '\\') { - if (source[currentPosition] == '\\') - currentPosition++; - } //jump over the \\ + } catch (IndexOutOfBoundsException e) { + throw new InvalidInputException(UNTERMINATED_COMMENT); } - recordComment(isJavadoc); - if (tokenizeComments) { - if (isJavadoc) - return TokenNameCOMMENT_JAVADOC; - return TokenNameCOMMENT_BLOCK; - } - } catch (IndexOutOfBoundsException e) { - throw new InvalidInputException(UNTERMINATED_COMMENT); + break; } - break; + if (getNextChar('=')) + return TokenNameDIVIDE_EQUAL; + return TokenNameDIVIDE; } - if (getNextChar('=')) - return TokenNameDIVIDE_EQUAL; - return TokenNameDIVIDE; - } - case '\u001a' : - if (atEnd()) - return TokenNameEOF; - //the atEnd may not be if source is only some part of a real (external) stream - throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$ - - default : - if (Character.isJavaIdentifierStart(currentCharacter)) - return scanIdentifierOrKeyword(); - if (Character.isDigit(currentCharacter)) - return scanNumber(false); - return TokenNameERROR; + case '\u001a' : + if (atEnd()) + return TokenNameEOF; + //the atEnd may not be if source is only some part of a real (external) stream + throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$ + + default : + if (currentCharacter == '$') { + while ((currentCharacter = source[currentPosition++]) == '$') { + } + if (currentCharacter == '{') + return TokenNameDOLLAR_LBRACE; + if (isPHPIdentifierStart(currentCharacter)) + return scanIdentifierOrKeyword(true); + return TokenNameERROR; + } + if (isPHPIdentifierStart(currentCharacter)) + return scanIdentifierOrKeyword(false); + if (Character.isDigit(currentCharacter)) + return scanNumber(false); + return TokenNameERROR; + } } + } //-----------------end switch while try-------------------- + catch (IndexOutOfBoundsException e) { } - } //-----------------end switch while try-------------------- - catch (IndexOutOfBoundsException e) { } return TokenNameEOF; } - public final void getNextUnicodeChar() throws IndexOutOfBoundsException, InvalidInputException { - //VOID - //handle the case of unicode. - //when a unicode appears then we must use a buffer that holds char internal values - //At the end of this method currentCharacter holds the new visited char - //and currentPosition points right next after it - - //ALL getNextChar.... ARE OPTIMIZED COPIES - - int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0 - || (c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0 - || (c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0 - || (c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0) { - throw new InvalidInputException(INVALID_UNICODE_ESCAPE); - } else { - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - //need the unicode buffer - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - unicodeSize - startPosition; - System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr); - } - //fill the buffer with the char - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - unicodeAsBackSlash = currentCharacter == '\\'; - } +// public final void getNextUnicodeChar() +// throws IndexOutOfBoundsException, InvalidInputException { +// //VOID +// //handle the case of unicode. +// //when a unicode appears then we must use a buffer that holds char internal values +// //At the end of this method currentCharacter holds the new visited char +// //and currentPosition points right next after it +// +// //ALL getNextChar.... ARE OPTIMIZED COPIES +// +// int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// unicodeSize++; +// } +// +// if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c1 < 0 +// || (c2 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c2 < 0 +// || (c3 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c3 < 0 +// || (c4 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c4 < 0) { +// throw new InvalidInputException(INVALID_UNICODE_ESCAPE); +// } else { +// currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// //need the unicode buffer +// if (withoutUnicodePtr == 0) { +// //buffer all the entries that have been left aside.... +// withoutUnicodePtr = currentPosition - unicodeSize - startPosition; +// System.arraycopy( +// source, +// startPosition, +// withoutUnicodeBuffer, +// 1, +// withoutUnicodePtr); +// } +// //fill the buffer with the char +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// } +// unicodeAsBackSlash = currentCharacter == '\\'; +// } /* Tokenize a method body, assuming that curly brackets are properly balanced. */ public final void jumpOverMethodBody() { @@ -1128,13 +1703,16 @@ public class Scanner implements IScanner, ITerminalSymbols { boolean isWhiteSpace; do { startPosition = currentPosition; - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - isWhiteSpace = jumpOverUnicodeWhiteSpace(); - } else { - if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n'))) + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// isWhiteSpace = jumpOverUnicodeWhiteSpace(); +// } else { + if (recordLineSeparator + && ((currentCharacter == '\r') || (currentCharacter == '\n'))) pushLineSeparator(); isWhiteSpace = Character.isWhitespace(currentCharacter); - } +// } } while (isWhiteSpace); // -------consume token until } is found--------- @@ -1153,64 +1731,75 @@ public class Scanner implements IScanner, ITerminalSymbols { test = getNextChar('\\'); if (test) { try { - scanEscapeCharacter(); + scanDoubleQuotedEscapeCharacter(); } catch (InvalidInputException ex) { }; } else { - try { // consume next character +// try { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + withoutUnicodeBuffer[++withoutUnicodePtr] = + currentCharacter; } - } - } catch (InvalidInputException ex) { - }; +// } +// } catch (InvalidInputException ex) { +// }; } getNextChar('\''); break; } case '"' : try { - try { // consume next character +// try { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + withoutUnicodeBuffer[++withoutUnicodePtr] = + currentCharacter; } - } - } catch (InvalidInputException ex) { - }; +// } +// } catch (InvalidInputException ex) { +// }; while (currentCharacter != '"') { if (currentCharacter == '\r') { if (source[currentPosition] == '\n') currentPosition++; - break; // the string cannot go further that the line + break; + // the string cannot go further that the line } if (currentCharacter == '\n') { - break; // the string cannot go further that the line + break; + // the string cannot go further that the line } if (currentCharacter == '\\') { try { - scanEscapeCharacter(); + scanDoubleQuotedEscapeCharacter(); } catch (InvalidInputException ex) { }; } - try { // consume next character +// try { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + withoutUnicodeBuffer[++withoutUnicodePtr] = + currentCharacter; } - } - } catch (InvalidInputException ex) { - }; +// } +// } catch (InvalidInputException ex) { +// }; } } catch (IndexOutOfBoundsException e) { return; @@ -1219,128 +1808,188 @@ public class Scanner implements IScanner, ITerminalSymbols { case '/' : { int test; - if ((test = getNextChar('/', '*')) == 0) { //line comment + if ((test = getNextChar('/', '*')) == 0) { + //line comment try { //get the next char - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - } - if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0 - || (c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0 - || (c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0 - || (c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0) { //error don't care of the value - currentCharacter = 'A'; - } //something different from \n and \r - else { - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - } - } - - while (currentCharacter != '\r' && currentCharacter != '\n') { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1 = 0, c2 = 0, c3 = 0, c4 = 0; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// } +// if ((c1 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c1 < 0 +// || (c2 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c2 < 0 +// || (c3 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c3 < 0 +// || (c4 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c4 < 0) { +// //error don't care of the value +// currentCharacter = 'A'; +// } //something different from \n and \r +// else { +// currentCharacter = +// (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// } +// } + + while (currentCharacter != '\r' + && currentCharacter != '\n') { //get the next char - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - } - if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0 - || (c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0 - || (c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0 - || (c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0) { //error don't care of the value - currentCharacter = 'A'; - } //something different from \n and \r - else { - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) +// == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1 = 0, c2 = 0, c3 = 0, c4 = 0; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// } +// if ((c1 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c1 < 0 +// || (c2 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c2 < 0 +// || (c3 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c3 < 0 +// || (c4 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c4 < 0) { +// //error don't care of the value +// currentCharacter = 'A'; +// } //something different from \n and \r +// else { +// currentCharacter = +// (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// } +// } } - if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n'))) + if (recordLineSeparator + && ((currentCharacter == '\r') || (currentCharacter == '\n'))) pushLineSeparator(); } catch (IndexOutOfBoundsException e) { } //an eof will them be generated break; } - if (test > 0) { //traditional and annotation comment + if (test > 0) { + //traditional and annotation comment boolean star = false; - try { // consume next character +// try { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; + withoutUnicodeBuffer[++withoutUnicodePtr] = + currentCharacter; } - }; - } catch (InvalidInputException ex) { - }; +// }; +// } catch (InvalidInputException ex) { +// }; if (currentCharacter == '*') { star = true; } - if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n'))) + if (recordLineSeparator + && ((currentCharacter == '\r') || (currentCharacter == '\n'))) pushLineSeparator(); try { //get the next char - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - } - if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0 - || (c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0 - || (c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0 - || (c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0) { //error don't care of the value - currentCharacter = 'A'; - } //something different from * and / - else { - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1 = 0, c2 = 0, c3 = 0, c4 = 0; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// } +// if ((c1 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c1 < 0 +// || (c2 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c2 < 0 +// || (c3 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c3 < 0 +// || (c4 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c4 < 0) { +// //error don't care of the value +// currentCharacter = 'A'; +// } //something different from * and / +// else { +// currentCharacter = +// (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// } +// } //loop until end of comment */ while ((currentCharacter != '/') || (!star)) { - if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n'))) + if (recordLineSeparator + && ((currentCharacter == '\r') + || (currentCharacter == '\n'))) pushLineSeparator(); star = currentCharacter == '*'; //get next char - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - } - if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0 - || (c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0 - || (c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0 - || (c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0) { //error don't care of the value - currentCharacter = 'A'; - } //something different from * and / - else { - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) +// == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1 = 0, c2 = 0, c3 = 0, c4 = 0; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// } +// if ((c1 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c1 < 0 +// || (c2 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c2 < 0 +// || (c3 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c3 < 0 +// || (c4 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c4 < 0) { +// //error don't care of the value +// currentCharacter = 'A'; +// } //something different from * and / +// else { +// currentCharacter = +// (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// } +// } } } catch (IndexOutOfBoundsException e) { return; @@ -1351,9 +2000,10 @@ public class Scanner implements IScanner, ITerminalSymbols { } default : - if (Character.isJavaIdentifierStart(currentCharacter)) { + if (isPHPIdentifierStart(currentCharacter) + || currentCharacter == '$') { try { - scanIdentifierOrKeyword(); + scanIdentifierOrKeyword((currentCharacter == '$')); } catch (InvalidInputException ex) { }; break; @@ -1373,44 +2023,50 @@ public class Scanner implements IScanner, ITerminalSymbols { } return; } - public final boolean jumpOverUnicodeWhiteSpace() throws InvalidInputException { - //BOOLEAN - //handle the case of unicode. Jump over the next whiteSpace - //making startPosition pointing on the next available char - //On false, the currentCharacter is filled up with a potential - //correct char - - try { - this.wasAcr = false; - int c1, c2, c3, c4; - int unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0) - || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0) - || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0) - || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) { - throw new InvalidInputException(INVALID_UNICODE_ESCAPE); - } - - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n'))) - pushLineSeparator(); - if (Character.isWhitespace(currentCharacter)) - return true; - - //buffer the new char which is not a white space - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - //withoutUnicodePtr == 1 is true here - return false; - } catch (IndexOutOfBoundsException e) { - throw new InvalidInputException(INVALID_UNICODE_ESCAPE); - } - } +// public final boolean jumpOverUnicodeWhiteSpace() +// throws InvalidInputException { +// //BOOLEAN +// //handle the case of unicode. Jump over the next whiteSpace +// //making startPosition pointing on the next available char +// //On false, the currentCharacter is filled up with a potential +// //correct char +// +// try { +// this.wasAcr = false; +// int c1, c2, c3, c4; +// int unicodeSize = 6; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// unicodeSize++; +// } +// +// if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c1 < 0) +// || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c2 < 0) +// || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c3 < 0) +// || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c4 < 0)) { +// throw new InvalidInputException(INVALID_UNICODE_ESCAPE); +// } +// +// currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// if (recordLineSeparator +// && ((currentCharacter == '\r') || (currentCharacter == '\n'))) +// pushLineSeparator(); +// if (Character.isWhitespace(currentCharacter)) +// return true; +// +// //buffer the new char which is not a white space +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// //withoutUnicodePtr == 1 is true here +// return false; +// } catch (IndexOutOfBoundsException e) { +// throw new InvalidInputException(INVALID_UNICODE_ESCAPE); +// } +// } public final int[] getLineEnds() { //return a bounded copy of this.lineEnds @@ -1484,11 +2140,14 @@ public class Scanner implements IScanner, ITerminalSymbols { return new char[] { charOne }; } } + final char[] optimizedCurrentTokenSource2() { //try to return the same char[] build only once char c0, c1; - int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize; + int hash = + (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) + % TableSize; char[][] table = charArray_length[0][hash]; int i = newEntry2; while (++i < InternalTableSize) { @@ -1512,12 +2171,15 @@ public class Scanner implements IScanner, ITerminalSymbols { newEntry2 = max; return r; } + final char[] optimizedCurrentTokenSource3() { //try to return the same char[] build only once char c0, c1, c2; int hash = - (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2])) + (((c0 = source[startPosition]) << 12) + + ((c1 = source[startPosition + 1]) << 6) + + (c2 = source[startPosition + 2])) % TableSize; char[][] table = charArray_length[1][hash]; int i = newEntry3; @@ -1542,6 +2204,7 @@ public class Scanner implements IScanner, ITerminalSymbols { newEntry3 = max; return r; } + final char[] optimizedCurrentTokenSource4() { //try to return the same char[] build only once @@ -1556,7 +2219,10 @@ public class Scanner implements IScanner, ITerminalSymbols { int i = newEntry4; while (++i < InternalTableSize) { char[] charArray = table[i]; - if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3])) + if ((c0 == charArray[0]) + && (c1 == charArray[1]) + && (c2 == charArray[2]) + && (c3 == charArray[3])) return charArray; } //---------other side--------- @@ -1564,7 +2230,10 @@ public class Scanner implements IScanner, ITerminalSymbols { int max = newEntry4; while (++i <= max) { char[] charArray = table[i]; - if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3])) + if ((c0 == charArray[0]) + && (c1 == charArray[1]) + && (c2 == charArray[2]) + && (c3 == charArray[3])) return charArray; } //--------add the entry------- @@ -1576,6 +2245,7 @@ public class Scanner implements IScanner, ITerminalSymbols { return r; } + final char[] optimizedCurrentTokenSource5() { //try to return the same char[] build only once @@ -1591,7 +2261,11 @@ public class Scanner implements IScanner, ITerminalSymbols { int i = newEntry5; while (++i < InternalTableSize) { char[] charArray = table[i]; - if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])) + if ((c0 == charArray[0]) + && (c1 == charArray[1]) + && (c2 == charArray[2]) + && (c3 == charArray[3]) + && (c4 == charArray[4])) return charArray; } //---------other side--------- @@ -1599,7 +2273,11 @@ public class Scanner implements IScanner, ITerminalSymbols { int max = newEntry5; while (++i <= max) { char[] charArray = table[i]; - if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])) + if ((c0 == charArray[0]) + && (c1 == charArray[1]) + && (c2 == charArray[2]) + && (c3 == charArray[3]) + && (c4 == charArray[4])) return charArray; } //--------add the entry------- @@ -1611,6 +2289,7 @@ public class Scanner implements IScanner, ITerminalSymbols { return r; } + final char[] optimizedCurrentTokenSource6() { //try to return the same char[] build only once @@ -1656,6 +2335,7 @@ public class Scanner implements IScanner, ITerminalSymbols { newEntry6 = max; return r; } + public final void pushLineSeparator() throws InvalidInputException { //see comment on isLineDelimiter(char) for the use of '\n' and '\r' final int INCREMENT = 250; @@ -1697,7 +2377,8 @@ public class Scanner implements IScanner, ITerminalSymbols { } } else { // lf 000A - if (currentCharacter == '\n') { //must merge eventual cr followed by lf + if (currentCharacter == '\n') { + //must merge eventual cr followed by lf if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) { //System.out.println("merge LF-" + (currentPosition - 1)); lineEnds[linePtr] = currentPosition - 1; @@ -1761,7 +2442,8 @@ public class Scanner implements IScanner, ITerminalSymbols { } } else { // lf 000A - if (currentCharacter == '\n') { //must merge eventual cr followed by lf + if (currentCharacter == '\n') { + //must merge eventual cr followed by lf if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) { //System.out.println("merge LF-" + (currentPosition - 1)); lineEnds[linePtr] = currentPosition - 6; @@ -1789,7 +2471,8 @@ public class Scanner implements IScanner, ITerminalSymbols { // a new annotation comment is recorded try { - commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition; + commentStops[++commentPtr] = + isJavadoc ? currentPosition : -currentPosition; } catch (IndexOutOfBoundsException e) { int oldStackLength = commentStops.length; int[] oldStack = commentStops; @@ -1814,35 +2497,68 @@ public class Scanner implements IScanner, ITerminalSymbols { commentPtr = -1; // reset comment stack } - public final void scanEscapeCharacter() throws InvalidInputException { + public final void scanSingleQuotedEscapeCharacter() + throws InvalidInputException { // the string with "\\u" is a legal string of two chars \ and u //thus we use a direct access to the source (for regular cases). - if (unicodeAsBackSlash) { - // consume next character - unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - } - } else +// if (unicodeAsBackSlash) { +// // consume next character +// unicodeAsBackSlash = false; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// } +// } +// } else currentCharacter = source[currentPosition++]; switch (currentCharacter) { - case 'b' : - currentCharacter = '\b'; + case '\'' : + currentCharacter = '\''; break; + case '\\' : + currentCharacter = '\\'; + break; + default : + currentCharacter = '\\'; + currentPosition--; + } + } + + public final void scanDoubleQuotedEscapeCharacter() + throws InvalidInputException { + // the string with "\\u" is a legal string of two chars \ and u + //thus we use a direct access to the source (for regular cases). + +// if (unicodeAsBackSlash) { +// // consume next character +// unicodeAsBackSlash = false; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// } +// } +// } else + currentCharacter = source[currentPosition++]; + switch (currentCharacter) { + // case 'b' : + // currentCharacter = '\b'; + // break; case 't' : currentCharacter = '\t'; break; case 'n' : currentCharacter = '\n'; break; - case 'f' : - currentCharacter = '\f'; - break; + // case 'f' : + // currentCharacter = '\f'; + // break; case 'r' : currentCharacter = '\r'; break; @@ -1855,6 +2571,9 @@ public class Scanner implements IScanner, ITerminalSymbols { case '\\' : currentCharacter = '\\'; break; + case '$' : + currentCharacter = '$'; + break; default : // -----------octal escape-------------- // OctalDigit @@ -1864,16 +2583,19 @@ public class Scanner implements IScanner, ITerminalSymbols { int number = Character.getNumericValue(currentCharacter); if (number >= 0 && number <= 7) { boolean zeroToThreeNot = number > 3; - if (Character.isDigit(currentCharacter = source[currentPosition++])) { + if (Character + .isDigit(currentCharacter = source[currentPosition++])) { int digit = Character.getNumericValue(currentCharacter); if (digit >= 0 && digit <= 7) { number = (number * 8) + digit; - if (Character.isDigit(currentCharacter = source[currentPosition++])) { + if (Character + .isDigit(currentCharacter = source[currentPosition++])) { if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character currentPosition--; } else { digit = Character.getNumericValue(currentCharacter); - if (digit >= 0 && digit <= 7) { // has read \ZeroToThree OctalDigit OctalDigit + if (digit >= 0 && digit <= 7) { + // has read \ZeroToThree OctalDigit OctalDigit number = (number * 8) + digit; } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character currentPosition--; @@ -1891,41 +2613,62 @@ public class Scanner implements IScanner, ITerminalSymbols { if (number > 255) throw new InvalidInputException(INVALID_ESCAPE); currentCharacter = (char) number; - } else - throw new InvalidInputException(INVALID_ESCAPE); + } + //else + // throw new InvalidInputException(INVALID_ESCAPE); } } - public int scanIdentifierOrKeyword() throws InvalidInputException { + + // public int scanIdentifierOrKeyword() throws InvalidInputException { + // return scanIdentifierOrKeyword( false ); + // } + + public int scanIdentifierOrKeyword(boolean isVariable) + throws InvalidInputException { //test keywords //first dispatch on the first char. //then the length. If there are several //keywors with the same length AND the same first char, then do another //disptach on the second char :-)...cool....but fast ! + useAssertAsAnIndentifier = false; + while (getNextCharAsJavaIdentifierPart()) { }; + if (isVariable) { + return TokenNameVariable; + } int index, length; char[] data; char firstLetter; - if (withoutUnicodePtr == 0) +// if (withoutUnicodePtr == 0) //quick test on length == 1 but not on length > 12 while most identifier //have a length which is <= 12...but there are lots of identifier with //only one char.... - { +// { if ((length = currentPosition - startPosition) == 1) return TokenNameIdentifier; - data = source; + // data = source; + data = new char[length]; index = startPosition; - } else { - if ((length = withoutUnicodePtr) == 1) - return TokenNameIdentifier; - data = withoutUnicodeBuffer; - index = 1; - } + for (int i = 0; i < length; i++) { + data[i] = Character.toLowerCase(source[index + i]); + } + index = 0; +// } else { +// if ((length = withoutUnicodePtr) == 1) +// return TokenNameIdentifier; +// // data = withoutUnicodeBuffer; +// data = new char[withoutUnicodeBuffer.length]; +// for (int i = 0; i < withoutUnicodeBuffer.length; i++) { +// data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]); +// } +// index = 1; +// } firstLetter = data[index]; switch (firstLetter) { @@ -1940,7 +2683,7 @@ public class Scanner implements IScanner, ITerminalSymbols { } case 3 : //and if ((data[++index] == 'n') && (data[++index] == 'd')) { - return TokenNameas; + return TokenNameAND; } else { return TokenNameIdentifier; } @@ -1955,7 +2698,10 @@ public class Scanner implements IScanner, ITerminalSymbols { case 'b' : //break switch (length) { case 5 : - if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k')) + if ((data[++index] == 'r') + && (data[++index] == 'e') + && (data[++index] == 'a') + && (data[++index] == 'k')) return TokenNamebreak; else return TokenNameIdentifier; @@ -1966,12 +2712,17 @@ public class Scanner implements IScanner, ITerminalSymbols { case 'c' : //case class continue switch (length) { case 4 : - if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e')) + if ((data[++index] == 'a') + && (data[++index] == 's') + && (data[++index] == 'e')) return TokenNamecase; else return TokenNameIdentifier; case 5 : - if ((data[index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's')) + if ((data[++index] == 'l') + && (data[++index] == 'a') + && (data[++index] == 's') + && (data[++index] == 's')) return TokenNameclass; else return TokenNameIdentifier; @@ -2022,14 +2773,22 @@ public class Scanner implements IScanner, ITerminalSymbols { case 'e' : //echo else elseif extends switch (length) { case 4 : - if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o')) + if ((data[++index] == 'c') + && (data[++index] == 'h') + && (data[++index] == 'o')) return TokenNameecho; - else if ((data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e')) + else if ( + (data[index] == 'l') + && (data[++index] == 's') + && (data[++index] == 'e')) return TokenNameelse; else return TokenNameIdentifier; case 5 : // endif - if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f')) + if ((data[++index] == 'n') + && (data[++index] == 'd') + && (data[++index] == 'i') + && (data[++index] == 'f')) return TokenNameendif; else return TokenNameIdentifier; @@ -2041,7 +2800,7 @@ public class Scanner implements IScanner, ITerminalSymbols { && (data[++index] == 'r')) return TokenNameendfor; else if ( - (data[++index] == 'l') + (data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i') @@ -2108,12 +2867,25 @@ public class Scanner implements IScanner, ITerminalSymbols { else return TokenNameIdentifier; case 5 : - if ((data[index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e')) + if ((data[++index] == 'a') + && (data[++index] == 'l') + && (data[++index] == 's') + && (data[++index] == 'e')) return TokenNamefalse; else return TokenNameIdentifier; + case 7 : // function + if ((data[++index] == 'o') + && (data[++index] == 'r') + && (data[++index] == 'e') + && (data[++index] == 'a') + && (data[++index] == 'c') + && (data[++index] == 'h')) + return TokenNameforeach; + else + return TokenNameIdentifier; case 8 : // function - if ((data[index] == 'u') + if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't') @@ -2181,7 +2953,9 @@ public class Scanner implements IScanner, ITerminalSymbols { case 'l' : //list if (length == 4) { - if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) { + if ((data[++index] == 'i') + && (data[++index] == 's') + && (data[++index] == 't')) { return TokenNamelist; } } @@ -2195,7 +2969,9 @@ public class Scanner implements IScanner, ITerminalSymbols { else return TokenNameIdentifier; case 4 : - if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l')) + if ((data[++index] == 'u') + && (data[++index] == 'l') + && (data[++index] == 'l')) return TokenNamenull; else return TokenNameIdentifier; @@ -2206,29 +2982,32 @@ public class Scanner implements IScanner, ITerminalSymbols { case 'o' : // or old_function if (length == 2) { if (data[++index] == 'r') { - return TokenNameor; + return TokenNameOR; } } -// if (length == 12) { -// if ((data[++index] == 'l') -// && (data[++index] == 'd') -// && (data[++index] == '_') -// && (data[++index] == 'f') -// && (data[++index] == 'u') -// && (data[++index] == 'n') -// && (data[++index] == 'c') -// && (data[++index] == 't') -// && (data[++index] == 'i') -// && (data[++index] == 'o') -// && (data[++index] == 'n')) { -// return TokenNameold_function; -// } -// } + // if (length == 12) { + // if ((data[++index] == 'l') + // && (data[++index] == 'd') + // && (data[++index] == '_') + // && (data[++index] == 'f') + // && (data[++index] == 'u') + // && (data[++index] == 'n') + // && (data[++index] == 'c') + // && (data[++index] == 't') + // && (data[++index] == 'i') + // && (data[++index] == 'o') + // && (data[++index] == 'n')) { + // return TokenNameold_function; + // } + // } return TokenNameIdentifier; - + case 'p' : // print if (length == 5) { - if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) { + if ((data[++index] == 'r') + && (data[++index] == 'i') + && (data[++index] == 'n') + && (data[++index] == 't')) { return TokenNameprint; } } @@ -2272,7 +3051,10 @@ public class Scanner implements IScanner, ITerminalSymbols { switch (length) { case 6 : if (data[++index] == 't') - if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) { + if ((data[++index] == 'a') + && (data[++index] == 't') + && (data[++index] == 'i') + && (data[++index] == 'c')) { return TokenNamestatic; } else return TokenNameIdentifier; @@ -2293,7 +3075,9 @@ public class Scanner implements IScanner, ITerminalSymbols { switch (length) { case 4 : - if ((data[index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e')) + if ((data[++index] == 'r') + && (data[++index] == 'u') + && (data[++index] == 'e')) return TokenNametrue; else return TokenNameIdentifier; @@ -2304,7 +3088,7 @@ public class Scanner implements IScanner, ITerminalSymbols { return TokenNameIdentifier; } - case 'v' : //void volatile + case 'v' : //var switch (length) { case 3 : if ((data[++index] == 'a') && (data[++index] == 'r')) @@ -2316,10 +3100,13 @@ public class Scanner implements IScanner, ITerminalSymbols { return TokenNameIdentifier; } - case 'w' : //while widefp + case 'w' : //while switch (length) { case 5 : - if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e')) + if ((data[++index] == 'h') + && (data[++index] == 'i') + && (data[++index] == 'l') + && (data[++index] == 'e')) return TokenNamewhile; else return TokenNameIdentifier; @@ -2330,12 +3117,12 @@ public class Scanner implements IScanner, ITerminalSymbols { default : return TokenNameIdentifier; } - + case 'x' : //xor switch (length) { case 3 : if ((data[++index] == 'o') && (data[++index] == 'r')) - return TokenNamexor; + return TokenNameXOR; else return TokenNameIdentifier; @@ -2358,37 +3145,40 @@ public class Scanner implements IScanner, ITerminalSymbols { //force the first char of the hexa number do exist... // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// } +// } if (Character.digit(currentCharacter, 16) == -1) throw new InvalidInputException(INVALID_HEXA); //---end forcing-- while (getNextCharAsDigit(16)) { }; - if (getNextChar('l', 'L') >= 0) - return TokenNameLongLiteral; - else - return TokenNameIntegerLiteral; + // if (getNextChar('l', 'L') >= 0) + // return TokenNameLongLiteral; + // else + return TokenNameIntegerLiteral; } //there is x or X in the number //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language - if (getNextCharAsDigit()) { //-------------potential octal----------------- + if (getNextCharAsDigit()) { + //-------------potential octal----------------- while (getNextCharAsDigit()) { }; - if (getNextChar('l', 'L') >= 0) { - return TokenNameLongLiteral; - } - - if (getNextChar('f', 'F') >= 0) { - return TokenNameFloatingPointLiteral; - } + // if (getNextChar('l', 'L') >= 0) { + // return TokenNameLongLiteral; + // } + // + // if (getNextChar('f', 'F') >= 0) { + // return TokenNameFloatingPointLiteral; + // } if (getNextChar('d', 'D') >= 0) { return TokenNameDoubleLiteral; @@ -2396,33 +3186,40 @@ public class Scanner implements IScanner, ITerminalSymbols { if (getNextChar('.')) { //bingo ! .... while (getNextCharAsDigit()) { }; - if (getNextChar('e', 'E') >= 0) { // consume next character + if (getNextChar('e', 'E') >= 0) { + // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - } - - if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// } +// } + + if ((currentCharacter == '-') || (currentCharacter == '+')) { + // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = +// currentCharacter; +// } +// } } if (!Character.isDigit(currentCharacter)) throw new InvalidInputException(INVALID_FLOAT); while (getNextCharAsDigit()) { }; } - if (getNextChar('f', 'F') >= 0) - return TokenNameFloatingPointLiteral; + // if (getNextChar('f', 'F') >= 0) + // return TokenNameFloatingPointLiteral; getNextChar('d', 'D'); //jump over potential d or D return TokenNameDoubleLiteral; } else { @@ -2437,8 +3234,8 @@ public class Scanner implements IScanner, ITerminalSymbols { while (getNextCharAsDigit()) { }; - if ((!dotPrefix) && (getNextChar('l', 'L') >= 0)) - return TokenNameLongLiteral; + // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0)) + // return TokenNameLongLiteral; if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty while (getNextCharAsDigit()) { @@ -2452,23 +3249,28 @@ public class Scanner implements IScanner, ITerminalSymbols { floating = true; // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// } +// } - if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character + if ((currentCharacter == '-') + || (currentCharacter == '+')) { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// } +// } } if (!Character.isDigit(currentCharacter)) throw new InvalidInputException(INVALID_FLOAT); @@ -2478,8 +3280,8 @@ public class Scanner implements IScanner, ITerminalSymbols { if (getNextChar('d', 'D') >= 0) return TokenNameDoubleLiteral; - if (getNextChar('f', 'F') >= 0) - return TokenNameFloatingPointLiteral; + // if (getNextChar('f', 'F') >= 0) + // return TokenNameFloatingPointLiteral; //the long flag has been tested before @@ -2513,6 +3315,11 @@ public class Scanner implements IScanner, ITerminalSymbols { } return m + 2; } + + public void setPHPMode(boolean mode) { + phpMode = mode; + } + public final void setSource(char[] source) { //the source-buffer is set to sourceString @@ -2547,7 +3354,12 @@ public class Scanner implements IScanner, ITerminalSymbols { } char end[] = new char[source.length - (currentPosition - 1)]; - System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1); + System.arraycopy( + source, + (currentPosition - 1) + 1, + end, + 0, + source.length - (currentPosition - 1) - 1); return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$ + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$ @@ -2555,117 +3367,100 @@ public class Scanner implements IScanner, ITerminalSymbols { } public final String toStringAction(int act) { switch (act) { + case TokenNameERROR : + return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ + case TokenNameStopPHP : + return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ case TokenNameIdentifier : return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ - // case TokenNameabstract : - // return "abstract"; //$NON-NLS-1$ - // case TokenNameboolean : - // return "boolean"; //$NON-NLS-1$ + case TokenNameVariable : + return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ + case TokenNameas : + return "as"; //$NON-NLS-1$ case TokenNamebreak : return "break"; //$NON-NLS-1$ - // case TokenNamebyte : - // return "byte"; //$NON-NLS-1$ case TokenNamecase : return "case"; //$NON-NLS-1$ - // case TokenNamecatch : - // return "catch"; //$NON-NLS-1$ - // case TokenNamechar : - // return "char"; //$NON-NLS-1$ case TokenNameclass : return "class"; //$NON-NLS-1$ case TokenNamecontinue : return "continue"; //$NON-NLS-1$ case TokenNamedefault : return "default"; //$NON-NLS-1$ + case TokenNamedefine : + return "define"; //$NON-NLS-1$ case TokenNamedo : return "do"; //$NON-NLS-1$ - // case TokenNamedouble : - // return "double"; //$NON-NLS-1$ + case TokenNameecho : + return "echo"; //$NON-NLS-1$ case TokenNameelse : return "else"; //$NON-NLS-1$ + case TokenNameelseif : + return "elseif"; //$NON-NLS-1$ + case TokenNameendfor : + return "endfor"; //$NON-NLS-1$ + case TokenNameendforeach : + return "endforeach"; //$NON-NLS-1$ + case TokenNameendif : + return "endif"; //$NON-NLS-1$ + case TokenNameendswitch : + return "endswitch"; //$NON-NLS-1$ + case TokenNameendwhile : + return "endwhile"; //$NON-NLS-1$ case TokenNameextends : return "extends"; //$NON-NLS-1$ case TokenNamefalse : return "false"; //$NON-NLS-1$ - // case TokenNamefinal : - // return "final"; //$NON-NLS-1$ - // case TokenNamefinally : - // return "finally"; //$NON-NLS-1$ - // case TokenNamefloat : - // return "float"; //$NON-NLS-1$ case TokenNamefor : return "for"; //$NON-NLS-1$ + case TokenNameforeach : + return "foreach"; //$NON-NLS-1$ + case TokenNamefunction : + return "function"; //$NON-NLS-1$ + case TokenNameglobal : + return "global"; //$NON-NLS-1$ case TokenNameif : return "if"; //$NON-NLS-1$ - // case TokenNameimplements : - // return "implements"; //$NON-NLS-1$ - // case TokenNameimport : - // return "import"; //$NON-NLS-1$ - // case TokenNameinstanceof : - // return "instanceof"; //$NON-NLS-1$ - // case TokenNameint : - // return "int"; //$NON-NLS-1$ - // case TokenNameinterface : - // return "interface"; //$NON-NLS-1$ - // case TokenNamelong : - // return "long"; //$NON-NLS-1$ - // case TokenNamenative : - // return "native"; //$NON-NLS-1$ + case TokenNameinclude : + return "include"; //$NON-NLS-1$ + case TokenNameinclude_once : + return "include_once"; //$NON-NLS-1$ + case TokenNamelist : + return "list"; //$NON-NLS-1$ case TokenNamenew : return "new"; //$NON-NLS-1$ case TokenNamenull : return "null"; //$NON-NLS-1$ - // case TokenNamepackage : - // return "package"; //$NON-NLS-1$ - // case TokenNameprivate : - // return "private"; //$NON-NLS-1$ - // case TokenNameprotected : - // return "protected"; //$NON-NLS-1$ - // case TokenNamepublic : - // return "public"; //$NON-NLS-1$ + case TokenNameprint : + return "print"; //$NON-NLS-1$ + case TokenNamerequire : + return "require"; //$NON-NLS-1$ + case TokenNamerequire_once : + return "require_once"; //$NON-NLS-1$ case TokenNamereturn : return "return"; //$NON-NLS-1$ - // case TokenNameshort : - // return "short"; //$NON-NLS-1$ case TokenNamestatic : return "static"; //$NON-NLS-1$ - // case TokenNamesuper : - // return "super"; //$NON-NLS-1$ case TokenNameswitch : return "switch"; //$NON-NLS-1$ - // case TokenNamesynchronized : - // return "synchronized"; //$NON-NLS-1$ - // case TokenNamethis : - // return "this"; //$NON-NLS-1$ - // case TokenNamethrow : - // return "throw"; //$NON-NLS-1$ - // case TokenNamethrows : - // return "throws"; //$NON-NLS-1$ - // case TokenNametransient : - // return "transient"; //$NON-NLS-1$ case TokenNametrue : return "true"; //$NON-NLS-1$ - // case TokenNametry : - // return "try"; //$NON-NLS-1$ - // case TokenNamevoid : - // return "void"; //$NON-NLS-1$ - // case TokenNamevolatile : - // return "volatile"; //$NON-NLS-1$ + case TokenNamevar : + return "var"; //$NON-NLS-1$ case TokenNamewhile : return "while"; //$NON-NLS-1$ - case TokenNameIntegerLiteral : return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ - case TokenNameLongLiteral : - return "Long(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ - case TokenNameFloatingPointLiteral : - return "Float(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ case TokenNameDoubleLiteral : return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ - case TokenNameCharacterLiteral : - return "Char(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ case TokenNameStringLiteral : return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ + case TokenNameStringConstant : + return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ + case TokenNameStringInterpolated : + return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$ + case TokenNameHEREDOC : + return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ case TokenNamePLUS_PLUS : return "++"; //$NON-NLS-1$ @@ -2673,6 +3468,8 @@ public class Scanner implements IScanner, ITerminalSymbols { return "--"; //$NON-NLS-1$ case TokenNameEQUAL_EQUAL : return "=="; //$NON-NLS-1$ + case TokenNameEQUAL_GREATER : + return "=>"; //$NON-NLS-1$ case TokenNameLESS_EQUAL : return "<="; //$NON-NLS-1$ case TokenNameGREATER_EQUAL : @@ -2683,8 +3480,6 @@ public class Scanner implements IScanner, ITerminalSymbols { return "<<"; //$NON-NLS-1$ case TokenNameRIGHT_SHIFT : return ">>"; //$NON-NLS-1$ - case TokenNameUNSIGNED_RIGHT_SHIFT : - return ">>>"; //$NON-NLS-1$ case TokenNamePLUS_EQUAL : return "+="; //$NON-NLS-1$ case TokenNameMINUS_EQUAL : @@ -2705,8 +3500,6 @@ public class Scanner implements IScanner, ITerminalSymbols { return "<<="; //$NON-NLS-1$ case TokenNameRIGHT_SHIFT_EQUAL : return ">>="; //$NON-NLS-1$ - case TokenNameUNSIGNED_RIGHT_SHIFT_EQUAL : - return ">>>="; //$NON-NLS-1$ case TokenNameOR_OR : return "||"; //$NON-NLS-1$ case TokenNameAND_AND : @@ -2715,6 +3508,8 @@ public class Scanner implements IScanner, ITerminalSymbols { return "+"; //$NON-NLS-1$ case TokenNameMINUS : return "-"; //$NON-NLS-1$ + case TokenNameMINUS_GREATER : + return "->"; case TokenNameNOT : return "!"; //$NON-NLS-1$ case TokenNameREMAINDER : @@ -2729,6 +3524,8 @@ public class Scanner implements IScanner, ITerminalSymbols { return "|"; //$NON-NLS-1$ case TokenNameTWIDDLE : return "~"; //$NON-NLS-1$ + case TokenNameTWIDDLE_EQUAL : + return "~="; //$NON-NLS-1$ case TokenNameDIVIDE : return "/"; //$NON-NLS-1$ case TokenNameGREATER : @@ -2759,15 +3556,26 @@ public class Scanner implements IScanner, ITerminalSymbols { return "."; //$NON-NLS-1$ case TokenNameEQUAL : return "="; //$NON-NLS-1$ + case TokenNameAT : + return "@"; + case TokenNameDOLLAR_LBRACE : + return "${"; case TokenNameEOF : return "EOF"; //$NON-NLS-1$ default : - return "not-a-token"; //$NON-NLS-1$ + return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$ } } - public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) { - this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false); + public Scanner( + boolean tokenizeComments, + boolean tokenizeWhiteSpace, + boolean checkNonExternalizedStringLiterals) { + this( + tokenizeComments, + tokenizeWhiteSpace, + checkNonExternalizedStringLiterals, + false); } public Scanner( @@ -2778,7 +3586,8 @@ public class Scanner implements IScanner, ITerminalSymbols { this.eofPosition = Integer.MAX_VALUE; this.tokenizeComments = tokenizeComments; this.tokenizeWhiteSpace = tokenizeWhiteSpace; - this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals; + this.checkNonExternalizedStringLiterals = + checkNonExternalizedStringLiterals; this.assertMode = assertMode; } @@ -2798,7 +3607,8 @@ public class Scanner implements IScanner, ITerminalSymbols { String index = s.substring(start, end); int i = 0; try { - i = Integer.parseInt(index) - 1; // Tags are one based not zero based. + i = Integer.parseInt(index) - 1; + // Tags are one based not zero based. } catch (NumberFormatException e) { i = -1; // we don't want to consider this as a valid NLS tag } @@ -2823,7 +3633,12 @@ public class Scanner implements IScanner, ITerminalSymbols { } this.wasNonExternalizedStringLiteral = true; if (nonNLSCounter != lineLength) { - System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter); + System.arraycopy( + this.nonNLSStrings, + 0, + (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), + 0, + nonNLSCounter); } currentLine = null; }