X-Git-Url: http://git.phpeclipse.com diff --git a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java index 36adb95..5427ff6 100644 --- a/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java +++ b/net.sourceforge.phpeclipse/src/net/sourceforge/phpdt/internal/compiler/parser/Scanner.java @@ -156,12 +156,35 @@ public class Scanner implements IScanner, ITerminalSymbols { public static final int SquareBracket = 1; public static final int CurlyBracket = 2; public static final int BracketKinds = 3; + + public static final boolean DEBUG = false; public Scanner() { this(false, false); } public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) { this(tokenizeComments, tokenizeWhiteSpace, false); } + + /** + * Determines if the specified character is + * permissible as the first character in a PHP identifier + */ + public static boolean isPHPIdentifierStart(char ch) { + return Character.isLetter(ch) + || (ch == '_') + || (0x7F <= ch && ch <= 0xFF); + } + + /** + * Determines if the specified character may be part of a PHP identifier as + * other than the first character + */ + public static boolean isPHPIdentifierPart(char ch) { + return Character.isLetterOrDigit(ch) + || (ch == '_') + || (0x7F <= ch && ch <= 0xFF); + } + public final boolean atEnd() { // This code is not relevant if source is // Only a part of the real stream input @@ -172,15 +195,15 @@ public class Scanner implements IScanner, ITerminalSymbols { //return the token REAL source (aka unicodes are precomputed) char[] result; - if (withoutUnicodePtr != 0) - //0 is used as a fast test flag so the real first char is in position 1 - System.arraycopy( - withoutUnicodeBuffer, - 1, - result = new char[withoutUnicodePtr], - 0, - withoutUnicodePtr); - else { +// if (withoutUnicodePtr != 0) +// //0 is used as a fast test flag so the real first char is in position 1 +// System.arraycopy( +// withoutUnicodeBuffer, +// 1, +// result = new char[withoutUnicodePtr], +// 0, +// withoutUnicodePtr); +// else { int length = currentPosition - startPosition; switch (length) { // see OptimizedLength case 1 : @@ -203,7 +226,7 @@ public class Scanner implements IScanner, ITerminalSymbols { result = new char[length], 0, length); - } + // } return result; } public int getCurrentTokenEndPosition() { @@ -213,15 +236,15 @@ public class Scanner implements IScanner, ITerminalSymbols { // Return the token REAL source (aka unicodes are precomputed) char[] result; - if (withoutUnicodePtr != 0) - // 0 is used as a fast test flag so the real first char is in position 1 - System.arraycopy( - withoutUnicodeBuffer, - 1, - result = new char[withoutUnicodePtr], - 0, - withoutUnicodePtr); - else { +// if (withoutUnicodePtr != 0) +// // 0 is used as a fast test flag so the real first char is in position 1 +// System.arraycopy( +// withoutUnicodeBuffer, +// 1, +// result = new char[withoutUnicodePtr], +// 0, +// withoutUnicodePtr); +// else { int length; System.arraycopy( source, @@ -229,23 +252,23 @@ public class Scanner implements IScanner, ITerminalSymbols { result = new char[length = currentPosition - startPosition], 0, length); - } +// } return result; } - + public final char[] getCurrentTokenSource(int startPos) { // Return the token REAL source (aka unicodes are precomputed) char[] result; - if (withoutUnicodePtr != 0) - // 0 is used as a fast test flag so the real first char is in position 1 - System.arraycopy( - withoutUnicodeBuffer, - 1, - result = new char[withoutUnicodePtr], - 0, - withoutUnicodePtr); - else { +// if (withoutUnicodePtr != 0) +// // 0 is used as a fast test flag so the real first char is in position 1 +// System.arraycopy( +// withoutUnicodeBuffer, +// 1, +// result = new char[withoutUnicodePtr], +// 0, +// withoutUnicodePtr); +// else { int length; System.arraycopy( source, @@ -253,10 +276,10 @@ public class Scanner implements IScanner, ITerminalSymbols { result = new char[length = currentPosition - startPos], 0, length); - } + // } return result; } - + public final char[] getCurrentTokenSourceString() { //return the token REAL source (aka unicodes are precomputed). //REMOVE the two " that are at the beginning and the end. @@ -340,62 +363,63 @@ public class Scanner implements IScanner, ITerminalSymbols { int temp = currentPosition; try { - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1, c2, c3, c4; - int unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0) - || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0) - || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0) - || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0)) { - currentPosition = temp; - return false; - } - - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (currentCharacter != testedChar) { - currentPosition = temp; - return false; - } - unicodeAsBackSlash = currentCharacter == '\\'; - - //need the unicode buffer - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - unicodeSize - startPosition; - System.arraycopy( - source, - startPosition, - withoutUnicodeBuffer, - 1, - withoutUnicodePtr); - } - //fill the buffer with the char - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return true; - - } //-------------end unicode traitement-------------- - else { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1, c2, c3, c4; +// int unicodeSize = 6; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// unicodeSize++; +// } +// +// if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c1 < 0) +// || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c2 < 0) +// || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c3 < 0) +// || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c4 < 0)) { +// currentPosition = temp; +// return false; +// } +// +// currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// if (currentCharacter != testedChar) { +// currentPosition = temp; +// return false; +// } +// unicodeAsBackSlash = currentCharacter == '\\'; +// +// //need the unicode buffer +// if (withoutUnicodePtr == 0) { +// //buffer all the entries that have been left aside.... +// withoutUnicodePtr = currentPosition - unicodeSize - startPosition; +// System.arraycopy( +// source, +// startPosition, +// withoutUnicodeBuffer, +// 1, +// withoutUnicodePtr); +// } +// //fill the buffer with the char +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// return true; +// +// } //-------------end unicode traitement-------------- +// else { if (currentCharacter != testedChar) { currentPosition = temp; return false; } unicodeAsBackSlash = false; - if (withoutUnicodePtr != 0) - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// if (withoutUnicodePtr != 0) +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; return true; - } +// } } catch (IndexOutOfBoundsException e) { unicodeAsBackSlash = false; currentPosition = temp; @@ -417,55 +441,56 @@ public class Scanner implements IScanner, ITerminalSymbols { int temp = currentPosition; try { int result; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1, c2, c3, c4; - int unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0) - || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0) - || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0) - || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0)) { - currentPosition = temp; - return 2; - } - - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (currentCharacter == testedChar1) - result = 0; - else if (currentCharacter == testedChar2) - result = 1; - else { - currentPosition = temp; - return -1; - } - - //need the unicode buffer - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - unicodeSize - startPosition; - System.arraycopy( - source, - startPosition, - withoutUnicodeBuffer, - 1, - withoutUnicodePtr); - } - //fill the buffer with the char - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return result; - } //-------------end unicode traitement-------------- - else { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1, c2, c3, c4; +// int unicodeSize = 6; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// unicodeSize++; +// } +// +// if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c1 < 0) +// || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c2 < 0) +// || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c3 < 0) +// || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c4 < 0)) { +// currentPosition = temp; +// return 2; +// } +// +// currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// if (currentCharacter == testedChar1) +// result = 0; +// else if (currentCharacter == testedChar2) +// result = 1; +// else { +// currentPosition = temp; +// return -1; +// } +// +// //need the unicode buffer +// if (withoutUnicodePtr == 0) { +// //buffer all the entries that have been left aside.... +// withoutUnicodePtr = currentPosition - unicodeSize - startPosition; +// System.arraycopy( +// source, +// startPosition, +// withoutUnicodeBuffer, +// 1, +// withoutUnicodePtr); +// } +// //fill the buffer with the char +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// return result; +// } //-------------end unicode traitement-------------- +// else { if (currentCharacter == testedChar1) result = 0; else if (currentCharacter == testedChar2) @@ -475,10 +500,10 @@ public class Scanner implements IScanner, ITerminalSymbols { return -1; } - if (withoutUnicodePtr != 0) - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// if (withoutUnicodePtr != 0) +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; return result; - } + // } } catch (IndexOutOfBoundsException e) { currentPosition = temp; return -1; @@ -497,59 +522,60 @@ public class Scanner implements IScanner, ITerminalSymbols { int temp = currentPosition; try { - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1, c2, c3, c4; - int unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0) - || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0) - || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0) - || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0)) { - currentPosition = temp; - return false; - } - - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (!Character.isDigit(currentCharacter)) { - currentPosition = temp; - return false; - } - - //need the unicode buffer - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - unicodeSize - startPosition; - System.arraycopy( - source, - startPosition, - withoutUnicodeBuffer, - 1, - withoutUnicodePtr); - } - //fill the buffer with the char - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return true; - } //-------------end unicode traitement-------------- - else { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1, c2, c3, c4; +// int unicodeSize = 6; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// unicodeSize++; +// } +// +// if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c1 < 0) +// || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c2 < 0) +// || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c3 < 0) +// || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c4 < 0)) { +// currentPosition = temp; +// return false; +// } +// +// currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// if (!Character.isDigit(currentCharacter)) { +// currentPosition = temp; +// return false; +// } +// +// //need the unicode buffer +// if (withoutUnicodePtr == 0) { +// //buffer all the entries that have been left aside.... +// withoutUnicodePtr = currentPosition - unicodeSize - startPosition; +// System.arraycopy( +// source, +// startPosition, +// withoutUnicodeBuffer, +// 1, +// withoutUnicodePtr); +// } +// //fill the buffer with the char +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// return true; +// } //-------------end unicode traitement-------------- +// else { if (!Character.isDigit(currentCharacter)) { currentPosition = temp; return false; } - if (withoutUnicodePtr != 0) - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// if (withoutUnicodePtr != 0) +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; return true; - } +// } } catch (IndexOutOfBoundsException e) { currentPosition = temp; return false; @@ -568,59 +594,60 @@ public class Scanner implements IScanner, ITerminalSymbols { int temp = currentPosition; try { - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1, c2, c3, c4; - int unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0) - || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0) - || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0) - || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0)) { - currentPosition = temp; - return false; - } - - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (Character.digit(currentCharacter, radix) == -1) { - currentPosition = temp; - return false; - } - - //need the unicode buffer - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - unicodeSize - startPosition; - System.arraycopy( - source, - startPosition, - withoutUnicodeBuffer, - 1, - withoutUnicodePtr); - } - //fill the buffer with the char - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return true; - } //-------------end unicode traitement-------------- - else { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1, c2, c3, c4; +// int unicodeSize = 6; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// unicodeSize++; +// } +// +// if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c1 < 0) +// || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c2 < 0) +// || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c3 < 0) +// || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c4 < 0)) { +// currentPosition = temp; +// return false; +// } +// +// currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// if (Character.digit(currentCharacter, radix) == -1) { +// currentPosition = temp; +// return false; +// } +// +// //need the unicode buffer +// if (withoutUnicodePtr == 0) { +// //buffer all the entries that have been left aside.... +// withoutUnicodePtr = currentPosition - unicodeSize - startPosition; +// System.arraycopy( +// source, +// startPosition, +// withoutUnicodeBuffer, +// 1, +// withoutUnicodePtr); +// } +// //fill the buffer with the char +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// return true; +// } //-------------end unicode traitement-------------- +// else { if (Character.digit(currentCharacter, radix) == -1) { currentPosition = temp; return false; } - if (withoutUnicodePtr != 0) - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// if (withoutUnicodePtr != 0) +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; return true; - } +// } } catch (IndexOutOfBoundsException e) { currentPosition = temp; return false; @@ -639,60 +666,61 @@ public class Scanner implements IScanner, ITerminalSymbols { int temp = currentPosition; try { - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1, c2, c3, c4; - int unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0) - || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0) - || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0) - || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0)) { - currentPosition = temp; - return false; - } - - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (!Character.isJavaIdentifierPart(currentCharacter)) { - currentPosition = temp; - return false; - } - - //need the unicode buffer - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - unicodeSize - startPosition; - System.arraycopy( - source, - startPosition, - withoutUnicodeBuffer, - 1, - withoutUnicodePtr); - } - //fill the buffer with the char - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - return true; - } //-------------end unicode traitement-------------- - else { - if (!Character.isJavaIdentifierPart(currentCharacter)) { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1, c2, c3, c4; +// int unicodeSize = 6; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// unicodeSize++; +// } +// +// if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c1 < 0) +// || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c2 < 0) +// || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c3 < 0) +// || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c4 < 0)) { +// currentPosition = temp; +// return false; +// } +// +// currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// if (!isPHPIdentifierPart(currentCharacter)) { +// currentPosition = temp; +// return false; +// } +// +// //need the unicode buffer +// if (withoutUnicodePtr == 0) { +// //buffer all the entries that have been left aside.... +// withoutUnicodePtr = currentPosition - unicodeSize - startPosition; +// System.arraycopy( +// source, +// startPosition, +// withoutUnicodeBuffer, +// 1, +// withoutUnicodePtr); +// } +// //fill the buffer with the char +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// return true; +// } //-------------end unicode traitement-------------- +// else { + if (!isPHPIdentifierPart(currentCharacter)) { currentPosition = temp; return false; } - if (withoutUnicodePtr != 0) - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// if (withoutUnicodePtr != 0) +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; return true; - } +// } } catch (IndexOutOfBoundsException e) { currentPosition = temp; return false; @@ -700,9 +728,9 @@ public class Scanner implements IScanner, ITerminalSymbols { } public int getNextToken() throws InvalidInputException { + int htmlPosition = currentPosition; try { while (!phpMode) { - startPosition = currentPosition; currentCharacter = source[currentPosition++]; if (currentCharacter == '<') { if (getNextChar('?')) { @@ -712,6 +740,12 @@ public class Scanner implements IScanner, ITerminalSymbols { // 15 - || c1 < 0 - || (c2 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c2 < 0 - || (c3 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c3 < 0 - || (c4 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c4 < 0) { - throw new InvalidInputException(INVALID_UNICODE_ESCAPE); - } else { - currentCharacter = - (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) +// == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1 = 0, c2 = 0, c3 = 0, c4 = 0; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// } +// if ((c1 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c1 < 0 +// || (c2 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c2 < 0 +// || (c3 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c3 < 0 +// || (c4 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c4 < 0) { +// throw new InvalidInputException(INVALID_UNICODE_ESCAPE); +// } else { +// currentCharacter = +// (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// } +// } //handle the \\u case manually into comment - if (currentCharacter == '\\') { - if (source[currentPosition] == '\\') - currentPosition++; - } //jump over the \\ +// if (currentCharacter == '\\') { +// if (source[currentPosition] == '\\') +// currentPosition++; +// } //jump over the \\ boolean isUnicode = false; while (currentCharacter != '\r' && currentCharacter != '\n') { @@ -1399,46 +1451,47 @@ public class Scanner implements IScanner, ITerminalSymbols { //get the next char isUnicode = false; - if (((currentCharacter = source[currentPosition++]) - == '\\') - && (source[currentPosition] == 'u')) { - isUnicode = true; - //-------------unicode traitement ------------ - int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - } - if ((c1 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c1 < 0 - || (c2 = - Character.getNumericValue( - source[currentPosition++])) - > 15 - || c2 < 0 - || (c3 = - Character.getNumericValue( - source[currentPosition++])) - > 15 - || c3 < 0 - || (c4 = - Character.getNumericValue( - source[currentPosition++])) - > 15 - || c4 < 0) { - throw new InvalidInputException(INVALID_UNICODE_ESCAPE); - } else { - currentCharacter = - (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) +// == '\\') +// && (source[currentPosition] == 'u')) { +// isUnicode = true; +// //-------------unicode traitement ------------ +// int c1 = 0, c2 = 0, c3 = 0, c4 = 0; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// } +// if ((c1 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c1 < 0 +// || (c2 = +// Character.getNumericValue( +// source[currentPosition++])) +// > 15 +// || c2 < 0 +// || (c3 = +// Character.getNumericValue( +// source[currentPosition++])) +// > 15 +// || c3 < 0 +// || (c4 = +// Character.getNumericValue( +// source[currentPosition++])) +// > 15 +// || c4 < 0) { +// throw new InvalidInputException(INVALID_UNICODE_ESCAPE); +// } else { +// currentCharacter = +// (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// } +// } //handle the \\u case manually into comment - if (currentCharacter == '\\') { - if (source[currentPosition] == '\\') - currentPosition++; - } //jump over the \\ +// if (currentCharacter == '\\') { +// if (source[currentPosition] == '\\') +// currentPosition++; +// } //jump over the \\ } if (isUnicode) { endPositionForLineComment = currentPosition - 6; @@ -1480,15 +1533,16 @@ public class Scanner implements IScanner, ITerminalSymbols { boolean isJavadoc = false, star = false; // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = +// currentCharacter; +// } +// } if (currentCharacter == '*') { isJavadoc = true; @@ -1504,18 +1558,19 @@ public class Scanner implements IScanner, ITerminalSymbols { } } try { //get the next char - if (((currentCharacter = source[currentPosition++]) - == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - getNextUnicodeChar(); - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) +// == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// getNextUnicodeChar(); +// } //handle the \\u case manually into comment - if (currentCharacter == '\\') { - if (source[currentPosition] == '\\') - currentPosition++; - //jump over the \\ - } +// if (currentCharacter == '\\') { +// if (source[currentPosition] == '\\') +// currentPosition++; +// //jump over the \\ +// } // empty comment is not a javadoc /**/ if (currentCharacter == '/') { isJavadoc = false; @@ -1533,22 +1588,23 @@ public class Scanner implements IScanner, ITerminalSymbols { } star = currentCharacter == '*'; //get next char - if (((currentCharacter = source[currentPosition++]) - == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - getNextUnicodeChar(); - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) +// == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// getNextUnicodeChar(); +// } //handle the \\u case manually into comment - if (currentCharacter == '\\') { - if (source[currentPosition] == '\\') - currentPosition++; - } //jump over the \\ +// if (currentCharacter == '\\') { +// if (source[currentPosition] == '\\') +// currentPosition++; +// } //jump over the \\ } recordComment(isJavadoc); if (tokenizeComments) { if (isJavadoc) - return TokenNameCOMMENT_JAVADOC; + return TokenNameCOMMENT_PHPDOC; return TokenNameCOMMENT_BLOCK; } } catch (IndexOutOfBoundsException e) { @@ -1568,11 +1624,15 @@ public class Scanner implements IScanner, ITerminalSymbols { default : if (currentCharacter == '$') { - if (getNextChar('{')) + while ((currentCharacter = source[currentPosition++]) == '$') { + } + if (currentCharacter == '{') return TokenNameDOLLAR_LBRACE; - return scanIdentifierOrKeyword(true); + if (isPHPIdentifierStart(currentCharacter)) + return scanIdentifierOrKeyword(true); + return TokenNameERROR; } - if (Character.isJavaIdentifierStart(currentCharacter)) + if (isPHPIdentifierStart(currentCharacter)) return scanIdentifierOrKeyword(false); if (Character.isDigit(currentCharacter)) return scanNumber(false); @@ -1586,50 +1646,50 @@ public class Scanner implements IScanner, ITerminalSymbols { return TokenNameEOF; } - public final void getNextUnicodeChar() - throws IndexOutOfBoundsException, InvalidInputException { - //VOID - //handle the case of unicode. - //when a unicode appears then we must use a buffer that holds char internal values - //At the end of this method currentCharacter holds the new visited char - //and currentPosition points right next after it - - //ALL getNextChar.... ARE OPTIMIZED COPIES - - int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0 - || (c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0 - || (c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0 - || (c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0) { - throw new InvalidInputException(INVALID_UNICODE_ESCAPE); - } else { - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - //need the unicode buffer - if (withoutUnicodePtr == 0) { - //buffer all the entries that have been left aside.... - withoutUnicodePtr = currentPosition - unicodeSize - startPosition; - System.arraycopy( - source, - startPosition, - withoutUnicodeBuffer, - 1, - withoutUnicodePtr); - } - //fill the buffer with the char - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - unicodeAsBackSlash = currentCharacter == '\\'; - } +// public final void getNextUnicodeChar() +// throws IndexOutOfBoundsException, InvalidInputException { +// //VOID +// //handle the case of unicode. +// //when a unicode appears then we must use a buffer that holds char internal values +// //At the end of this method currentCharacter holds the new visited char +// //and currentPosition points right next after it +// +// //ALL getNextChar.... ARE OPTIMIZED COPIES +// +// int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// unicodeSize++; +// } +// +// if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c1 < 0 +// || (c2 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c2 < 0 +// || (c3 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c3 < 0 +// || (c4 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c4 < 0) { +// throw new InvalidInputException(INVALID_UNICODE_ESCAPE); +// } else { +// currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// //need the unicode buffer +// if (withoutUnicodePtr == 0) { +// //buffer all the entries that have been left aside.... +// withoutUnicodePtr = currentPosition - unicodeSize - startPosition; +// System.arraycopy( +// source, +// startPosition, +// withoutUnicodeBuffer, +// 1, +// withoutUnicodePtr); +// } +// //fill the buffer with the char +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// } +// unicodeAsBackSlash = currentCharacter == '\\'; +// } /* Tokenize a method body, assuming that curly brackets are properly balanced. */ public final void jumpOverMethodBody() { @@ -1642,15 +1702,16 @@ public class Scanner implements IScanner, ITerminalSymbols { boolean isWhiteSpace; do { startPosition = currentPosition; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - isWhiteSpace = jumpOverUnicodeWhiteSpace(); - } else { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// isWhiteSpace = jumpOverUnicodeWhiteSpace(); +// } else { if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n'))) pushLineSeparator(); isWhiteSpace = Character.isWhitespace(currentCharacter); - } +// } } while (isWhiteSpace); // -------consume token until } is found--------- @@ -1669,42 +1730,44 @@ public class Scanner implements IScanner, ITerminalSymbols { test = getNextChar('\\'); if (test) { try { - scanEscapeCharacter(); + scanDoubleQuotedEscapeCharacter(); } catch (InvalidInputException ex) { }; } else { - try { // consume next character +// try { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { if (withoutUnicodePtr != 0) { withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; } - } - } catch (InvalidInputException ex) { - }; +// } +// } catch (InvalidInputException ex) { +// }; } getNextChar('\''); break; } case '"' : try { - try { // consume next character +// try { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { if (withoutUnicodePtr != 0) { withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; } - } - } catch (InvalidInputException ex) { - }; +// } +// } catch (InvalidInputException ex) { +// }; while (currentCharacter != '"') { if (currentCharacter == '\r') { if (source[currentPosition] == '\n') @@ -1718,23 +1781,24 @@ public class Scanner implements IScanner, ITerminalSymbols { } if (currentCharacter == '\\') { try { - scanEscapeCharacter(); + scanDoubleQuotedEscapeCharacter(); } catch (InvalidInputException ex) { }; } - try { // consume next character +// try { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { if (withoutUnicodePtr != 0) { withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; } - } - } catch (InvalidInputException ex) { - }; +// } +// } catch (InvalidInputException ex) { +// }; } } catch (IndexOutOfBoundsException e) { return; @@ -1747,75 +1811,77 @@ public class Scanner implements IScanner, ITerminalSymbols { //line comment try { //get the next char - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - } - if ((c1 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c1 < 0 - || (c2 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c2 < 0 - || (c3 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c3 < 0 - || (c4 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c4 < 0) { - //error don't care of the value - currentCharacter = 'A'; - } //something different from \n and \r - else { - currentCharacter = - (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1 = 0, c2 = 0, c3 = 0, c4 = 0; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// } +// if ((c1 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c1 < 0 +// || (c2 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c2 < 0 +// || (c3 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c3 < 0 +// || (c4 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c4 < 0) { +// //error don't care of the value +// currentCharacter = 'A'; +// } //something different from \n and \r +// else { +// currentCharacter = +// (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// } +// } while (currentCharacter != '\r' && currentCharacter != '\n') { //get the next char - if (((currentCharacter = source[currentPosition++]) - == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - } - if ((c1 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c1 < 0 - || (c2 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c2 < 0 - || (c3 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c3 < 0 - || (c4 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c4 < 0) { - //error don't care of the value - currentCharacter = 'A'; - } //something different from \n and \r - else { - currentCharacter = - (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) +// == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1 = 0, c2 = 0, c3 = 0, c4 = 0; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// } +// if ((c1 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c1 < 0 +// || (c2 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c2 < 0 +// || (c3 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c3 < 0 +// || (c4 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c4 < 0) { +// //error don't care of the value +// currentCharacter = 'A'; +// } //something different from \n and \r +// else { +// currentCharacter = +// (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// } +// } } if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n'))) @@ -1827,19 +1893,20 @@ public class Scanner implements IScanner, ITerminalSymbols { if (test > 0) { //traditional and annotation comment boolean star = false; - try { // consume next character +// try { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { if (withoutUnicodePtr != 0) { withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; } - }; - } catch (InvalidInputException ex) { - }; +// }; +// } catch (InvalidInputException ex) { +// }; if (currentCharacter == '*') { star = true; } @@ -1847,38 +1914,39 @@ public class Scanner implements IScanner, ITerminalSymbols { && ((currentCharacter == '\r') || (currentCharacter == '\n'))) pushLineSeparator(); try { //get the next char - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - } - if ((c1 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c1 < 0 - || (c2 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c2 < 0 - || (c3 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c3 < 0 - || (c4 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c4 < 0) { - //error don't care of the value - currentCharacter = 'A'; - } //something different from * and / - else { - currentCharacter = - (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1 = 0, c2 = 0, c3 = 0, c4 = 0; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// } +// if ((c1 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c1 < 0 +// || (c2 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c2 < 0 +// || (c3 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c3 < 0 +// || (c4 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c4 < 0) { +// //error don't care of the value +// currentCharacter = 'A'; +// } //something different from * and / +// else { +// currentCharacter = +// (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// } +// } //loop until end of comment */ while ((currentCharacter != '/') || (!star)) { if (recordLineSeparator @@ -1887,39 +1955,40 @@ public class Scanner implements IScanner, ITerminalSymbols { pushLineSeparator(); star = currentCharacter == '*'; //get next char - if (((currentCharacter = source[currentPosition++]) - == '\\') - && (source[currentPosition] == 'u')) { - //-------------unicode traitement ------------ - int c1 = 0, c2 = 0, c3 = 0, c4 = 0; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - } - if ((c1 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c1 < 0 - || (c2 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c2 < 0 - || (c3 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c3 < 0 - || (c4 = - Character.getNumericValue(source[currentPosition++])) - > 15 - || c4 < 0) { - //error don't care of the value - currentCharacter = 'A'; - } //something different from * and / - else { - currentCharacter = - (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) +// == '\\') +// && (source[currentPosition] == 'u')) { +// //-------------unicode traitement ------------ +// int c1 = 0, c2 = 0, c3 = 0, c4 = 0; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// } +// if ((c1 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c1 < 0 +// || (c2 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c2 < 0 +// || (c3 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c3 < 0 +// || (c4 = +// Character.getNumericValue(source[currentPosition++])) +// > 15 +// || c4 < 0) { +// //error don't care of the value +// currentCharacter = 'A'; +// } //something different from * and / +// else { +// currentCharacter = +// (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// } +// } } } catch (IndexOutOfBoundsException e) { return; @@ -1930,7 +1999,7 @@ public class Scanner implements IScanner, ITerminalSymbols { } default : - if (Character.isJavaIdentifierStart(currentCharacter) + if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') { try { scanIdentifierOrKeyword((currentCharacter == '$')); @@ -1953,50 +2022,50 @@ public class Scanner implements IScanner, ITerminalSymbols { } return; } - public final boolean jumpOverUnicodeWhiteSpace() - throws InvalidInputException { - //BOOLEAN - //handle the case of unicode. Jump over the next whiteSpace - //making startPosition pointing on the next available char - //On false, the currentCharacter is filled up with a potential - //correct char - - try { - this.wasAcr = false; - int c1, c2, c3, c4; - int unicodeSize = 6; - currentPosition++; - while (source[currentPosition] == 'u') { - currentPosition++; - unicodeSize++; - } - - if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 - || c1 < 0) - || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 - || c2 < 0) - || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 - || c3 < 0) - || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 - || c4 < 0)) { - throw new InvalidInputException(INVALID_UNICODE_ESCAPE); - } - - currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); - if (recordLineSeparator - && ((currentCharacter == '\r') || (currentCharacter == '\n'))) - pushLineSeparator(); - if (Character.isWhitespace(currentCharacter)) - return true; - - //buffer the new char which is not a white space - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - //withoutUnicodePtr == 1 is true here - return false; - } catch (IndexOutOfBoundsException e) { - throw new InvalidInputException(INVALID_UNICODE_ESCAPE); - } - } +// public final boolean jumpOverUnicodeWhiteSpace() +// throws InvalidInputException { +// //BOOLEAN +// //handle the case of unicode. Jump over the next whiteSpace +// //making startPosition pointing on the next available char +// //On false, the currentCharacter is filled up with a potential +// //correct char +// +// try { +// this.wasAcr = false; +// int c1, c2, c3, c4; +// int unicodeSize = 6; +// currentPosition++; +// while (source[currentPosition] == 'u') { +// currentPosition++; +// unicodeSize++; +// } +// +// if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c1 < 0) +// || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c2 < 0) +// || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c3 < 0) +// || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 +// || c4 < 0)) { +// throw new InvalidInputException(INVALID_UNICODE_ESCAPE); +// } +// +// currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4); +// if (recordLineSeparator +// && ((currentCharacter == '\r') || (currentCharacter == '\n'))) +// pushLineSeparator(); +// if (Character.isWhitespace(currentCharacter)) +// return true; +// +// //buffer the new char which is not a white space +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// //withoutUnicodePtr == 1 is true here +// return false; +// } catch (IndexOutOfBoundsException e) { +// throw new InvalidInputException(INVALID_UNICODE_ESCAPE); +// } +// } public final int[] getLineEnds() { //return a bounded copy of this.lineEnds @@ -2070,6 +2139,7 @@ public class Scanner implements IScanner, ITerminalSymbols { return new char[] { charOne }; } } + final char[] optimizedCurrentTokenSource2() { //try to return the same char[] build only once @@ -2100,6 +2170,7 @@ public class Scanner implements IScanner, ITerminalSymbols { newEntry2 = max; return r; } + final char[] optimizedCurrentTokenSource3() { //try to return the same char[] build only once @@ -2132,6 +2203,7 @@ public class Scanner implements IScanner, ITerminalSymbols { newEntry3 = max; return r; } + final char[] optimizedCurrentTokenSource4() { //try to return the same char[] build only once @@ -2172,6 +2244,7 @@ public class Scanner implements IScanner, ITerminalSymbols { return r; } + final char[] optimizedCurrentTokenSource5() { //try to return the same char[] build only once @@ -2215,6 +2288,7 @@ public class Scanner implements IScanner, ITerminalSymbols { return r; } + final char[] optimizedCurrentTokenSource6() { //try to return the same char[] build only once @@ -2260,6 +2334,7 @@ public class Scanner implements IScanner, ITerminalSymbols { newEntry6 = max; return r; } + public final void pushLineSeparator() throws InvalidInputException { //see comment on isLineDelimiter(char) for the use of '\n' and '\r' final int INCREMENT = 250; @@ -2421,36 +2496,68 @@ public class Scanner implements IScanner, ITerminalSymbols { commentPtr = -1; // reset comment stack } - public final void scanEscapeCharacter() throws InvalidInputException { + public final void scanSingleQuotedEscapeCharacter() + throws InvalidInputException { // the string with "\\u" is a legal string of two chars \ and u //thus we use a direct access to the source (for regular cases). - if (unicodeAsBackSlash) { - // consume next character - unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - } - } else +// if (unicodeAsBackSlash) { +// // consume next character +// unicodeAsBackSlash = false; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// } +// } +// } else currentCharacter = source[currentPosition++]; switch (currentCharacter) { - case 'b' : - currentCharacter = '\b'; + case '\'' : + currentCharacter = '\''; + break; + case '\\' : + currentCharacter = '\\'; break; + default : + currentCharacter = '\\'; + currentPosition--; + } + } + + public final void scanDoubleQuotedEscapeCharacter() + throws InvalidInputException { + // the string with "\\u" is a legal string of two chars \ and u + //thus we use a direct access to the source (for regular cases). + +// if (unicodeAsBackSlash) { +// // consume next character +// unicodeAsBackSlash = false; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// } +// } +// } else + currentCharacter = source[currentPosition++]; + switch (currentCharacter) { + // case 'b' : + // currentCharacter = '\b'; + // break; case 't' : currentCharacter = '\t'; break; case 'n' : currentCharacter = '\n'; break; - case 'f' : - currentCharacter = '\f'; - break; + // case 'f' : + // currentCharacter = '\f'; + // break; case 'r' : currentCharacter = '\r'; break; @@ -2463,6 +2570,9 @@ public class Scanner implements IScanner, ITerminalSymbols { case '\\' : currentCharacter = '\\'; break; + case '$' : + currentCharacter = '$'; + break; default : // -----------octal escape-------------- // OctalDigit @@ -2512,7 +2622,7 @@ public class Scanner implements IScanner, ITerminalSymbols { // return scanIdentifierOrKeyword( false ); // } - public int scanIdentifierOrKeyword(boolean isVariable) + public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException { //test keywords @@ -2522,6 +2632,7 @@ public class Scanner implements IScanner, ITerminalSymbols { //disptach on the second char :-)...cool....but fast ! useAssertAsAnIndentifier = false; + while (getNextCharAsJavaIdentifierPart()) { }; @@ -2531,13 +2642,13 @@ public class Scanner implements IScanner, ITerminalSymbols { int index, length; char[] data; char firstLetter; - if (withoutUnicodePtr == 0) +// if (withoutUnicodePtr == 0) //quick test on length == 1 but not on length > 12 while most identifier //have a length which is <= 12...but there are lots of identifier with //only one char.... - { +// { if ((length = currentPosition - startPosition) == 1) return TokenNameIdentifier; // data = source; @@ -2547,16 +2658,16 @@ public class Scanner implements IScanner, ITerminalSymbols { data[i] = Character.toLowerCase(source[index + i]); } index = 0; - } else { - if ((length = withoutUnicodePtr) == 1) - return TokenNameIdentifier; - // data = withoutUnicodeBuffer; - data = new char[withoutUnicodeBuffer.length]; - for (int i = 0; i < withoutUnicodeBuffer.length; i++) { - data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]); - } - index = 1; - } +// } else { +// if ((length = withoutUnicodePtr) == 1) +// return TokenNameIdentifier; +// // data = withoutUnicodeBuffer; +// data = new char[withoutUnicodeBuffer.length]; +// for (int i = 0; i < withoutUnicodeBuffer.length; i++) { +// data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]); +// } +// index = 1; +// } firstLetter = data[index]; switch (firstLetter) { @@ -3033,14 +3144,15 @@ public class Scanner implements IScanner, ITerminalSymbols { //force the first char of the hexa number do exist... // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// } +// } if (Character.digit(currentCharacter, 16) == -1) throw new InvalidInputException(INVALID_HEXA); //---end forcing-- @@ -3076,27 +3188,29 @@ public class Scanner implements IScanner, ITerminalSymbols { if (getNextChar('e', 'E') >= 0) { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// } +// } if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = - currentCharacter; - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = +// currentCharacter; +// } +// } } if (!Character.isDigit(currentCharacter)) throw new InvalidInputException(INVALID_FLOAT); @@ -3134,26 +3248,28 @@ public class Scanner implements IScanner, ITerminalSymbols { floating = true; // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// } +// } if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character unicodeAsBackSlash = false; - if (((currentCharacter = source[currentPosition++]) == '\\') - && (source[currentPosition] == 'u')) { - getNextUnicodeChar(); - } else { - if (withoutUnicodePtr != 0) { - withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; - } - } + currentCharacter = source[currentPosition++]; +// if (((currentCharacter = source[currentPosition++]) == '\\') +// && (source[currentPosition] == 'u')) { +// getNextUnicodeChar(); +// } else { +// if (withoutUnicodePtr != 0) { +// withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; +// } +// } } if (!Character.isDigit(currentCharacter)) throw new InvalidInputException(INVALID_FLOAT);