// source
public boolean tokenizeComments;
public boolean tokenizeWhiteSpace;
+ public boolean tokenizeStrings;
//source should be viewed as a window (aka a part)
//of a entire very large stream
public char source[];
public int[][] foundTaskPositions;
public int foundTaskCount = 0;
public char[][] taskTags = null;
- public char[][] taskPriorities = null;
+ public char[][] taskPriorities = null;
public static final boolean DEBUG = false;
public static final boolean TRACE = false;
public Scanner() {
currentPosition = tempPosition;
return TokenNameLPAREN;
}
+ /**
+  * Consumes the remainder of a back-quoted string and leaves currentPosition
+  * just past the closing back-quote. getNextToken() calls this after reading
+  * the opening back-quote when tokenizeStrings is set.
+  *
+  * Backslash escapes are delegated to scanDoubleQuotedEscapeCharacter(). From the
+  * first escape onwards the token text is also collected in withoutUnicodeBuffer
+  * (filled from index 1; withoutUnicodePtr is the last index written). When
+  * checkNonExternalizedStringLiterals is set, the literal is added to the
+  * current NLSLine.
+  *
+  * @throws InvalidInputException with UNTERMINATED_STRING when source ends before
+  *         the closing back-quote; an InvalidInputException raised inside the
+  *         scanning loop is rethrown unchanged.
+  */ public void consumeStringInterpolated() throws InvalidInputException {
+ try {
+ // read the first character after the opening back-quote
+ unicodeAsBackSlash = false;
+ currentCharacter = source[currentPosition++];
+ // if (((currentCharacter = source[currentPosition++]) == '\\')
+ // && (source[currentPosition] == 'u')) {
+ // getNextUnicodeChar();
+ // } else {
+ // if (withoutUnicodePtr != 0) {
+ // withoutUnicodeBuffer[++withoutUnicodePtr] =
+ // currentCharacter;
+ // }
+ // }
+ while (currentCharacter != '`') {
+ /* in PHP \r and \n are valid in string literals, so the line-break check below stays disabled */
+ // if ((currentCharacter == '\n')
+ // || (currentCharacter == '\r')) {
+ // // relocate if finding another quote fairly close: thus unicode
+ // '/u000D' will be fully consumed
+ // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+ // if (currentPosition + lookAhead == source.length)
+ // break;
+ // if (source[currentPosition + lookAhead] == '\n')
+ // break;
+ // if (source[currentPosition + lookAhead] == '\"') {
+ // currentPosition += lookAhead + 1;
+ // break;
+ // }
+ // }
+ // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
+ // }
+ if (currentCharacter == '\\') {
+ int escapeSize = currentPosition; // position right after the backslash; turned into a length below
+ boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
+ // the escape scanner below may change unicodeAsBackSlash (per the original note), so it is saved first.
+ // NOTE(review): the flag is cleared right before every read in this method, so this looks always false - confirm
+ scanDoubleQuotedEscapeCharacter();
+ escapeSize = currentPosition - escapeSize; // number of characters the escape scanner advanced
+ if (withoutUnicodePtr == 0) {
+ // first escape in this token: copy the text from startPosition up to (not including) the backslash
+ withoutUnicodePtr = currentPosition - escapeSize - 1
+ - startPosition;
+ System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
+ withoutUnicodePtr);
+ withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; // presumably the decoded escape - TODO confirm
+ } else { // already buffering: the backslash was buffered by the previous read, overwrite it
+ withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
+ if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
+ // where only one is correct
+ withoutUnicodePtr--;
+ }
+ }
+ }
+ // consume next character
+ unicodeAsBackSlash = false;
+ currentCharacter = source[currentPosition++];
+ // if (((currentCharacter = source[currentPosition++]) == '\\')
+ // && (source[currentPosition] == 'u')) {
+ // getNextUnicodeChar();
+ // } else {
+ if (withoutUnicodePtr != 0) { // buffering is active: mirror the character into the buffer
+ withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+ }
+ // }
+ }
+ } catch (IndexOutOfBoundsException e) { // indexed past the end of source before a closing back-quote
+ throw new InvalidInputException(UNTERMINATED_STRING);
+ } catch (InvalidInputException e) {
+ if (e.getMessage().equals(INVALID_ESCAPE)) { // NOTE(review): assumes getMessage() is non-null
+ // recovery: look at most 50 characters ahead, stopping at end of source or '\n';
+ // if a closing back-quote is found, move currentPosition just past it before rethrowing
+ for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+ if (currentPosition + lookAhead == source.length)
+ break;
+ if (source[currentPosition + lookAhead] == '\n')
+ break;
+ if (source[currentPosition + lookAhead] == '`') {
+ currentPosition += lookAhead + 1;
+ break;
+ }
+ }
+ }
+ throw e; // rethrow
+ }
+ if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
+ // //$NON-NLS-?$ where ? is an
+ // int.
+ if (currentLine == null) { // no NLSLine open yet: create and register one
+ currentLine = new NLSLine();
+ lines.add(currentLine);
+ }
+ currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
+ startPosition, currentPosition - 1));
+ }
+ }
+ /**
+  * Consumes the remainder of a single-quoted string and leaves currentPosition
+  * just past the closing single quote. getNextToken() calls this after reading
+  * the opening quote.
+  *
+  * Backslash escapes are delegated to scanSingleQuotedEscapeCharacter(). From the
+  * first escape onwards the token text is also collected in withoutUnicodeBuffer
+  * (filled from index 1; withoutUnicodePtr is the last index written). When
+  * checkNonExternalizedStringLiterals is set, the literal is added to the
+  * current NLSLine.
+  *
+  * @throws InvalidInputException with UNTERMINATED_STRING when source ends before
+  *         the closing quote; an InvalidInputException raised inside the
+  *         scanning loop is rethrown unchanged.
+  */ public void consumeStringConstant() throws InvalidInputException {
+ try {
+ // read the first character after the opening single quote
+ unicodeAsBackSlash = false;
+ currentCharacter = source[currentPosition++];
+ // if (((currentCharacter = source[currentPosition++]) == '\\')
+ // && (source[currentPosition] == 'u')) {
+ // getNextUnicodeChar();
+ // } else {
+ // if (withoutUnicodePtr != 0) {
+ // withoutUnicodeBuffer[++withoutUnicodePtr] =
+ // currentCharacter;
+ // }
+ // }
+ while (currentCharacter != '\'') {
+ /* in PHP \r and \n are valid in string literals, so the line-break check below stays disabled */
+ // if ((currentCharacter == '\n')
+ // || (currentCharacter == '\r')) {
+ // // relocate if finding another quote fairly close: thus unicode
+ // '/u000D' will be fully consumed
+ // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+ // if (currentPosition + lookAhead == source.length)
+ // break;
+ // if (source[currentPosition + lookAhead] == '\n')
+ // break;
+ // if (source[currentPosition + lookAhead] == '\"') {
+ // currentPosition += lookAhead + 1;
+ // break;
+ // }
+ // }
+ // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
+ // }
+ if (currentCharacter == '\\') {
+ int escapeSize = currentPosition; // position right after the backslash; turned into a length below
+ boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
+ // the escape scanner below may change unicodeAsBackSlash (per the original note), so it is saved first.
+ // NOTE(review): the flag is cleared right before every read in this method, so this looks always false - confirm
+ scanSingleQuotedEscapeCharacter();
+ escapeSize = currentPosition - escapeSize; // number of characters the escape scanner advanced
+ if (withoutUnicodePtr == 0) {
+ // first escape in this token: copy the text from startPosition up to (not including) the backslash
+ withoutUnicodePtr = currentPosition - escapeSize - 1
+ - startPosition;
+ System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
+ withoutUnicodePtr);
+ withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; // presumably the decoded escape - TODO confirm
+ } else { // already buffering: the backslash was buffered by the previous read, overwrite it
+ withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
+ if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
+ // where only one is correct
+ withoutUnicodePtr--;
+ }
+ }
+ }
+ // consume next character
+ unicodeAsBackSlash = false;
+ currentCharacter = source[currentPosition++];
+ // if (((currentCharacter = source[currentPosition++]) == '\\')
+ // && (source[currentPosition] == 'u')) {
+ // getNextUnicodeChar();
+ // } else {
+ if (withoutUnicodePtr != 0) { // buffering is active: mirror the character into the buffer
+ withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+ }
+ // }
+ }
+ } catch (IndexOutOfBoundsException e) { // indexed past the end of source before a closing single quote
+ throw new InvalidInputException(UNTERMINATED_STRING);
+ } catch (InvalidInputException e) {
+ if (e.getMessage().equals(INVALID_ESCAPE)) { // NOTE(review): assumes getMessage() is non-null
+ // recovery: look at most 50 characters ahead, stopping at end of source or '\n';
+ // if a closing single quote is found, move currentPosition just past it before rethrowing
+ for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+ if (currentPosition + lookAhead == source.length)
+ break;
+ if (source[currentPosition + lookAhead] == '\n')
+ break;
+ if (source[currentPosition + lookAhead] == '\'') {
+ currentPosition += lookAhead + 1;
+ break;
+ }
+ }
+ }
+ throw e; // rethrow
+ }
+ if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
+ // //$NON-NLS-?$ where ? is an
+ // int.
+ if (currentLine == null) { // no NLSLine open yet: create and register one
+ currentLine = new NLSLine();
+ lines.add(currentLine);
+ }
+ currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
+ startPosition, currentPosition - 1));
+ }
+ }
+ /**
+  * Consumes the remainder of a double-quoted string and leaves currentPosition
+  * just past the closing double quote. getNextToken() calls this after reading
+  * the opening quote when tokenizeStrings is set.
+  *
+  * Backslash escapes are delegated to scanDoubleQuotedEscapeCharacter(). From the
+  * first escape onwards the token text is also collected in withoutUnicodeBuffer
+  * (filled from index 1; withoutUnicodePtr is the last index written). When
+  * checkNonExternalizedStringLiterals is set, the literal is added to the
+  * current NLSLine.
+  *
+  * @throws InvalidInputException with UNTERMINATED_STRING when source ends before
+  *         the closing quote; an InvalidInputException raised inside the
+  *         scanning loop is rethrown unchanged.
+  */ public void consumeStringLiteral() throws InvalidInputException {
+ try {
+ // read the first character after the opening double quote
+ unicodeAsBackSlash = false;
+ currentCharacter = source[currentPosition++];
+ // if (((currentCharacter = source[currentPosition++]) == '\\')
+ // && (source[currentPosition] == 'u')) {
+ // getNextUnicodeChar();
+ // } else {
+ // if (withoutUnicodePtr != 0) {
+ // withoutUnicodeBuffer[++withoutUnicodePtr] =
+ // currentCharacter;
+ // }
+ // }
+ while (currentCharacter != '"') {
+ /* in PHP \r and \n are valid in string literals, so the line-break check below stays disabled */
+ // if ((currentCharacter == '\n')
+ // || (currentCharacter == '\r')) {
+ // // relocate if finding another quote fairly close: thus unicode
+ // '/u000D' will be fully consumed
+ // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+ // if (currentPosition + lookAhead == source.length)
+ // break;
+ // if (source[currentPosition + lookAhead] == '\n')
+ // break;
+ // if (source[currentPosition + lookAhead] == '\"') {
+ // currentPosition += lookAhead + 1;
+ // break;
+ // }
+ // }
+ // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
+ // }
+ if (currentCharacter == '\\') {
+ int escapeSize = currentPosition; // position right after the backslash; turned into a length below
+ boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
+ // the escape scanner below may change unicodeAsBackSlash (per the original note), so it is saved first.
+ // NOTE(review): the flag is cleared right before every read in this method, so this looks always false - confirm
+ scanDoubleQuotedEscapeCharacter();
+ escapeSize = currentPosition - escapeSize; // number of characters the escape scanner advanced
+ if (withoutUnicodePtr == 0) {
+ // first escape in this token: copy the text from startPosition up to (not including) the backslash
+ withoutUnicodePtr = currentPosition - escapeSize - 1
+ - startPosition;
+ System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
+ withoutUnicodePtr);
+ withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter; // presumably the decoded escape - TODO confirm
+ } else { // already buffering: the backslash was buffered by the previous read, overwrite it
+ withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
+ if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
+ // where only one is correct
+ withoutUnicodePtr--;
+ }
+ }
+ }
+ // consume next character
+ unicodeAsBackSlash = false;
+ currentCharacter = source[currentPosition++];
+ // if (((currentCharacter = source[currentPosition++]) == '\\')
+ // && (source[currentPosition] == 'u')) {
+ // getNextUnicodeChar();
+ // } else {
+ if (withoutUnicodePtr != 0) { // buffering is active: mirror the character into the buffer
+ withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+ }
+ // }
+ }
+ } catch (IndexOutOfBoundsException e) { // indexed past the end of source before a closing double quote
+ throw new InvalidInputException(UNTERMINATED_STRING);
+ } catch (InvalidInputException e) {
+ if (e.getMessage().equals(INVALID_ESCAPE)) { // NOTE(review): assumes getMessage() is non-null
+ // recovery: look at most 50 characters ahead, stopping at end of source or '\n';
+ // if a closing double quote is found, move currentPosition just past it before rethrowing
+ for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+ if (currentPosition + lookAhead == source.length)
+ break;
+ if (source[currentPosition + lookAhead] == '\n')
+ break;
+ if (source[currentPosition + lookAhead] == '\"') {
+ currentPosition += lookAhead + 1;
+ break;
+ }
+ }
+ }
+ throw e; // rethrow
+ }
+ if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
+ // //$NON-NLS-?$ where ? is an
+ // int.
+ if (currentLine == null) { // no NLSLine open yet: create and register one
+ currentLine = new NLSLine();
+ lines.add(currentLine);
+ }
+ currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
+ startPosition, currentPosition - 1));
+ }
+ }
public int getNextToken() throws InvalidInputException {
if (!phpMode) {
return getInlinedHTML(currentPosition);
case ']' :
return TokenNameRBRACKET;
case '\'' :
+ if (tokenizeStrings) {
+ consumeStringConstant();
+ return TokenNameStringConstant;
+ }
return TokenNameEncapsedString1;
case '"' :
return TokenNameEncapsedString2;
case '`' :
+ if (tokenizeStrings) {
+ consumeStringInterpolated();
+ return TokenNameStringInterpolated;
+ }
return TokenNameEncapsedString0;
case '-' :
if (getNextChar('>'))
return TokenNameERROR;
}
}
-// boolean isWhiteSpace;
+ // boolean isWhiteSpace;
while ((currentCharacter == ' ')
- || Character.isWhitespace(currentCharacter)) {
+ || Character.isWhitespace(currentCharacter)) {
startPosition = currentPosition;
currentCharacter = source[currentPosition++];
// if (((currentCharacter = source[currentPosition++]) == '\\')
currentLine = null;
}
}
-// isWhiteSpace = (currentCharacter == ' ')
-// || Character.isWhitespace(currentCharacter);
+ // isWhiteSpace = (currentCharacter == ' ')
+ // || Character.isWhitespace(currentCharacter);
// }
- }
+ }
if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
// reposition scanner in case we are interested by spaces as tokens
currentPosition--;
//little trick to get out in the middle of a source compuation
if (currentPosition > eofPosition)
return TokenNameEOF;
-
- // ---------Identify the next token-------------
- switch (currentCharacter) {
- case '(' :
- return getCastOrParen();
- case ')' :
- return TokenNameRPAREN;
- case '{' :
- return TokenNameLBRACE;
- case '}' :
- return TokenNameRBRACE;
- case '[' :
- return TokenNameLBRACKET;
- case ']' :
- return TokenNameRBRACKET;
- case ';' :
- return TokenNameSEMICOLON;
- case ',' :
- return TokenNameCOMMA;
- case '.' :
- if (getNextChar('='))
- return TokenNameDOT_EQUAL;
- if (getNextCharAsDigit())
- return scanNumber(true);
- return TokenNameDOT;
- case '+' :
- {
- int test;
- if ((test = getNextChar('+', '=')) == 0)
- return TokenNamePLUS_PLUS;
- if (test > 0)
- return TokenNamePLUS_EQUAL;
- return TokenNamePLUS;
- }
- case '-' :
+ // ---------Identify the next token-------------
+ switch (currentCharacter) {
+ case '(' :
+ return getCastOrParen();
+ case ')' :
+ return TokenNameRPAREN;
+ case '{' :
+ return TokenNameLBRACE;
+ case '}' :
+ return TokenNameRBRACE;
+ case '[' :
+ return TokenNameLBRACKET;
+ case ']' :
+ return TokenNameRBRACKET;
+ case ';' :
+ return TokenNameSEMICOLON;
+ case ',' :
+ return TokenNameCOMMA;
+ case '.' :
+ if (getNextChar('='))
+ return TokenNameDOT_EQUAL;
+ if (getNextCharAsDigit())
+ return scanNumber(true);
+ return TokenNameDOT;
+ case '+' :
+ {
+ int test;
+ if ((test = getNextChar('+', '=')) == 0)
+ return TokenNamePLUS_PLUS;
+ if (test > 0)
+ return TokenNamePLUS_EQUAL;
+ return TokenNamePLUS;
+ }
+ case '-' :
{
- int test;
- if ((test = getNextChar('-', '=')) == 0)
- return TokenNameMINUS_MINUS;
- if (test > 0)
- return TokenNameMINUS_EQUAL;
- if (getNextChar('>'))
- return TokenNameMINUS_GREATER;
- return TokenNameMINUS;
- }
- case '~' :
- if (getNextChar('='))
- return TokenNameTWIDDLE_EQUAL;
- return TokenNameTWIDDLE;
- case '!' :
+ int test;
+ if ((test = getNextChar('-', '=')) == 0)
+ return TokenNameMINUS_MINUS;
+ if (test > 0)
+ return TokenNameMINUS_EQUAL;
+ if (getNextChar('>'))
+ return TokenNameMINUS_GREATER;
+ return TokenNameMINUS;
+ }
+ case '~' :
+ if (getNextChar('='))
+ return TokenNameTWIDDLE_EQUAL;
+ return TokenNameTWIDDLE;
+ case '!' :
+ if (getNextChar('=')) {
if (getNextChar('=')) {
- if (getNextChar('=')) {
- return TokenNameNOT_EQUAL_EQUAL;
- }
- return TokenNameNOT_EQUAL;
+ return TokenNameNOT_EQUAL_EQUAL;
}
- return TokenNameNOT;
- case '*' :
- if (getNextChar('='))
- return TokenNameMULTIPLY_EQUAL;
- return TokenNameMULTIPLY;
- case '%' :
- if (getNextChar('='))
- return TokenNameREMAINDER_EQUAL;
- return TokenNameREMAINDER;
- case '<' :
- {
- int oldPosition = currentPosition;
- try {
- currentCharacter = source[currentPosition++];
- } catch (IndexOutOfBoundsException e) {
- currentPosition = oldPosition;
- return TokenNameLESS;
- }
- switch (currentCharacter) {
- case '=' :
- return TokenNameLESS_EQUAL;
- case '>' :
- return TokenNameNOT_EQUAL;
- case '<' :
- if (getNextChar('='))
- return TokenNameLEFT_SHIFT_EQUAL;
- if (getNextChar('<')) {
+ return TokenNameNOT_EQUAL;
+ }
+ return TokenNameNOT;
+ case '*' :
+ if (getNextChar('='))
+ return TokenNameMULTIPLY_EQUAL;
+ return TokenNameMULTIPLY;
+ case '%' :
+ if (getNextChar('='))
+ return TokenNameREMAINDER_EQUAL;
+ return TokenNameREMAINDER;
+ case '<' :
+ {
+ int oldPosition = currentPosition;
+ try {
+ currentCharacter = source[currentPosition++];
+ } catch (IndexOutOfBoundsException e) {
+ currentPosition = oldPosition;
+ return TokenNameLESS;
+ }
+ switch (currentCharacter) {
+ case '=' :
+ return TokenNameLESS_EQUAL;
+ case '>' :
+ return TokenNameNOT_EQUAL;
+ case '<' :
+ if (getNextChar('='))
+ return TokenNameLEFT_SHIFT_EQUAL;
+ if (getNextChar('<')) {
+ currentCharacter = source[currentPosition++];
+ while (Character.isWhitespace(currentCharacter)) {
currentCharacter = source[currentPosition++];
- while (Character.isWhitespace(currentCharacter)) {
- currentCharacter = source[currentPosition++];
- }
- int heredocStart = currentPosition - 1;
- int heredocLength = 0;
- if (isPHPIdentifierStart(currentCharacter)) {
- currentCharacter = source[currentPosition++];
- } else {
- return TokenNameERROR;
- }
- while (isPHPIdentifierPart(currentCharacter)) {
- currentCharacter = source[currentPosition++];
- }
- heredocLength = currentPosition - heredocStart - 1;
- // heredoc end-tag determination
- boolean endTag = true;
- char ch;
- do {
- ch = source[currentPosition++];
- if (ch == '\r' || ch == '\n') {
- if (recordLineSeparator) {
- pushLineSeparator();
- } else {
- currentLine = null;
- }
- for (int i = 0; i < heredocLength; i++) {
- if (source[currentPosition + i] != source[heredocStart
- + i]) {
- endTag = false;
- break;
- }
- }
- if (endTag) {
- currentPosition += heredocLength - 1;
- currentCharacter = source[currentPosition++];
- break; // do...while loop
- } else {
- endTag = true;
+ }
+ int heredocStart = currentPosition - 1;
+ int heredocLength = 0;
+ if (isPHPIdentifierStart(currentCharacter)) {
+ currentCharacter = source[currentPosition++];
+ } else {
+ return TokenNameERROR;
+ }
+ while (isPHPIdentifierPart(currentCharacter)) {
+ currentCharacter = source[currentPosition++];
+ }
+ heredocLength = currentPosition - heredocStart - 1;
+ // heredoc end-tag determination
+ boolean endTag = true;
+ char ch;
+ do {
+ ch = source[currentPosition++];
+ if (ch == '\r' || ch == '\n') {
+ if (recordLineSeparator) {
+ pushLineSeparator();
+ } else {
+ currentLine = null;
+ }
+ for (int i = 0; i < heredocLength; i++) {
+ if (source[currentPosition + i] != source[heredocStart
+ + i]) {
+ endTag = false;
+ break;
}
}
- } while (true);
- return TokenNameHEREDOC;
- }
- return TokenNameLEFT_SHIFT;
- }
- currentPosition = oldPosition;
- return TokenNameLESS;
+ if (endTag) {
+ currentPosition += heredocLength - 1;
+ currentCharacter = source[currentPosition++];
+ break; // do...while loop
+ } else {
+ endTag = true;
+ }
+ }
+ } while (true);
+ return TokenNameHEREDOC;
+ }
+ return TokenNameLEFT_SHIFT;
}
- case '>' :
- {
- int test;
+ currentPosition = oldPosition;
+ return TokenNameLESS;
+ }
+ case '>' :
+ {
+ int test;
+ if ((test = getNextChar('=', '>')) == 0)
+ return TokenNameGREATER_EQUAL;
+ if (test > 0) {
if ((test = getNextChar('=', '>')) == 0)
- return TokenNameGREATER_EQUAL;
- if (test > 0) {
- if ((test = getNextChar('=', '>')) == 0)
- return TokenNameRIGHT_SHIFT_EQUAL;
- return TokenNameRIGHT_SHIFT;
- }
- return TokenNameGREATER;
+ return TokenNameRIGHT_SHIFT_EQUAL;
+ return TokenNameRIGHT_SHIFT;
}
- case '=' :
+ return TokenNameGREATER;
+ }
+ case '=' :
+ if (getNextChar('=')) {
if (getNextChar('=')) {
- if (getNextChar('=')) {
- return TokenNameEQUAL_EQUAL_EQUAL;
- }
- return TokenNameEQUAL_EQUAL;
+ return TokenNameEQUAL_EQUAL_EQUAL;
}
- if (getNextChar('>'))
- return TokenNameEQUAL_GREATER;
- return TokenNameEQUAL;
- case '&' :
- {
- int test;
- if ((test = getNextChar('&', '=')) == 0)
- return TokenNameAND_AND;
- if (test > 0)
- return TokenNameAND_EQUAL;
- return TokenNameAND;
- }
- case '|' :
- {
- int test;
- if ((test = getNextChar('|', '=')) == 0)
- return TokenNameOR_OR;
- if (test > 0)
- return TokenNameOR_EQUAL;
- return TokenNameOR;
+ return TokenNameEQUAL_EQUAL;
+ }
+ if (getNextChar('>'))
+ return TokenNameEQUAL_GREATER;
+ return TokenNameEQUAL;
+ case '&' :
+ {
+ int test;
+ if ((test = getNextChar('&', '=')) == 0)
+ return TokenNameAND_AND;
+ if (test > 0)
+ return TokenNameAND_EQUAL;
+ return TokenNameAND;
+ }
+ case '|' :
+ {
+ int test;
+ if ((test = getNextChar('|', '=')) == 0)
+ return TokenNameOR_OR;
+ if (test > 0)
+ return TokenNameOR_EQUAL;
+ return TokenNameOR;
+ }
+ case '^' :
+ if (getNextChar('='))
+ return TokenNameXOR_EQUAL;
+ return TokenNameXOR;
+ case '?' :
+ if (getNextChar('>')) {
+ phpMode = false;
+ if (currentPosition == source.length) {
+ phpMode = true;
+ return TokenNameINLINE_HTML;
}
- case '^' :
- if (getNextChar('='))
- return TokenNameXOR_EQUAL;
- return TokenNameXOR;
- case '?' :
- if (getNextChar('>')) {
- phpMode = false;
- if (currentPosition == source.length) {
- phpMode = true;
- return TokenNameINLINE_HTML;
- }
- return getInlinedHTML(currentPosition - 2);
+ return getInlinedHTML(currentPosition - 2);
+ }
+ return TokenNameQUESTION;
+ case ':' :
+ if (getNextChar(':'))
+ return TokenNamePAAMAYIM_NEKUDOTAYIM;
+ return TokenNameCOLON;
+ case '@' :
+ return TokenNameAT;
+ case '\'' :
+ consumeStringConstant();
+ return TokenNameStringConstant;
+ case '"' :
+ if (tokenizeStrings) {
+ consumeStringLiteral();
+ return TokenNameStringLiteral;
+ }
+ return TokenNameEncapsedString2;
+ case '`' :
+ if (tokenizeStrings) {
+ consumeStringInterpolated();
+ return TokenNameStringInterpolated;
+ }
+ return TokenNameEncapsedString0;
+ case '#' :
+ case '/' :
+ {
+ char startChar = currentCharacter;
+ if (getNextChar('=')) {
+ return TokenNameDIVIDE_EQUAL;
}
- return TokenNameQUESTION;
- case ':' :
- if (getNextChar(':'))
- return TokenNamePAAMAYIM_NEKUDOTAYIM;
- return TokenNameCOLON;
- case '@' :
- return TokenNameAT;
- case '\'' :
-// return TokenNameEncapsedString1;
- try {
- // consume next character
- unicodeAsBackSlash = false;
- currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++]) == '\\')
- // && (source[currentPosition] == 'u')) {
- // getNextUnicodeChar();
- // } else {
- // if (withoutUnicodePtr != 0) {
- // withoutUnicodeBuffer[++withoutUnicodePtr] =
- // currentCharacter;
- // }
- // }
- while (currentCharacter != '\'') {
- /** ** in PHP \r and \n are valid in string literals *** */
- // if ((currentCharacter == '\n')
- // || (currentCharacter == '\r')) {
- // // relocate if finding another quote fairly close: thus
- // unicode '/u000D' will be fully consumed
- // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
- // if (currentPosition + lookAhead == source.length)
- // break;
- // if (source[currentPosition + lookAhead] == '\n')
- // break;
- // if (source[currentPosition + lookAhead] == '\"') {
- // currentPosition += lookAhead + 1;
- // break;
+ int test;
+ if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
+ //line comment
+ int endPositionForLineComment = 0;
+ try { //get the next char
+ currentCharacter = source[currentPosition++];
+ // if (((currentCharacter = source[currentPosition++])
+ // == '\\')
+ // && (source[currentPosition] == 'u')) {
+ // //-------------unicode traitement ------------
+ // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
+ // currentPosition++;
+ // while (source[currentPosition] == 'u') {
+ // currentPosition++;
+ // }
+ // if ((c1 =
+ // Character.getNumericValue(source[currentPosition++]))
+ // > 15
+ // || c1 < 0
+ // || (c2 =
+ // Character.getNumericValue(source[currentPosition++]))
+ // > 15
+ // || c2 < 0
+ // || (c3 =
+ // Character.getNumericValue(source[currentPosition++]))
+ // > 15
+ // || c3 < 0
+ // || (c4 =
+ // Character.getNumericValue(source[currentPosition++]))
+ // > 15
+ // || c4 < 0) {
+ // throw new
+ // InvalidInputException(INVALID_UNICODE_ESCAPE);
+ // } else {
+ // currentCharacter =
+ // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// }
// }
- // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
- // }
- if (currentCharacter == '\\') {
- int escapeSize = currentPosition;
- boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
- //scanEscapeCharacter make a side effect on this value and
- // we need the previous value few lines down this one
- scanSingleQuotedEscapeCharacter();
- escapeSize = currentPosition - escapeSize;
- if (withoutUnicodePtr == 0) {
- //buffer all the entries that have been left aside....
- withoutUnicodePtr = currentPosition - escapeSize - 1
- - startPosition;
- System.arraycopy(source, startPosition,
- withoutUnicodeBuffer, 1, withoutUnicodePtr);
- withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
- } else { //overwrite the / in the buffer
- withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
- if (backSlashAsUnicodeInString) { //there are TWO \ in
- // the stream where
- // only one is correct
- withoutUnicodePtr--;
+ //handle the \\u case manually into comment
+ // if (currentCharacter == '\\') {
+ // if (source[currentPosition] == '\\')
+ // currentPosition++;
+ // } //jump over the \\
+ boolean isUnicode = false;
+ while (currentCharacter != '\r' && currentCharacter != '\n') {
+ if (currentCharacter == '?') {
+ if (getNextChar('>')) {
+ startPosition = currentPosition - 2;
+ phpMode = false;
+ return TokenNameINLINE_HTML;
}
}
- }
- // consume next character
- unicodeAsBackSlash = false;
- currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++]) ==
- // '\\')
- // && (source[currentPosition] == 'u')) {
- // getNextUnicodeChar();
- // } else {
- if (withoutUnicodePtr != 0) {
- withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
- }
- // }
- }
- } catch (IndexOutOfBoundsException e) {
- throw new InvalidInputException(UNTERMINATED_STRING);
- } catch (InvalidInputException e) {
- if (e.getMessage().equals(INVALID_ESCAPE)) {
- // relocate if finding another quote fairly close: thus
- // unicode '/u000D' will be fully consumed
- for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
- if (currentPosition + lookAhead == source.length)
- break;
- if (source[currentPosition + lookAhead] == '\n')
- break;
- if (source[currentPosition + lookAhead] == '\'') {
- currentPosition += lookAhead + 1;
- break;
- }
- }
- }
- throw e; // rethrow
- }
- if (checkNonExternalizedStringLiterals) { // check for presence
- // of NLS tags
- // //$NON-NLS-?$ where
- // ? is an int.
- if (currentLine == null) {
- currentLine = new NLSLine();
- lines.add(currentLine);
- }
- currentLine.add(new StringLiteral(
- getCurrentTokenSourceString(), startPosition,
- currentPosition - 1));
- }
- return TokenNameStringConstant;
- case '"' :
- return TokenNameEncapsedString2;
- case '`' :
- return TokenNameEncapsedString0;
- case '#' :
- case '/' :
- {
- char startChar = currentCharacter;
- if (getNextChar('=')) {
- return TokenNameDIVIDE_EQUAL;
- }
- int test;
- if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
- //line comment
- int endPositionForLineComment = 0;
- try { //get the next char
+ //get the next char
+ isUnicode = false;
currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++])
- // == '\\')
- // && (source[currentPosition] == 'u')) {
- // //-------------unicode traitement ------------
- // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
- // currentPosition++;
- // while (source[currentPosition] == 'u') {
+ // if (((currentCharacter = source[currentPosition++])
+ // == '\\')
+ // && (source[currentPosition] == 'u')) {
+ // isUnicode = true;
+ // //-------------unicode traitement ------------
+ // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
// currentPosition++;
- // }
- // if ((c1 =
- // Character.getNumericValue(source[currentPosition++]))
- // > 15
- // || c1 < 0
- // || (c2 =
- // Character.getNumericValue(source[currentPosition++]))
- // > 15
- // || c2 < 0
- // || (c3 =
- // Character.getNumericValue(source[currentPosition++]))
- // > 15
- // || c3 < 0
- // || (c4 =
+ // while (source[currentPosition] == 'u') {
+ // currentPosition++;
+ // }
+ // if ((c1 =
// Character.getNumericValue(source[currentPosition++]))
// > 15
- // || c4 < 0) {
- // throw new
+ // || c1 < 0
+ // || (c2 =
+ // Character.getNumericValue(
+ // source[currentPosition++]))
+ // > 15
+ // || c2 < 0
+ // || (c3 =
+ // Character.getNumericValue(
+ // source[currentPosition++]))
+ // > 15
+ // || c3 < 0
+ // || (c4 =
+ // Character.getNumericValue(
+ // source[currentPosition++]))
+ // > 15
+ // || c4 < 0) {
+ // throw new
// InvalidInputException(INVALID_UNICODE_ESCAPE);
- // } else {
- // currentCharacter =
- // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
+ // } else {
+ // currentCharacter =
+ // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
+ // }
// }
- // }
//handle the \\u case manually into comment
- // if (currentCharacter == '\\') {
- // if (source[currentPosition] == '\\')
- // currentPosition++;
- // } //jump over the \\
- boolean isUnicode = false;
- while (currentCharacter != '\r'
- && currentCharacter != '\n') {
- if (currentCharacter == '?') {
- if (getNextChar('>')) {
- startPosition = currentPosition - 2;
- phpMode = false;
- return TokenNameINLINE_HTML;
- }
- }
- //get the next char
- isUnicode = false;
- currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++])
- // == '\\')
- // && (source[currentPosition] == 'u')) {
- // isUnicode = true;
- // //-------------unicode traitement ------------
- // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
- // currentPosition++;
- // while (source[currentPosition] == 'u') {
- // currentPosition++;
- // }
- // if ((c1 =
- // Character.getNumericValue(source[currentPosition++]))
- // > 15
- // || c1 < 0
- // || (c2 =
- // Character.getNumericValue(
- // source[currentPosition++]))
- // > 15
- // || c2 < 0
- // || (c3 =
- // Character.getNumericValue(
- // source[currentPosition++]))
- // > 15
- // || c3 < 0
- // || (c4 =
- // Character.getNumericValue(
- // source[currentPosition++]))
- // > 15
- // || c4 < 0) {
- // throw new
- // InvalidInputException(INVALID_UNICODE_ESCAPE);
- // } else {
- // currentCharacter =
- // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
- // }
- // }
- //handle the \\u case manually into comment
- // if (currentCharacter == '\\') {
- // if (source[currentPosition] == '\\')
- // currentPosition++;
- // } //jump over the \\
- }
- if (isUnicode) {
- endPositionForLineComment = currentPosition - 6;
- } else {
- endPositionForLineComment = currentPosition - 1;
- }
- recordComment(false);
- if ((currentCharacter == '\r')
- || (currentCharacter == '\n')) {
- checkNonExternalizeString();
- if (recordLineSeparator) {
- if (isUnicode) {
- pushUnicodeLineSeparator();
- } else {
- pushLineSeparator();
- }
- } else {
- currentLine = null;
- }
- }
- if (tokenizeComments) {
- if (!isUnicode) {
- currentPosition = endPositionForLineComment;
- // reset one character behind
- }
- return TokenNameCOMMENT_LINE;
- }
- } catch (IndexOutOfBoundsException e) { //an eof will them
- // be generated
- if (tokenizeComments) {
- currentPosition--;
- // reset one character behind
- return TokenNameCOMMENT_LINE;
- }
+ // if (currentCharacter == '\\') {
+ // if (source[currentPosition] == '\\')
+ // currentPosition++;
+ // } //jump over the \\
}
- break;
- }
- if (test > 0) {
- //traditional and annotation comment
- boolean isJavadoc = false, star = false;
- // consume next character
- unicodeAsBackSlash = false;
- currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++]) ==
- // '\\')
- // && (source[currentPosition] == 'u')) {
- // getNextUnicodeChar();
- // } else {
- // if (withoutUnicodePtr != 0) {
- // withoutUnicodeBuffer[++withoutUnicodePtr] =
- // currentCharacter;
- // }
- // }
- if (currentCharacter == '*') {
- isJavadoc = true;
- star = true;
+ if (isUnicode) {
+ endPositionForLineComment = currentPosition - 6;
+ } else {
+ endPositionForLineComment = currentPosition - 1;
}
+ recordComment(false);
if ((currentCharacter == '\r')
|| (currentCharacter == '\n')) {
checkNonExternalizeString();
if (recordLineSeparator) {
- pushLineSeparator();
+ if (isUnicode) {
+ pushUnicodeLineSeparator();
+ } else {
+ pushLineSeparator();
+ }
} else {
currentLine = null;
}
}
- try { //get the next char
- currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++])
- // == '\\')
- // && (source[currentPosition] == 'u')) {
- // //-------------unicode traitement ------------
- // getNextUnicodeChar();
- // }
- //handle the \\u case manually into comment
- // if (currentCharacter == '\\') {
- // if (source[currentPosition] == '\\')
- // currentPosition++;
- // //jump over the \\
- // }
- // empty comment is not a javadoc /**/
- if (currentCharacter == '/') {
- isJavadoc = false;
- }
- //loop until end of comment */
- while ((currentCharacter != '/') || (!star)) {
- if ((currentCharacter == '\r')
- || (currentCharacter == '\n')) {
- checkNonExternalizeString();
- if (recordLineSeparator) {
- pushLineSeparator();
- } else {
- currentLine = null;
- }
- }
- star = currentCharacter == '*';
- //get next char
- currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++])
- // == '\\')
- // && (source[currentPosition] == 'u')) {
- // //-------------unicode traitement ------------
- // getNextUnicodeChar();
- // }
- //handle the \\u case manually into comment
- // if (currentCharacter == '\\') {
- // if (source[currentPosition] == '\\')
- // currentPosition++;
- // } //jump over the \\
- }
- recordComment(isJavadoc);
- if (tokenizeComments) {
- if (isJavadoc)
- return TokenNameCOMMENT_PHPDOC;
- return TokenNameCOMMENT_BLOCK;
+ if (tokenizeComments) {
+ if (!isUnicode) {
+ currentPosition = endPositionForLineComment;
+ // reset one character behind
}
- } catch (IndexOutOfBoundsException e) {
- throw new InvalidInputException(UNTERMINATED_COMMENT);
+ return TokenNameCOMMENT_LINE;
+ }
+ } catch (IndexOutOfBoundsException e) { //an eof will them
+ // be generated
+ if (tokenizeComments) {
+ currentPosition--;
+ // reset one character behind
+ return TokenNameCOMMENT_LINE;
}
- break;
}
- return TokenNameDIVIDE;
+ break;
}
- case '\u001a' :
- if (atEnd())
- return TokenNameEOF;
- //the atEnd may not be <currentPosition == source.length> if
- // source is only some part of a real (external) stream
- throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
- default :
- if (currentCharacter == '$') {
- int oldPosition = currentPosition;
- try {
- currentCharacter = source[currentPosition++];
- if (isPHPIdentifierStart(currentCharacter)) {
- return scanIdentifierOrKeyword(true);
+ if (test > 0) {
+ //traditional and annotation comment
+ boolean isJavadoc = false, star = false;
+ // consume next character
+ unicodeAsBackSlash = false;
+ currentCharacter = source[currentPosition++];
+ // if (((currentCharacter = source[currentPosition++]) ==
+ // '\\')
+ // && (source[currentPosition] == 'u')) {
+ // getNextUnicodeChar();
+ // } else {
+ // if (withoutUnicodePtr != 0) {
+ // withoutUnicodeBuffer[++withoutUnicodePtr] =
+ // currentCharacter;
+ // }
+ // }
+ if (currentCharacter == '*') {
+ isJavadoc = true;
+ star = true;
+ }
+ if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
+ checkNonExternalizeString();
+ if (recordLineSeparator) {
+ pushLineSeparator();
} else {
- currentPosition = oldPosition;
- return TokenNameDOLLAR;
+ currentLine = null;
+ }
+ }
+ try { //get the next char
+ currentCharacter = source[currentPosition++];
+ // if (((currentCharacter = source[currentPosition++])
+ // == '\\')
+ // && (source[currentPosition] == 'u')) {
+ // //-------------unicode traitement ------------
+ // getNextUnicodeChar();
+ // }
+ //handle the \\u case manually into comment
+ // if (currentCharacter == '\\') {
+ // if (source[currentPosition] == '\\')
+ // currentPosition++;
+ // //jump over the \\
+ // }
+ // empty comment is not a javadoc /**/
+ if (currentCharacter == '/') {
+ isJavadoc = false;
+ }
+ //loop until end of comment */
+ while ((currentCharacter != '/') || (!star)) {
+ if ((currentCharacter == '\r')
+ || (currentCharacter == '\n')) {
+ checkNonExternalizeString();
+ if (recordLineSeparator) {
+ pushLineSeparator();
+ } else {
+ currentLine = null;
+ }
+ }
+ star = currentCharacter == '*';
+ //get next char
+ currentCharacter = source[currentPosition++];
+ // if (((currentCharacter = source[currentPosition++])
+ // == '\\')
+ // && (source[currentPosition] == 'u')) {
+ // //-------------unicode traitement ------------
+ // getNextUnicodeChar();
+ // }
+ //handle the \\u case manually into comment
+ // if (currentCharacter == '\\') {
+ // if (source[currentPosition] == '\\')
+ // currentPosition++;
+ // } //jump over the \\
+ }
+ recordComment(isJavadoc);
+ if (tokenizeComments) {
+ if (isJavadoc)
+ return TokenNameCOMMENT_PHPDOC;
+ return TokenNameCOMMENT_BLOCK;
}
} catch (IndexOutOfBoundsException e) {
+ throw new InvalidInputException(UNTERMINATED_COMMENT);
+ }
+ break;
+ }
+ return TokenNameDIVIDE;
+ }
+ case '\u001a' :
+ if (atEnd())
+ return TokenNameEOF;
+ //the atEnd may not be <currentPosition == source.length> if
+ // source is only some part of a real (external) stream
+ throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
+ default :
+ if (currentCharacter == '$') {
+ int oldPosition = currentPosition;
+ try {
+ currentCharacter = source[currentPosition++];
+ if (isPHPIdentifierStart(currentCharacter)) {
+ return scanIdentifierOrKeyword(true);
+ } else {
currentPosition = oldPosition;
return TokenNameDOLLAR;
}
+ } catch (IndexOutOfBoundsException e) {
+ currentPosition = oldPosition;
+ return TokenNameDOLLAR;
}
- if (isPHPIdentifierStart(currentCharacter))
- return scanIdentifierOrKeyword(false);
- if (Character.isDigit(currentCharacter))
- return scanNumber(false);
- return TokenNameERROR;
- }
-
+ }
+ if (isPHPIdentifierStart(currentCharacter))
+ return scanIdentifierOrKeyword(false);
+ if (Character.isDigit(currentCharacter))
+ return scanNumber(false);
+ return TokenNameERROR;
+ }
}
} //-----------------end switch while try--------------------
catch (IndexOutOfBoundsException e) {
}
/**
 * Convenience constructor that leaves string tokenization switched off.
 * It does no work of its own: the four arguments are forwarded unchanged to
 * the five-argument constructor, with <code>false</code> supplied for
 * <code>tokenizeStrings</code>.
 *
 * @param tokenizeComments
 *          forwarded as-is; when set, the scanner returns comment tokens
 *          such as TokenNameCOMMENT_LINE
 * @param tokenizeWhiteSpace
 *          forwarded as-is to the five-argument constructor
 * @param checkNonExternalizedStringLiterals
 *          forwarded as-is to the five-argument constructor
 * @param assertMode
 *          forwarded as-is to the five-argument constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
boolean checkNonExternalizedStringLiterals, boolean assertMode) {
+ this(tokenizeComments, tokenizeWhiteSpace,
+ checkNonExternalizedStringLiterals, assertMode, false);
+ }
+ public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
+ boolean checkNonExternalizedStringLiterals, boolean assertMode,
+ boolean tokenizeStrings) {
this.eofPosition = Integer.MAX_VALUE;
this.tokenizeComments = tokenizeComments;
this.tokenizeWhiteSpace = tokenizeWhiteSpace;
+ this.tokenizeStrings = tokenizeStrings;
this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
this.assertMode = assertMode;
this.encapsedStringStack = null;