import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
+import java.util.Stack;
import net.sourceforge.phpdt.core.compiler.CharOperation;
import net.sourceforge.phpdt.core.compiler.IScanner;
import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
public boolean containsAssertKeyword = false;
public boolean recordLineSeparator;
+ // while false, getNextToken() hands control to getInlinedHTML() instead of
+ // scanning PHP tokens
public boolean phpMode = false;
+ // stack of Character markers that getNextToken() peeks at and pushes to
+ // while it is inside a quoted string; null until a new Stack is assigned
+ // elsewhere in this class
+ public Stack encapsedStringStack = null;
public char currentCharacter;
public int startPosition;
public int currentPosition;
// source
public boolean tokenizeComments;
public boolean tokenizeWhiteSpace;
+ // when true, getNextToken() consumes a double-quoted or backtick string as
+ // one token (TokenNameStringLiteral / TokenNameStringInterpolated); when
+ // false it returns only the delimiter token (TokenNameEncapsedString2 / 0)
+ public boolean tokenizeStrings;
//source should be viewed as a window (aka a part)
//of a entire very large stream
public char source[];
public char[][] taskPriorities = null;
public static final boolean DEBUG = false;
public static final boolean TRACE = false;
- public Scanner() {
- this(false, false);
- }
- public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
- this(tokenizeComments, tokenizeWhiteSpace, false);
- }
+
/**
* Determines if the specified character is permissible as the first
* character in a PHP identifier
currentPosition = tempPosition;
return TokenNameLPAREN;
}
+ /**
+ * Scans the remainder of a backtick-delimited string. The visible callers in
+ * getNextToken() invoke this right after reading the opening backtick, so
+ * scanning starts at source[currentPosition] and ends once the closing
+ * backtick has been read; currentPosition then points just past it.
+ * Backslash escapes are handed to scanDoubleQuotedEscapeCharacter(), and from
+ * the first escape on the token text is mirrored into withoutUnicodeBuffer.
+ * When checkNonExternalizedStringLiterals is set, the scanned token is also
+ * recorded as a StringLiteral on the current NLSLine.
+ *
+ * @throws InvalidInputException UNTERMINATED_STRING when an index runs out of
+ * bounds while scanning (in practice: source ends before the closing
+ * backtick), or the exception raised by the escape scanner, rethrown
+ */
+ public void consumeStringInterpolated() throws InvalidInputException {
+ try {
+ // read the first character after the opening backtick
+ unicodeAsBackSlash = false;
+ currentCharacter = source[currentPosition++];
+ // if (((currentCharacter = source[currentPosition++]) == '\\')
+ // && (source[currentPosition] == 'u')) {
+ // getNextUnicodeChar();
+ // } else {
+ // if (withoutUnicodePtr != 0) {
+ // withoutUnicodeBuffer[++withoutUnicodePtr] =
+ // currentCharacter;
+ // }
+ // }
+ while (currentCharacter != '`') {
+ /** ** in PHP \r and \n are valid in string literals *** */
+ // if ((currentCharacter == '\n')
+ // || (currentCharacter == '\r')) {
+ // // relocate if finding another quote fairly close: thus unicode
+ // '/u000D' will be fully consumed
+ // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+ // if (currentPosition + lookAhead == source.length)
+ // break;
+ // if (source[currentPosition + lookAhead] == '\n')
+ // break;
+ // if (source[currentPosition + lookAhead] == '\"') {
+ // currentPosition += lookAhead + 1;
+ // break;
+ // }
+ // }
+ // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
+ // }
+ if (currentCharacter == '\\') {
+ int escapeSize = currentPosition;
+ boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
+ // remember the flag before the escape scanner runs: per the original
+ // note, the scanner changes it as a side effect
+ scanDoubleQuotedEscapeCharacter();
+ escapeSize = currentPosition - escapeSize;
+ if (withoutUnicodePtr == 0) {
+ // first escape in this token: copy everything from startPosition up
+ // to (not including) the backslash into the buffer, starting at
+ // index 1, then append the character left in currentCharacter
+ withoutUnicodePtr = currentPosition - escapeSize - 1
+ - startPosition;
+ System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
+ withoutUnicodePtr);
+ withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+ } else { // buffer already active: overwrite the backslash appended on the previous read
+ withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
+ if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
+ // where only one is correct
+ withoutUnicodePtr--;
+ }
+ }
+ }
+ // read the next character; it is mirrored into the buffer only once
+ // buffering has started (withoutUnicodePtr != 0)
+ unicodeAsBackSlash = false;
+ currentCharacter = source[currentPosition++];
+ // if (((currentCharacter = source[currentPosition++]) == '\\')
+ // && (source[currentPosition] == 'u')) {
+ // getNextUnicodeChar();
+ // } else {
+ if (withoutUnicodePtr != 0) {
+ withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+ }
+ // }
+ }
+ } catch (IndexOutOfBoundsException e) {
+ throw new InvalidInputException(UNTERMINATED_STRING);
+ } catch (InvalidInputException e) {
+ if (e.getMessage().equals(INVALID_ESCAPE)) {
+ // recovery: if a closing backtick occurs within the next 50 characters
+ // and before a line feed, move currentPosition just past it
+ for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+ if (currentPosition + lookAhead == source.length)
+ break;
+ if (source[currentPosition + lookAhead] == '\n')
+ break;
+ if (source[currentPosition + lookAhead] == '`') {
+ currentPosition += lookAhead + 1;
+ break;
+ }
+ }
+ }
+ throw e; // rethrow
+ }
+ if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
+ // //$NON-NLS-?$ where ? is an
+ // int.
+ if (currentLine == null) {
+ currentLine = new NLSLine();
+ lines.add(currentLine);
+ }
+ currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
+ startPosition, currentPosition - 1));
+ }
+ }
+ /**
+ * Scans the remainder of a single-quoted string. The visible callers in
+ * getNextToken() invoke this right after reading the opening quote, so
+ * scanning starts at source[currentPosition] and ends once the closing
+ * single quote has been read; currentPosition then points just past it.
+ * Backslash escapes are handed to scanSingleQuotedEscapeCharacter(), and from
+ * the first escape on the token text is mirrored into withoutUnicodeBuffer.
+ * When checkNonExternalizedStringLiterals is set, the scanned token is also
+ * recorded as a StringLiteral on the current NLSLine.
+ *
+ * @throws InvalidInputException UNTERMINATED_STRING when an index runs out of
+ * bounds while scanning (in practice: source ends before the closing
+ * quote), or the exception raised by the escape scanner, rethrown
+ */
+ public void consumeStringConstant() throws InvalidInputException {
+ try {
+ // read the first character after the opening single quote
+ unicodeAsBackSlash = false;
+ currentCharacter = source[currentPosition++];
+ // if (((currentCharacter = source[currentPosition++]) == '\\')
+ // && (source[currentPosition] == 'u')) {
+ // getNextUnicodeChar();
+ // } else {
+ // if (withoutUnicodePtr != 0) {
+ // withoutUnicodeBuffer[++withoutUnicodePtr] =
+ // currentCharacter;
+ // }
+ // }
+ while (currentCharacter != '\'') {
+ /** ** in PHP \r and \n are valid in string literals *** */
+ // if ((currentCharacter == '\n')
+ // || (currentCharacter == '\r')) {
+ // // relocate if finding another quote fairly close: thus unicode
+ // '/u000D' will be fully consumed
+ // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+ // if (currentPosition + lookAhead == source.length)
+ // break;
+ // if (source[currentPosition + lookAhead] == '\n')
+ // break;
+ // if (source[currentPosition + lookAhead] == '\"') {
+ // currentPosition += lookAhead + 1;
+ // break;
+ // }
+ // }
+ // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
+ // }
+ if (currentCharacter == '\\') {
+ int escapeSize = currentPosition;
+ boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
+ // remember the flag before the escape scanner runs: per the original
+ // note, the scanner changes it as a side effect
+ scanSingleQuotedEscapeCharacter();
+ escapeSize = currentPosition - escapeSize;
+ if (withoutUnicodePtr == 0) {
+ // first escape in this token: copy everything from startPosition up
+ // to (not including) the backslash into the buffer, starting at
+ // index 1, then append the character left in currentCharacter
+ withoutUnicodePtr = currentPosition - escapeSize - 1
+ - startPosition;
+ System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
+ withoutUnicodePtr);
+ withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+ } else { // buffer already active: overwrite the backslash appended on the previous read
+ withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
+ if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
+ // where only one is correct
+ withoutUnicodePtr--;
+ }
+ }
+ }
+ // read the next character; it is mirrored into the buffer only once
+ // buffering has started (withoutUnicodePtr != 0)
+ unicodeAsBackSlash = false;
+ currentCharacter = source[currentPosition++];
+ // if (((currentCharacter = source[currentPosition++]) == '\\')
+ // && (source[currentPosition] == 'u')) {
+ // getNextUnicodeChar();
+ // } else {
+ if (withoutUnicodePtr != 0) {
+ withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+ }
+ // }
+ }
+ } catch (IndexOutOfBoundsException e) {
+ throw new InvalidInputException(UNTERMINATED_STRING);
+ } catch (InvalidInputException e) {
+ if (e.getMessage().equals(INVALID_ESCAPE)) {
+ // recovery: if a closing single quote occurs within the next 50
+ // characters and before a line feed, move currentPosition just past it
+ for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+ if (currentPosition + lookAhead == source.length)
+ break;
+ if (source[currentPosition + lookAhead] == '\n')
+ break;
+ if (source[currentPosition + lookAhead] == '\'') {
+ currentPosition += lookAhead + 1;
+ break;
+ }
+ }
+ }
+ throw e; // rethrow
+ }
+ if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
+ // //$NON-NLS-?$ where ? is an
+ // int.
+ if (currentLine == null) {
+ currentLine = new NLSLine();
+ lines.add(currentLine);
+ }
+ currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
+ startPosition, currentPosition - 1));
+ }
+ }
+ /**
+ * Scans the remainder of a double-quoted string. The visible caller in
+ * getNextToken() invokes this (only when tokenizeStrings is set) right after
+ * reading the opening quote, so scanning starts at source[currentPosition]
+ * and ends once the closing double quote has been read; currentPosition then
+ * points just past it. Backslash escapes are handed to
+ * scanDoubleQuotedEscapeCharacter(), and from the first escape on the token
+ * text is mirrored into withoutUnicodeBuffer. When
+ * checkNonExternalizedStringLiterals is set, the scanned token is also
+ * recorded as a StringLiteral on the current NLSLine.
+ *
+ * @throws InvalidInputException UNTERMINATED_STRING when an index runs out of
+ * bounds while scanning (in practice: source ends before the closing
+ * quote), or the exception raised by the escape scanner, rethrown
+ */
+ public void consumeStringLiteral() throws InvalidInputException {
+ try {
+ // read the first character after the opening double quote
+ unicodeAsBackSlash = false;
+ currentCharacter = source[currentPosition++];
+ // if (((currentCharacter = source[currentPosition++]) == '\\')
+ // && (source[currentPosition] == 'u')) {
+ // getNextUnicodeChar();
+ // } else {
+ // if (withoutUnicodePtr != 0) {
+ // withoutUnicodeBuffer[++withoutUnicodePtr] =
+ // currentCharacter;
+ // }
+ // }
+ while (currentCharacter != '"') {
+ /** ** in PHP \r and \n are valid in string literals *** */
+ // if ((currentCharacter == '\n')
+ // || (currentCharacter == '\r')) {
+ // // relocate if finding another quote fairly close: thus unicode
+ // '/u000D' will be fully consumed
+ // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+ // if (currentPosition + lookAhead == source.length)
+ // break;
+ // if (source[currentPosition + lookAhead] == '\n')
+ // break;
+ // if (source[currentPosition + lookAhead] == '\"') {
+ // currentPosition += lookAhead + 1;
+ // break;
+ // }
+ // }
+ // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
+ // }
+ if (currentCharacter == '\\') {
+ int escapeSize = currentPosition;
+ boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
+ // remember the flag before the escape scanner runs: per the original
+ // note, the scanner changes it as a side effect
+ scanDoubleQuotedEscapeCharacter();
+ escapeSize = currentPosition - escapeSize;
+ if (withoutUnicodePtr == 0) {
+ // first escape in this token: copy everything from startPosition up
+ // to (not including) the backslash into the buffer, starting at
+ // index 1, then append the character left in currentCharacter
+ withoutUnicodePtr = currentPosition - escapeSize - 1
+ - startPosition;
+ System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
+ withoutUnicodePtr);
+ withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+ } else { // buffer already active: overwrite the backslash appended on the previous read
+ withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
+ if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
+ // where only one is correct
+ withoutUnicodePtr--;
+ }
+ }
+ }
+ // read the next character; it is mirrored into the buffer only once
+ // buffering has started (withoutUnicodePtr != 0)
+ unicodeAsBackSlash = false;
+ currentCharacter = source[currentPosition++];
+ // if (((currentCharacter = source[currentPosition++]) == '\\')
+ // && (source[currentPosition] == 'u')) {
+ // getNextUnicodeChar();
+ // } else {
+ if (withoutUnicodePtr != 0) {
+ withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+ }
+ // }
+ }
+ } catch (IndexOutOfBoundsException e) {
+ throw new InvalidInputException(UNTERMINATED_STRING);
+ } catch (InvalidInputException e) {
+ if (e.getMessage().equals(INVALID_ESCAPE)) {
+ // recovery: if a closing double quote occurs within the next 50
+ // characters and before a line feed, move currentPosition just past it
+ for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+ if (currentPosition + lookAhead == source.length)
+ break;
+ if (source[currentPosition + lookAhead] == '\n')
+ break;
+ if (source[currentPosition + lookAhead] == '\"') {
+ currentPosition += lookAhead + 1;
+ break;
+ }
+ }
+ }
+ throw e; // rethrow
+ }
+ if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
+ // //$NON-NLS-?$ where ? is an
+ // int.
+ if (currentLine == null) {
+ currentLine = new NLSLine();
+ lines.add(currentLine);
+ }
+ currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
+ startPosition, currentPosition - 1));
+ }
+ }
public int getNextToken() throws InvalidInputException {
if (!phpMode) {
return getInlinedHTML(currentPosition);
return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
}
try {
- while (true) { //loop for jumping over comments
+ while (true) {
withoutUnicodePtr = 0;
- //start with a new token (even comment written with unicode )
+ //start with a new token
+ char encapsedChar = ' ';
+ if (!encapsedStringStack.isEmpty()) {
+ encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
+ }
+ if (encapsedChar != '$' && encapsedChar != ' ') {
+ currentCharacter = source[currentPosition++];
+ if (currentCharacter == encapsedChar) {
+ switch (currentCharacter) {
+ case '`' :
+ return TokenNameEncapsedString0;
+ case '\'' :
+ return TokenNameEncapsedString1;
+ case '"' :
+ return TokenNameEncapsedString2;
+ }
+ }
+ while (currentCharacter != encapsedChar) {
+ /** ** in PHP \r and \n are valid in string literals *** */
+ switch (currentCharacter) {
+ case '\\' :
+ int escapeSize = currentPosition;
+ boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
+ //scanEscapeCharacter make a side effect on this value and
+ // we need the previous value few lines down this one
+ scanDoubleQuotedEscapeCharacter();
+ escapeSize = currentPosition - escapeSize;
+ if (withoutUnicodePtr == 0) {
+ //buffer all the entries that have been left aside....
+ withoutUnicodePtr = currentPosition - escapeSize - 1
+ - startPosition;
+ System.arraycopy(source, startPosition,
+ withoutUnicodeBuffer, 1, withoutUnicodePtr);
+ withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+ } else { //overwrite the / in the buffer
+ withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
+ if (backSlashAsUnicodeInString) { //there are TWO \ in
+ withoutUnicodePtr--;
+ }
+ }
+ break;
+ case '$' :
+ if (isPHPIdentifierStart(source[currentPosition])
+ || source[currentPosition] == '{') {
+ currentPosition--;
+ encapsedStringStack.push(new Character('$'));
+ return TokenNameSTRING;
+ }
+ break;
+ case '{' :
+ if (source[currentPosition] == '$') { // CURLY_OPEN
+ currentPosition--;
+ encapsedStringStack.push(new Character('$'));
+ return TokenNameSTRING;
+ }
+ }
+ // consume next character
+ unicodeAsBackSlash = false;
+ currentCharacter = source[currentPosition++];
+ if (withoutUnicodePtr != 0) {
+ withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+ }
+ // }
+ } // end while
+ currentPosition--;
+ return TokenNameSTRING;
+ }
// ---------Consume white space and handles startPosition---------
int whiteStart = currentPosition;
- boolean isWhiteSpace;
- do {
+ startPosition = currentPosition;
+ currentCharacter = source[currentPosition++];
+ if (encapsedChar == '$') {
+ switch (currentCharacter) {
+ case '\\' :
+ currentCharacter = source[currentPosition++];
+ return TokenNameSTRING;
+ case '{' :
+ if (encapsedChar == '$') {
+ if (getNextChar('$'))
+ return TokenNameCURLY_OPEN;
+ }
+ return TokenNameLBRACE;
+ case '}' :
+ return TokenNameRBRACE;
+ case '[' :
+ return TokenNameLBRACKET;
+ case ']' :
+ return TokenNameRBRACKET;
+ case '\'' :
+ if (tokenizeStrings) {
+ consumeStringConstant();
+ return TokenNameStringConstant;
+ }
+ return TokenNameEncapsedString1;
+ case '"' :
+ return TokenNameEncapsedString2;
+ case '`' :
+ if (tokenizeStrings) {
+ consumeStringInterpolated();
+ return TokenNameStringInterpolated;
+ }
+ return TokenNameEncapsedString0;
+ case '-' :
+ if (getNextChar('>'))
+ return TokenNameMINUS_GREATER;
+ return TokenNameSTRING;
+ default :
+ if (currentCharacter == '$') {
+ int oldPosition = currentPosition;
+ try {
+ currentCharacter = source[currentPosition++];
+ if (currentCharacter == '{') {
+ return TokenNameDOLLAR_LBRACE;
+ }
+ if (isPHPIdentifierStart(currentCharacter)) {
+ return scanIdentifierOrKeyword(true);
+ } else {
+ currentPosition = oldPosition;
+ return TokenNameSTRING;
+ }
+ } catch (IndexOutOfBoundsException e) {
+ currentPosition = oldPosition;
+ return TokenNameSTRING;
+ }
+ }
+ if (isPHPIdentifierStart(currentCharacter))
+ return scanIdentifierOrKeyword(false);
+ if (Character.isDigit(currentCharacter))
+ return scanNumber(false);
+ return TokenNameERROR;
+ }
+ }
+ // boolean isWhiteSpace;
+
+ while ((currentCharacter == ' ')
+ || Character.isWhitespace(currentCharacter)) {
startPosition = currentPosition;
currentCharacter = source[currentPosition++];
// if (((currentCharacter = source[currentPosition++]) == '\\')
currentLine = null;
}
}
- isWhiteSpace = (currentCharacter == ' ')
- || Character.isWhitespace(currentCharacter);
+ // isWhiteSpace = (currentCharacter == ' ')
+ // || Character.isWhitespace(currentCharacter);
// }
- } while (isWhiteSpace);
+ }
if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
// reposition scanner in case we are interested by spaces as tokens
currentPosition--;
if (getNextChar('='))
return TokenNameLEFT_SHIFT_EQUAL;
if (getNextChar('<')) {
- int heredocStart = currentPosition;
- int heredocLength = 0;
currentCharacter = source[currentPosition++];
+ while (Character.isWhitespace(currentCharacter)) {
+ currentCharacter = source[currentPosition++];
+ }
+ int heredocStart = currentPosition - 1;
+ int heredocLength = 0;
if (isPHPIdentifierStart(currentCharacter)) {
currentCharacter = source[currentPosition++];
} else {
case '?' :
if (getNextChar('>')) {
phpMode = false;
- if (currentPosition==source.length) {
+ if (currentPosition == source.length) {
phpMode = true;
return TokenNameINLINE_HTML;
}
return TokenNameCOLON;
case '@' :
return TokenNameAT;
- // case '\'' :
- // {
- // int test;
- // if ((test = getNextChar('\n', '\r')) == 0) {
- // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
- // }
- // if (test > 0) {
- // // relocate if finding another quote fairly close: thus unicode
- // '/u000D' will be fully consumed
- // for (int lookAhead = 0;
- // lookAhead < 3;
- // lookAhead++) {
- // if (currentPosition + lookAhead
- // == source.length)
- // break;
- // if (source[currentPosition + lookAhead]
- // == '\n')
- // break;
- // if (source[currentPosition + lookAhead]
- // == '\'') {
- // currentPosition += lookAhead + 1;
- // break;
- // }
- // }
- // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
- // }
- // }
- // if (getNextChar('\'')) {
- // // relocate if finding another quote fairly close: thus unicode
- // '/u000D' will be fully consumed
- // for (int lookAhead = 0;
- // lookAhead < 3;
- // lookAhead++) {
- // if (currentPosition + lookAhead
- // == source.length)
- // break;
- // if (source[currentPosition + lookAhead]
- // == '\n')
- // break;
- // if (source[currentPosition + lookAhead]
- // == '\'') {
- // currentPosition += lookAhead + 1;
- // break;
- // }
- // }
- // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
- // }
- // if (getNextChar('\\'))
- // scanEscapeCharacter();
- // else { // consume next character
- // unicodeAsBackSlash = false;
- // if (((currentCharacter = source[currentPosition++])
- // == '\\')
- // && (source[currentPosition] == 'u')) {
- // getNextUnicodeChar();
- // } else {
- // if (withoutUnicodePtr != 0) {
- // withoutUnicodeBuffer[++withoutUnicodePtr] =
- // currentCharacter;
- // }
- // }
- // }
- // // if (getNextChar('\''))
- // // return TokenNameCharacterLiteral;
- // // relocate if finding another quote fairly close: thus unicode
- // '/u000D' will be fully consumed
- // for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
- // if (currentPosition + lookAhead == source.length)
- // break;
- // if (source[currentPosition + lookAhead] == '\n')
- // break;
- // if (source[currentPosition + lookAhead] == '\'') {
- // currentPosition += lookAhead + 1;
- // break;
- // }
- // }
- // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
case '\'' :
- try {
- // consume next character
- unicodeAsBackSlash = false;
- currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++]) == '\\')
- // && (source[currentPosition] == 'u')) {
- // getNextUnicodeChar();
- // } else {
- // if (withoutUnicodePtr != 0) {
- // withoutUnicodeBuffer[++withoutUnicodePtr] =
- // currentCharacter;
- // }
- // }
- while (currentCharacter != '\'') {
- /** ** in PHP \r and \n are valid in string literals *** */
- // if ((currentCharacter == '\n')
- // || (currentCharacter == '\r')) {
- // // relocate if finding another quote fairly close: thus
- // unicode '/u000D' will be fully consumed
- // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
- // if (currentPosition + lookAhead == source.length)
- // break;
- // if (source[currentPosition + lookAhead] == '\n')
- // break;
- // if (source[currentPosition + lookAhead] == '\"') {
- // currentPosition += lookAhead + 1;
- // break;
- // }
- // }
- // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
- // }
- if (currentCharacter == '\\') {
- int escapeSize = currentPosition;
- boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
- //scanEscapeCharacter make a side effect on this value and
- // we need the previous value few lines down this one
- scanSingleQuotedEscapeCharacter();
- escapeSize = currentPosition - escapeSize;
- if (withoutUnicodePtr == 0) {
- //buffer all the entries that have been left aside....
- withoutUnicodePtr = currentPosition - escapeSize - 1
- - startPosition;
- System.arraycopy(source, startPosition,
- withoutUnicodeBuffer, 1, withoutUnicodePtr);
- withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
- } else { //overwrite the / in the buffer
- withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
- if (backSlashAsUnicodeInString) { //there are TWO \ in
- // the stream where
- // only one is correct
- withoutUnicodePtr--;
- }
- }
- }
- // consume next character
- unicodeAsBackSlash = false;
- currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++]) ==
- // '\\')
- // && (source[currentPosition] == 'u')) {
- // getNextUnicodeChar();
- // } else {
- if (withoutUnicodePtr != 0) {
- withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
- }
- // }
- }
- } catch (IndexOutOfBoundsException e) {
- throw new InvalidInputException(UNTERMINATED_STRING);
- } catch (InvalidInputException e) {
- if (e.getMessage().equals(INVALID_ESCAPE)) {
- // relocate if finding another quote fairly close: thus
- // unicode '/u000D' will be fully consumed
- for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
- if (currentPosition + lookAhead == source.length)
- break;
- if (source[currentPosition + lookAhead] == '\n')
- break;
- if (source[currentPosition + lookAhead] == '\'') {
- currentPosition += lookAhead + 1;
- break;
- }
- }
- }
- throw e; // rethrow
- }
- if (checkNonExternalizedStringLiterals) { // check for presence
- // of NLS tags
- // //$NON-NLS-?$ where
- // ? is an int.
- if (currentLine == null) {
- currentLine = new NLSLine();
- lines.add(currentLine);
- }
- currentLine.add(new StringLiteral(
- getCurrentTokenSourceString(), startPosition,
- currentPosition - 1));
- }
+ consumeStringConstant();
return TokenNameStringConstant;
case '"' :
- try {
- // consume next character
- unicodeAsBackSlash = false;
- currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++]) == '\\')
- // && (source[currentPosition] == 'u')) {
- // getNextUnicodeChar();
- // } else {
- // if (withoutUnicodePtr != 0) {
- // withoutUnicodeBuffer[++withoutUnicodePtr] =
- // currentCharacter;
- // }
- // }
- while (currentCharacter != '"') {
- /** ** in PHP \r and \n are valid in string literals *** */
- // if ((currentCharacter == '\n')
- // || (currentCharacter == '\r')) {
- // // relocate if finding another quote fairly close: thus
- // unicode '/u000D' will be fully consumed
- // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
- // if (currentPosition + lookAhead == source.length)
- // break;
- // if (source[currentPosition + lookAhead] == '\n')
- // break;
- // if (source[currentPosition + lookAhead] == '\"') {
- // currentPosition += lookAhead + 1;
- // break;
- // }
- // }
- // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
- // }
- if (currentCharacter == '\\') {
- int escapeSize = currentPosition;
- boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
- //scanEscapeCharacter make a side effect on this value and
- // we need the previous value few lines down this one
- scanDoubleQuotedEscapeCharacter();
- escapeSize = currentPosition - escapeSize;
- if (withoutUnicodePtr == 0) {
- //buffer all the entries that have been left aside....
- withoutUnicodePtr = currentPosition - escapeSize - 1
- - startPosition;
- System.arraycopy(source, startPosition,
- withoutUnicodeBuffer, 1, withoutUnicodePtr);
- withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
- } else { //overwrite the / in the buffer
- withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
- if (backSlashAsUnicodeInString) { //there are TWO \ in
- // the stream where
- // only one is correct
- withoutUnicodePtr--;
- }
- }
- }
- // consume next character
- unicodeAsBackSlash = false;
- currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++]) ==
- // '\\')
- // && (source[currentPosition] == 'u')) {
- // getNextUnicodeChar();
- // } else {
- if (withoutUnicodePtr != 0) {
- withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
- }
- // }
- }
- } catch (IndexOutOfBoundsException e) {
- throw new InvalidInputException(UNTERMINATED_STRING);
- } catch (InvalidInputException e) {
- if (e.getMessage().equals(INVALID_ESCAPE)) {
- // relocate if finding another quote fairly close: thus
- // unicode '/u000D' will be fully consumed
- for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
- if (currentPosition + lookAhead == source.length)
- break;
- if (source[currentPosition + lookAhead] == '\n')
- break;
- if (source[currentPosition + lookAhead] == '\"') {
- currentPosition += lookAhead + 1;
- break;
- }
- }
- }
- throw e; // rethrow
+ if (tokenizeStrings) {
+ consumeStringLiteral();
+ return TokenNameStringLiteral;
}
- if (checkNonExternalizedStringLiterals) { // check for presence
- // of NLS tags
- // //$NON-NLS-?$ where
- // ? is an int.
- if (currentLine == null) {
- currentLine = new NLSLine();
- lines.add(currentLine);
- }
- currentLine.add(new StringLiteral(
- getCurrentTokenSourceString(), startPosition,
- currentPosition - 1));
- }
- return TokenNameStringLiteral;
+ return TokenNameEncapsedString2;
case '`' :
- try {
- // consume next character
- unicodeAsBackSlash = false;
- currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++]) == '\\')
- // && (source[currentPosition] == 'u')) {
- // getNextUnicodeChar();
- // } else {
- // if (withoutUnicodePtr != 0) {
- // withoutUnicodeBuffer[++withoutUnicodePtr] =
- // currentCharacter;
- // }
- // }
- while (currentCharacter != '`') {
- /** ** in PHP \r and \n are valid in string literals *** */
- // if ((currentCharacter == '\n')
- // || (currentCharacter == '\r')) {
- // // relocate if finding another quote fairly close: thus
- // unicode '/u000D' will be fully consumed
- // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
- // if (currentPosition + lookAhead == source.length)
- // break;
- // if (source[currentPosition + lookAhead] == '\n')
- // break;
- // if (source[currentPosition + lookAhead] == '\"') {
- // currentPosition += lookAhead + 1;
- // break;
- // }
- // }
- // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
- // }
- if (currentCharacter == '\\') {
- int escapeSize = currentPosition;
- boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
- //scanEscapeCharacter make a side effect on this value and
- // we need the previous value few lines down this one
- scanDoubleQuotedEscapeCharacter();
- escapeSize = currentPosition - escapeSize;
- if (withoutUnicodePtr == 0) {
- //buffer all the entries that have been left aside....
- withoutUnicodePtr = currentPosition - escapeSize - 1
- - startPosition;
- System.arraycopy(source, startPosition,
- withoutUnicodeBuffer, 1, withoutUnicodePtr);
- withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
- } else { //overwrite the / in the buffer
- withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
- if (backSlashAsUnicodeInString) { //there are TWO \ in
- // the stream where
- // only one is correct
- withoutUnicodePtr--;
- }
- }
- }
- // consume next character
- unicodeAsBackSlash = false;
- currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++]) ==
- // '\\')
- // && (source[currentPosition] == 'u')) {
- // getNextUnicodeChar();
- // } else {
- if (withoutUnicodePtr != 0) {
- withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
- }
- // }
- }
- } catch (IndexOutOfBoundsException e) {
- throw new InvalidInputException(UNTERMINATED_STRING);
- } catch (InvalidInputException e) {
- if (e.getMessage().equals(INVALID_ESCAPE)) {
- // relocate if finding another quote fairly close: thus
- // unicode '/u000D' will be fully consumed
- for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
- if (currentPosition + lookAhead == source.length)
- break;
- if (source[currentPosition + lookAhead] == '\n')
- break;
- if (source[currentPosition + lookAhead] == '`') {
- currentPosition += lookAhead + 1;
- break;
- }
- }
- }
- throw e; // rethrow
- }
- if (checkNonExternalizedStringLiterals) { // check for presence
- // of NLS tags
- // //$NON-NLS-?$ where
- // ? is an int.
- if (currentLine == null) {
- currentLine = new NLSLine();
- lines.add(currentLine);
- }
- currentLine.add(new StringLiteral(
- getCurrentTokenSourceString(), startPosition,
- currentPosition - 1));
+ if (tokenizeStrings) {
+ consumeStringInterpolated();
+ return TokenNameStringInterpolated;
}
- return TokenNameStringInterpolated;
+ return TokenNameEncapsedString0;
case '#' :
case '/' :
{
// Character.getNumericValue(source[currentPosition++]))
// > 15
// || c4 < 0) {
- // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
+ // throw new
+ // InvalidInputException(INVALID_UNICODE_ESCAPE);
// } else {
// currentCharacter =
// (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
endPositionForLineComment = currentPosition - 1;
}
recordComment(false);
- if ((currentCharacter == '\r')
+ if (this.taskTags != null) checkTaskTag(this.startPosition, this.currentPosition);
+ if ((currentCharacter == '\r')
|| (currentCharacter == '\n')) {
checkNonExternalizeString();
if (recordLineSeparator) {
*/
private int getInlinedHTML(int start) throws InvalidInputException {
// int htmlPosition = start;
- if (currentPosition>source.length) {
- currentPosition = source.length;
- return TokenNameEOF;
+ if (currentPosition > source.length) {
+ currentPosition = source.length;
+ return TokenNameEOF;
}
startPosition = start;
try {
initialPosition = currentPosition = 0;
containsAssertKeyword = false;
withoutUnicodeBuffer = new char[this.source.length];
+ encapsedStringStack = new Stack();
}
public String toString() {
if (startPosition == source.length)
return "endswitch"; //$NON-NLS-1$
case TokenNameendwhile :
return "endwhile"; //$NON-NLS-1$
+ case TokenNameexit:
+ return "exit";
case TokenNameextends :
return "extends"; //$NON-NLS-1$
// case TokenNamefalse :
case TokenNameDoubleLiteral :
return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
case TokenNameStringLiteral :
- return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
+ return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
case TokenNameStringConstant :
return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
case TokenNameStringInterpolated :
return "StringInterpolated(" + new String(getCurrentTokenSource())
+ ")"; //$NON-NLS-1$ //$NON-NLS-2$
+ case TokenNameEncapsedString0 :
+ return "`"; //$NON-NLS-1$
+ case TokenNameEncapsedString1 :
+ return "\'"; //$NON-NLS-1$
+ case TokenNameEncapsedString2 :
+ return "\""; //$NON-NLS-1$
+ case TokenNameSTRING :
+ return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
case TokenNameHEREDOC :
return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
case TokenNamePLUS_PLUS :
return "@";
case TokenNameDOLLAR :
return "$";
- // case TokenNameDOLLAR_LBRACE :
- // return "${";
+ case TokenNameDOLLAR_LBRACE :
+ return "${";
case TokenNameEOF :
return "EOF"; //$NON-NLS-1$
case TokenNameWHITESPACE :
return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
case TokenNameCOMMENT_PHPDOC :
return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
- // case TokenNameHTML :
- // return "HTML(" + new String(getCurrentTokenSource()) + ")";
- // //$NON-NLS-1$
+ // case TokenNameHTML :
+ // return "HTML(" + new String(getCurrentTokenSource()) + ")";
+ // //$NON-NLS-1$
case TokenNameFILE :
return "__FILE__"; //$NON-NLS-1$
case TokenNameLINE :
+ new String(getCurrentTokenSource()); //$NON-NLS-1$
}
}
+
+ /**
+ * Creates a scanner with tokenizeComments and tokenizeWhiteSpace both false
+ * by delegating to Scanner(boolean, boolean).
+ */
+ public Scanner() {
+ this(false, false);
+ }
+ /**
+ * Delegates to the three-argument constructor, passing false as its third
+ * argument (checkNonExternalizedStringLiterals).
+ *
+ * @param tokenizeComments forwarded unchanged
+ * @param tokenizeWhiteSpace forwarded unchanged
+ */
+ public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
+ this(tokenizeComments, tokenizeWhiteSpace, false);
+ }
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
boolean checkNonExternalizedStringLiterals) {
this(tokenizeComments, tokenizeWhiteSpace,
}
+ /**
+ * Delegates to the seven-argument constructor with tokenizeStrings set to
+ * false and both taskTags and taskPriorities set to null.
+ *
+ * @param tokenizeComments forwarded unchanged
+ * @param tokenizeWhiteSpace forwarded unchanged
+ * @param checkNonExternalizedStringLiterals forwarded unchanged
+ * @param assertMode forwarded unchanged
+ */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
boolean checkNonExternalizedStringLiterals, boolean assertMode) {
+ this(tokenizeComments, tokenizeWhiteSpace,
+ checkNonExternalizedStringLiterals, assertMode, false, null, null);
+ }
+ /**
+ * Primary constructor: stores every argument in the field of the same name
+ * and sets eofPosition to Integer.MAX_VALUE.
+ *
+ * @param tokenizeComments stored in this.tokenizeComments
+ * @param tokenizeWhiteSpace stored in this.tokenizeWhiteSpace
+ * @param checkNonExternalizedStringLiterals when true, the consumeString*
+ * methods record each scanned string as a StringLiteral on the
+ * current NLSLine
+ * @param assertMode stored in this.assertMode
+ * @param tokenizeStrings when true, getNextToken() consumes double-quoted and
+ * backtick strings as single tokens instead of returning only the
+ * delimiter token
+ * @param taskTags may be null; getNextToken() calls checkTaskTag() only when
+ * this is non-null
+ * @param taskPriorities stored in this.taskPriorities
+ */
+ public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
+ boolean checkNonExternalizedStringLiterals, boolean assertMode,
+ boolean tokenizeStrings,
+ char[][] taskTags,
+ char[][] taskPriorities) {
this.eofPosition = Integer.MAX_VALUE;
this.tokenizeComments = tokenizeComments;
this.tokenizeWhiteSpace = tokenizeWhiteSpace;
+ this.tokenizeStrings = tokenizeStrings;
this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
this.assertMode = assertMode;
+ // left null here; getNextToken() calls isEmpty() on it, so a Stack must be
+ // assigned before scanning (this class does so next to the
+ // withoutUnicodeBuffer allocation)
+ this.encapsedStringStack = null;
+ this.taskTags = taskTags;
+ this.taskPriorities = taskPriorities;
}
private void checkNonExternalizeString() throws InvalidInputException {
if (currentLine == null)