import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
+import java.util.Stack;
+
import net.sourceforge.phpdt.core.compiler.CharOperation;
import net.sourceforge.phpdt.core.compiler.IScanner;
import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
import net.sourceforge.phpdt.core.compiler.InvalidInputException;
-import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
+import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
+
+
public class Scanner implements IScanner, ITerminalSymbols {
/*
* APIs are - getNextToken(), which returns the current type of the token
public boolean containsAssertKeyword = false;
public boolean recordLineSeparator;
public boolean phpMode = false;
+ public Stack encapsedStringStack = null;
public char currentCharacter;
public int startPosition;
public int currentPosition;
// source
public boolean tokenizeComments;
public boolean tokenizeWhiteSpace;
+ public boolean tokenizeStrings;
//source should be viewed as a window (aka a part)
//of an entire very large stream
public char source[];
//when == 0 ==> no unicode in the current token
public boolean unicodeAsBackSlash = false;
public boolean scanningFloatLiteral = false;
- //support for /** comments
- //public char[][] comments = new char[10][];
- public int[] commentStops = new int[10];
- public int[] commentStarts = new int[10];
- public int commentPtr = -1; // no comment test with commentPtr value -1
+//support for /** comments
+ public int[] commentStops = new int[10];
+ public int[] commentStarts = new int[10];
+ public int commentPtr = -1; // no comment test with commentPtr value -1
+ protected int lastCommentLinePosition = -1;
//diet parsing support - jump over some method body when requested
public boolean diet = false;
//support for the poor-line-debuggers ....
public char[][] taskPriorities = null;
public static final boolean DEBUG = false;
public static final boolean TRACE = false;
- public Scanner() {
- this(false, false);
- }
- public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
- this(tokenizeComments, tokenizeWhiteSpace, false);
- }
+
/**
* Determines if the specified character is permissible as the first
* character in a PHP identifier
currentPosition = tempPosition;
return TokenNameLPAREN;
}
+ /**
+  * Consumes the remainder of a back-tick delimited string. The opening
+  * back-tick has already been read by the caller; on normal return
+  * currentPosition is just past the closing back-tick. Escape sequences are
+  * scanned with scanDoubleQuotedEscapeCharacter().
+  *
+  * @throws InvalidInputException
+  *           UNTERMINATED_STRING if the source ends before the closing
+  *           back-tick, or the exception raised by the escape scanner
+  */
+ public void consumeStringInterpolated() throws InvalidInputException {
+   consumeQuotedString('`', false);
+ }
+ /**
+  * Consumes the remainder of a single-quoted string. The opening quote has
+  * already been read by the caller; on normal return currentPosition is just
+  * past the closing quote. Escape sequences are scanned with
+  * scanSingleQuotedEscapeCharacter().
+  *
+  * @throws InvalidInputException
+  *           UNTERMINATED_STRING if the source ends before the closing
+  *           quote, or the exception raised by the escape scanner
+  */
+ public void consumeStringConstant() throws InvalidInputException {
+   consumeQuotedString('\'', true);
+ }
+ /**
+  * Consumes the remainder of a double-quoted string. The opening quote has
+  * already been read by the caller; on normal return currentPosition is just
+  * past the closing quote. Escape sequences are scanned with
+  * scanDoubleQuotedEscapeCharacter().
+  *
+  * @throws InvalidInputException
+  *           UNTERMINATED_STRING if the source ends before the closing
+  *           quote, or the exception raised by the escape scanner
+  */
+ public void consumeStringLiteral() throws InvalidInputException {
+   consumeQuotedString('"', false);
+ }
+ /**
+  * Shared implementation of the three consumeString*() methods, which only
+  * differ in the closing delimiter and in the escape scanner they use.
+  * Line breaks are not rejected: in PHP \r and \n are valid inside string
+  * literals.
+  *
+  * @param delimiter
+  *          the character that terminates the string
+  * @param singleQuoted
+  *          true to scan escapes with scanSingleQuotedEscapeCharacter(),
+  *          false to use scanDoubleQuotedEscapeCharacter()
+  * @throws InvalidInputException
+  *           UNTERMINATED_STRING if an index runs out of bounds while
+  *           scanning, or the exception raised by the escape scanner
+  */
+ private void consumeQuotedString(char delimiter, boolean singleQuoted)
+     throws InvalidInputException {
+   try {
+     // consume next character
+     unicodeAsBackSlash = false;
+     currentCharacter = source[currentPosition++];
+     while (currentCharacter != delimiter) {
+       if (currentCharacter == '\\') {
+         int escapeSize = currentPosition;
+         // the escape scanner has a side effect on unicodeAsBackSlash and the
+         // previous value is needed a few lines below
+         boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
+         if (singleQuoted) {
+           scanSingleQuotedEscapeCharacter();
+         } else {
+           scanDoubleQuotedEscapeCharacter();
+         }
+         escapeSize = currentPosition - escapeSize;
+         if (withoutUnicodePtr == 0) {
+           // buffer all the characters of the token that were left aside so far
+           withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
+           System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
+               withoutUnicodePtr);
+           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+         } else {
+           // overwrite the backslash already stored in the buffer
+           withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
+           if (backSlashAsUnicodeInString) {
+             // there are TWO \ in the stream where only one is correct
+             withoutUnicodePtr--;
+           }
+         }
+       }
+       // consume next character
+       unicodeAsBackSlash = false;
+       currentCharacter = source[currentPosition++];
+       if (withoutUnicodePtr != 0) {
+         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+       }
+     }
+   } catch (IndexOutOfBoundsException e) {
+     throw new InvalidInputException(UNTERMINATED_STRING);
+   } catch (InvalidInputException e) {
+     // constant-first comparison: an exception without a message must not
+     // turn into a NullPointerException here
+     if (INVALID_ESCAPE.equals(e.getMessage())) {
+       // relocate just behind a closing delimiter if one is found fairly close
+       // (at most 50 characters ahead, on the same line)
+       for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
+         // '>=' so the recovery itself can never index past the source
+         if (currentPosition + lookAhead >= source.length)
+           break;
+         if (source[currentPosition + lookAhead] == '\n')
+           break;
+         if (source[currentPosition + lookAhead] == delimiter) {
+           currentPosition += lookAhead + 1;
+           break;
+         }
+       }
+     }
+     throw e; // rethrow
+   }
+   if (checkNonExternalizedStringLiterals) {
+     // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
+     if (currentLine == null) {
+       currentLine = new NLSLine();
+       lines.add(currentLine);
+     }
+     currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
+         startPosition, currentPosition - 1));
+   }
+ }
public int getNextToken() throws InvalidInputException {
if (!phpMode) {
return getInlinedHTML(currentPosition);
return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
}
try {
- while (true) { //loop for jumping over comments
+ while (true) {
withoutUnicodePtr = 0;
- //start with a new token (even comment written with unicode )
+ //start with a new token
+ char encapsedChar = ' ';
+ if (!encapsedStringStack.isEmpty()) {
+ encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
+ }
+ if (encapsedChar != '$' && encapsedChar != ' ') {
+ currentCharacter = source[currentPosition++];
+ if (currentCharacter == encapsedChar) {
+ switch (currentCharacter) {
+ case '`' :
+ return TokenNameEncapsedString0;
+ case '\'' :
+ return TokenNameEncapsedString1;
+ case '"' :
+ return TokenNameEncapsedString2;
+ }
+ }
+ while (currentCharacter != encapsedChar) {
+ /** ** in PHP \r and \n are valid in string literals *** */
+ switch (currentCharacter) {
+ case '\\' :
+ int escapeSize = currentPosition;
+ boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
+ //scanEscapeCharacter make a side effect on this value and
+ // we need the previous value few lines down this one
+ scanDoubleQuotedEscapeCharacter();
+ escapeSize = currentPosition - escapeSize;
+ if (withoutUnicodePtr == 0) {
+ //buffer all the entries that have been left aside....
+ withoutUnicodePtr = currentPosition - escapeSize - 1
+ - startPosition;
+ System.arraycopy(source, startPosition,
+ withoutUnicodeBuffer, 1, withoutUnicodePtr);
+ withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+ } else { //overwrite the / in the buffer
+ withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
+ if (backSlashAsUnicodeInString) { //there are TWO \ in
+ withoutUnicodePtr--;
+ }
+ }
+ break;
+ case '$' :
+ if (isPHPIdentifierStart(source[currentPosition])
+ || source[currentPosition] == '{') {
+ currentPosition--;
+ encapsedStringStack.push(new Character('$'));
+ return TokenNameSTRING;
+ }
+ break;
+ case '{' :
+ if (source[currentPosition] == '$') { // CURLY_OPEN
+ currentPosition--;
+ encapsedStringStack.push(new Character('$'));
+ return TokenNameSTRING;
+ }
+ }
+ // consume next character
+ unicodeAsBackSlash = false;
+ currentCharacter = source[currentPosition++];
+ if (withoutUnicodePtr != 0) {
+ withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
+ }
+ // }
+ } // end while
+ currentPosition--;
+ return TokenNameSTRING;
+ }
// ---------Consume white space and handles startPosition---------
int whiteStart = currentPosition;
- boolean isWhiteSpace;
- do {
+ startPosition = currentPosition;
+ currentCharacter = source[currentPosition++];
+ if (encapsedChar == '$') {
+ switch (currentCharacter) {
+ case '\\' :
+ currentCharacter = source[currentPosition++];
+ return TokenNameSTRING;
+ case '{' :
+ if (encapsedChar == '$') {
+ if (getNextChar('$'))
+ return TokenNameCURLY_OPEN;
+ }
+ return TokenNameLBRACE;
+ case '}' :
+ return TokenNameRBRACE;
+ case '[' :
+ return TokenNameLBRACKET;
+ case ']' :
+ return TokenNameRBRACKET;
+ case '\'' :
+ if (tokenizeStrings) {
+ consumeStringConstant();
+ return TokenNameStringSingleQuote;
+ }
+ return TokenNameEncapsedString1;
+ case '"' :
+ return TokenNameEncapsedString2;
+ case '`' :
+ if (tokenizeStrings) {
+ consumeStringInterpolated();
+ return TokenNameStringInterpolated;
+ }
+ return TokenNameEncapsedString0;
+ case '-' :
+ if (getNextChar('>'))
+ return TokenNameMINUS_GREATER;
+ return TokenNameSTRING;
+ default :
+ if (currentCharacter == '$') {
+ int oldPosition = currentPosition;
+ try {
+ currentCharacter = source[currentPosition++];
+ if (currentCharacter == '{') {
+ return TokenNameDOLLAR_LBRACE;
+ }
+ if (isPHPIdentifierStart(currentCharacter)) {
+ return scanIdentifierOrKeyword(true);
+ } else {
+ currentPosition = oldPosition;
+ return TokenNameSTRING;
+ }
+ } catch (IndexOutOfBoundsException e) {
+ currentPosition = oldPosition;
+ return TokenNameSTRING;
+ }
+ }
+ if (isPHPIdentifierStart(currentCharacter))
+ return scanIdentifierOrKeyword(false);
+ if (Character.isDigit(currentCharacter))
+ return scanNumber(false);
+ return TokenNameERROR;
+ }
+ }
+ // boolean isWhiteSpace;
+
+ while ((currentCharacter == ' ')
+ || Character.isWhitespace(currentCharacter)) {
startPosition = currentPosition;
currentCharacter = source[currentPosition++];
// if (((currentCharacter = source[currentPosition++]) == '\\')
currentLine = null;
}
}
- isWhiteSpace = (currentCharacter == ' ')
- || Character.isWhitespace(currentCharacter);
+ // isWhiteSpace = (currentCharacter == ' ')
+ // || Character.isWhitespace(currentCharacter);
// }
- } while (isWhiteSpace);
+ }
if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
// reposition scanner in case we are interested by spaces as tokens
currentPosition--;
if (getNextChar('='))
return TokenNameLEFT_SHIFT_EQUAL;
if (getNextChar('<')) {
- int heredocStart = currentPosition;
- int heredocLength = 0;
currentCharacter = source[currentPosition++];
+ while (Character.isWhitespace(currentCharacter)) {
+ currentCharacter = source[currentPosition++];
+ }
+ int heredocStart = currentPosition - 1;
+ int heredocLength = 0;
if (isPHPIdentifierStart(currentCharacter)) {
currentCharacter = source[currentPosition++];
} else {
case '?' :
if (getNextChar('>')) {
phpMode = false;
- if (currentPosition==source.length) {
+ if (currentPosition == source.length) {
phpMode = true;
return TokenNameINLINE_HTML;
}
return TokenNameCOLON;
case '@' :
return TokenNameAT;
- // case '\'' :
- // {
- // int test;
- // if ((test = getNextChar('\n', '\r')) == 0) {
- // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
- // }
- // if (test > 0) {
- // // relocate if finding another quote fairly close: thus unicode
- // '/u000D' will be fully consumed
- // for (int lookAhead = 0;
- // lookAhead < 3;
- // lookAhead++) {
- // if (currentPosition + lookAhead
- // == source.length)
- // break;
- // if (source[currentPosition + lookAhead]
- // == '\n')
- // break;
- // if (source[currentPosition + lookAhead]
- // == '\'') {
- // currentPosition += lookAhead + 1;
- // break;
- // }
- // }
- // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
- // }
- // }
- // if (getNextChar('\'')) {
- // // relocate if finding another quote fairly close: thus unicode
- // '/u000D' will be fully consumed
- // for (int lookAhead = 0;
- // lookAhead < 3;
- // lookAhead++) {
- // if (currentPosition + lookAhead
- // == source.length)
- // break;
- // if (source[currentPosition + lookAhead]
- // == '\n')
- // break;
- // if (source[currentPosition + lookAhead]
- // == '\'') {
- // currentPosition += lookAhead + 1;
- // break;
- // }
- // }
- // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
- // }
- // if (getNextChar('\\'))
- // scanEscapeCharacter();
- // else { // consume next character
- // unicodeAsBackSlash = false;
- // if (((currentCharacter = source[currentPosition++])
- // == '\\')
- // && (source[currentPosition] == 'u')) {
- // getNextUnicodeChar();
- // } else {
- // if (withoutUnicodePtr != 0) {
- // withoutUnicodeBuffer[++withoutUnicodePtr] =
- // currentCharacter;
- // }
- // }
- // }
- // // if (getNextChar('\''))
- // // return TokenNameCharacterLiteral;
- // // relocate if finding another quote fairly close: thus unicode
- // '/u000D' will be fully consumed
- // for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
- // if (currentPosition + lookAhead == source.length)
- // break;
- // if (source[currentPosition + lookAhead] == '\n')
- // break;
- // if (source[currentPosition + lookAhead] == '\'') {
- // currentPosition += lookAhead + 1;
- // break;
- // }
- // }
- // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
case '\'' :
- try {
- // consume next character
- unicodeAsBackSlash = false;
- currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++]) == '\\')
- // && (source[currentPosition] == 'u')) {
- // getNextUnicodeChar();
- // } else {
- // if (withoutUnicodePtr != 0) {
- // withoutUnicodeBuffer[++withoutUnicodePtr] =
- // currentCharacter;
- // }
- // }
- while (currentCharacter != '\'') {
- /** ** in PHP \r and \n are valid in string literals *** */
- // if ((currentCharacter == '\n')
- // || (currentCharacter == '\r')) {
- // // relocate if finding another quote fairly close: thus
- // unicode '/u000D' will be fully consumed
- // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
- // if (currentPosition + lookAhead == source.length)
- // break;
- // if (source[currentPosition + lookAhead] == '\n')
- // break;
- // if (source[currentPosition + lookAhead] == '\"') {
- // currentPosition += lookAhead + 1;
- // break;
- // }
- // }
- // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
- // }
- if (currentCharacter == '\\') {
- int escapeSize = currentPosition;
- boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
- //scanEscapeCharacter make a side effect on this value and
- // we need the previous value few lines down this one
- scanSingleQuotedEscapeCharacter();
- escapeSize = currentPosition - escapeSize;
- if (withoutUnicodePtr == 0) {
- //buffer all the entries that have been left aside....
- withoutUnicodePtr = currentPosition - escapeSize - 1
- - startPosition;
- System.arraycopy(source, startPosition,
- withoutUnicodeBuffer, 1, withoutUnicodePtr);
- withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
- } else { //overwrite the / in the buffer
- withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
- if (backSlashAsUnicodeInString) { //there are TWO \ in
- // the stream where
- // only one is correct
- withoutUnicodePtr--;
- }
- }
- }
- // consume next character
- unicodeAsBackSlash = false;
- currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++]) ==
- // '\\')
- // && (source[currentPosition] == 'u')) {
- // getNextUnicodeChar();
- // } else {
- if (withoutUnicodePtr != 0) {
- withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
- }
- // }
- }
- } catch (IndexOutOfBoundsException e) {
- throw new InvalidInputException(UNTERMINATED_STRING);
- } catch (InvalidInputException e) {
- if (e.getMessage().equals(INVALID_ESCAPE)) {
- // relocate if finding another quote fairly close: thus
- // unicode '/u000D' will be fully consumed
- for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
- if (currentPosition + lookAhead == source.length)
- break;
- if (source[currentPosition + lookAhead] == '\n')
- break;
- if (source[currentPosition + lookAhead] == '\'') {
- currentPosition += lookAhead + 1;
- break;
- }
- }
- }
- throw e; // rethrow
- }
- if (checkNonExternalizedStringLiterals) { // check for presence
- // of NLS tags
- // //$NON-NLS-?$ where
- // ? is an int.
- if (currentLine == null) {
- currentLine = new NLSLine();
- lines.add(currentLine);
- }
- currentLine.add(new StringLiteral(
- getCurrentTokenSourceString(), startPosition,
- currentPosition - 1));
- }
- return TokenNameStringConstant;
+ consumeStringConstant();
+ return TokenNameStringSingleQuote;
case '"' :
- try {
- // consume next character
- unicodeAsBackSlash = false;
- currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++]) == '\\')
- // && (source[currentPosition] == 'u')) {
- // getNextUnicodeChar();
- // } else {
- // if (withoutUnicodePtr != 0) {
- // withoutUnicodeBuffer[++withoutUnicodePtr] =
- // currentCharacter;
- // }
- // }
- while (currentCharacter != '"') {
- /** ** in PHP \r and \n are valid in string literals *** */
- // if ((currentCharacter == '\n')
- // || (currentCharacter == '\r')) {
- // // relocate if finding another quote fairly close: thus
- // unicode '/u000D' will be fully consumed
- // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
- // if (currentPosition + lookAhead == source.length)
- // break;
- // if (source[currentPosition + lookAhead] == '\n')
- // break;
- // if (source[currentPosition + lookAhead] == '\"') {
- // currentPosition += lookAhead + 1;
- // break;
- // }
- // }
- // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
- // }
- if (currentCharacter == '\\') {
- int escapeSize = currentPosition;
- boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
- //scanEscapeCharacter make a side effect on this value and
- // we need the previous value few lines down this one
- scanDoubleQuotedEscapeCharacter();
- escapeSize = currentPosition - escapeSize;
- if (withoutUnicodePtr == 0) {
- //buffer all the entries that have been left aside....
- withoutUnicodePtr = currentPosition - escapeSize - 1
- - startPosition;
- System.arraycopy(source, startPosition,
- withoutUnicodeBuffer, 1, withoutUnicodePtr);
- withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
- } else { //overwrite the / in the buffer
- withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
- if (backSlashAsUnicodeInString) { //there are TWO \ in
- // the stream where
- // only one is correct
- withoutUnicodePtr--;
- }
- }
- }
- // consume next character
- unicodeAsBackSlash = false;
- currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++]) ==
- // '\\')
- // && (source[currentPosition] == 'u')) {
- // getNextUnicodeChar();
- // } else {
- if (withoutUnicodePtr != 0) {
- withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
- }
- // }
- }
- } catch (IndexOutOfBoundsException e) {
- throw new InvalidInputException(UNTERMINATED_STRING);
- } catch (InvalidInputException e) {
- if (e.getMessage().equals(INVALID_ESCAPE)) {
- // relocate if finding another quote fairly close: thus
- // unicode '/u000D' will be fully consumed
- for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
- if (currentPosition + lookAhead == source.length)
- break;
- if (source[currentPosition + lookAhead] == '\n')
- break;
- if (source[currentPosition + lookAhead] == '\"') {
- currentPosition += lookAhead + 1;
- break;
- }
- }
- }
- throw e; // rethrow
- }
- if (checkNonExternalizedStringLiterals) { // check for presence
- // of NLS tags
- // //$NON-NLS-?$ where
- // ? is an int.
- if (currentLine == null) {
- currentLine = new NLSLine();
- lines.add(currentLine);
- }
- currentLine.add(new StringLiteral(
- getCurrentTokenSourceString(), startPosition,
- currentPosition - 1));
+ if (tokenizeStrings) {
+ consumeStringLiteral();
+ return TokenNameStringDoubleQuote;
}
- return TokenNameStringLiteral;
+ return TokenNameEncapsedString2;
case '`' :
- try {
- // consume next character
- unicodeAsBackSlash = false;
- currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++]) == '\\')
- // && (source[currentPosition] == 'u')) {
- // getNextUnicodeChar();
- // } else {
- // if (withoutUnicodePtr != 0) {
- // withoutUnicodeBuffer[++withoutUnicodePtr] =
- // currentCharacter;
- // }
- // }
- while (currentCharacter != '`') {
- /** ** in PHP \r and \n are valid in string literals *** */
- // if ((currentCharacter == '\n')
- // || (currentCharacter == '\r')) {
- // // relocate if finding another quote fairly close: thus
- // unicode '/u000D' will be fully consumed
- // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
- // if (currentPosition + lookAhead == source.length)
- // break;
- // if (source[currentPosition + lookAhead] == '\n')
- // break;
- // if (source[currentPosition + lookAhead] == '\"') {
- // currentPosition += lookAhead + 1;
- // break;
- // }
- // }
- // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
- // }
- if (currentCharacter == '\\') {
- int escapeSize = currentPosition;
- boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
- //scanEscapeCharacter make a side effect on this value and
- // we need the previous value few lines down this one
- scanDoubleQuotedEscapeCharacter();
- escapeSize = currentPosition - escapeSize;
- if (withoutUnicodePtr == 0) {
- //buffer all the entries that have been left aside....
- withoutUnicodePtr = currentPosition - escapeSize - 1
- - startPosition;
- System.arraycopy(source, startPosition,
- withoutUnicodeBuffer, 1, withoutUnicodePtr);
- withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
- } else { //overwrite the / in the buffer
- withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
- if (backSlashAsUnicodeInString) { //there are TWO \ in
- // the stream where
- // only one is correct
- withoutUnicodePtr--;
- }
- }
- }
- // consume next character
- unicodeAsBackSlash = false;
- currentCharacter = source[currentPosition++];
- // if (((currentCharacter = source[currentPosition++]) ==
- // '\\')
- // && (source[currentPosition] == 'u')) {
- // getNextUnicodeChar();
- // } else {
- if (withoutUnicodePtr != 0) {
- withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
- }
- // }
- }
- } catch (IndexOutOfBoundsException e) {
- throw new InvalidInputException(UNTERMINATED_STRING);
- } catch (InvalidInputException e) {
- if (e.getMessage().equals(INVALID_ESCAPE)) {
- // relocate if finding another quote fairly close: thus
- // unicode '/u000D' will be fully consumed
- for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
- if (currentPosition + lookAhead == source.length)
- break;
- if (source[currentPosition + lookAhead] == '\n')
- break;
- if (source[currentPosition + lookAhead] == '`') {
- currentPosition += lookAhead + 1;
- break;
- }
- }
- }
- throw e; // rethrow
- }
- if (checkNonExternalizedStringLiterals) { // check for presence
- // of NLS tags
- // //$NON-NLS-?$ where
- // ? is an int.
- if (currentLine == null) {
- currentLine = new NLSLine();
- lines.add(currentLine);
- }
- currentLine.add(new StringLiteral(
- getCurrentTokenSourceString(), startPosition,
- currentPosition - 1));
+ if (tokenizeStrings) {
+ consumeStringInterpolated();
+ return TokenNameStringInterpolated;
}
- return TokenNameStringInterpolated;
+ return TokenNameEncapsedString0;
case '#' :
case '/' :
{
int test;
if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
//line comment
+ this.lastCommentLinePosition = this.currentPosition;
int endPositionForLineComment = 0;
try { //get the next char
currentCharacter = source[currentPosition++];
// Character.getNumericValue(source[currentPosition++]))
// > 15
// || c4 < 0) {
- // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
+ // throw new
+ // InvalidInputException(INVALID_UNICODE_ESCAPE);
// } else {
// currentCharacter =
// (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// } //jump over the \\
boolean isUnicode = false;
while (currentCharacter != '\r' && currentCharacter != '\n') {
+ this.lastCommentLinePosition = this.currentPosition;
if (currentCharacter == '?') {
if (getNextChar('>')) {
startPosition = currentPosition - 2;
} else {
endPositionForLineComment = currentPosition - 1;
}
- recordComment(false);
- if ((currentCharacter == '\r')
+// recordComment(false);
+ recordComment(TokenNameCOMMENT_LINE);
+ if (this.taskTags != null) checkTaskTag(this.startPosition, this.currentPosition);
+ if ((currentCharacter == '\r')
|| (currentCharacter == '\n')) {
checkNonExternalizeString();
if (recordLineSeparator) {
// currentPosition++;
// } //jump over the \\
}
- recordComment(isJavadoc);
+ //recordComment(isJavadoc);
+ if (isJavadoc) {
+ recordComment(TokenNameCOMMENT_PHPDOC);
+ } else {
+ recordComment(TokenNameCOMMENT_BLOCK);
+ }
+
if (tokenizeComments) {
if (isJavadoc)
return TokenNameCOMMENT_PHPDOC;
}
return TokenNameEOF;
}
+
+ /**
+ * Scans inlined HTML beginning at <code>start</code> by delegating to
+ * {@link #getInlinedHTMLToken(int)} and returns that token unchanged.
+ *
+ * @param start the position handed through to getInlinedHTMLToken
+ * @return the token produced by getInlinedHTMLToken
+ * @throws InvalidInputException propagated from getInlinedHTMLToken
+ */
+ private int getInlinedHTML(int start) throws InvalidInputException {
+   // TODO: when the result is TokenNameINLINE_HTML, scan the HTML between
+   // startPosition and currentPosition for errors. The earlier draft walked
+   // the tags of that fragment, pushed non-server start tags onto a stack and
+   // peeked the stack on each end tag; it never did anything with the result,
+   // so it has been dropped here instead of being kept as commented-out code.
+   return getInlinedHTMLToken(start);
+ }
 /**
 * @return the scanned token, or TokenNameEOF when currentPosition is
 * already past the end of source
 * @throws InvalidInputException
 */
- private int getInlinedHTML(int start) throws InvalidInputException {
+ private int getInlinedHTMLToken(int start) throws InvalidInputException {
// int htmlPosition = start;
- if (currentPosition>source.length) {
- currentPosition = source.length;
- return TokenNameEOF;
+ if (currentPosition > source.length) {
+ currentPosition = source.length;
+ return TokenNameEOF;
}
startPosition = start;
try {
}
}
}
- public final void recordComment(boolean isJavadoc) {
- // a new annotation comment is recorded
- try {
- commentStops[++commentPtr] = isJavadoc
- ? currentPosition
- : -currentPosition;
- } catch (IndexOutOfBoundsException e) {
- int oldStackLength = commentStops.length;
- int[] oldStack = commentStops;
- commentStops = new int[oldStackLength + 30];
- System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
- commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
- //grows the positions buffers too
- int[] old = commentStarts;
- commentStarts = new int[oldStackLength + 30];
- System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
- }
- //the buffer is of a correct size here
- commentStarts[commentPtr] = startPosition;
- }
+ /**
+ * Records the start and stop positions of the comment that was just scanned.
+ * <p>
+ * The stop position is stored negated for TokenNameCOMMENT_LINE (using
+ * lastCommentLinePosition) and for TokenNameCOMMENT_BLOCK (using
+ * currentPosition); for any other token, e.g. TokenNameCOMMENT_PHPDOC, the
+ * positive currentPosition is stored. Both position buffers grow by 30
+ * entries when they are full.
+ *
+ * @param token the kind of comment token being recorded
+ */
+ public void recordComment(int token) {
+   // work out the (possibly negated) stop position for this comment kind
+   final int stop;
+   if (token == TokenNameCOMMENT_LINE) {
+     stop = -this.lastCommentLinePosition;
+   } else if (token == TokenNameCOMMENT_BLOCK) {
+     stop = -this.currentPosition;
+   } else {
+     stop = this.currentPosition;
+   }
+
+   // claim the next slot, enlarging both buffers together when needed
+   final int capacity = this.commentStops.length;
+   this.commentPtr++;
+   if (this.commentPtr >= capacity) {
+     final int[] previousStops = this.commentStops;
+     this.commentStops = new int[capacity + 30];
+     System.arraycopy(previousStops, 0, this.commentStops, 0, capacity);
+
+     final int[] previousStarts = this.commentStarts;
+     this.commentStarts = new int[capacity + 30];
+     System.arraycopy(previousStarts, 0, this.commentStarts, 0, capacity);
+   }
+
+   this.commentStops[this.commentPtr] = stop;
+   this.commentStarts[this.commentPtr] = this.startPosition;
+ }
+// public final void recordComment(boolean isJavadoc) {
+// // a new annotation comment is recorded
+// try {
+// commentStops[++commentPtr] = isJavadoc
+// ? currentPosition
+// : -currentPosition;
+// } catch (IndexOutOfBoundsException e) {
+// int oldStackLength = commentStops.length;
+// int[] oldStack = commentStops;
+// commentStops = new int[oldStackLength + 30];
+// System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
+// commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
+// //grows the positions buffers too
+// int[] old = commentStarts;
+// commentStarts = new int[oldStackLength + 30];
+// System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
+// }
+// //the buffer is of a correct size here
+// commentStarts[commentPtr] = startPosition;
+// }
public void resetTo(int begin, int end) {
//reset the scanner to a given position where it may rescan again
diet = false;
initialPosition = currentPosition = 0;
containsAssertKeyword = false;
withoutUnicodeBuffer = new char[this.source.length];
+ encapsedStringStack = new Stack();
}
public String toString() {
if (startPosition == source.length)
return "case"; //$NON-NLS-1$
case TokenNameclass :
return "class"; //$NON-NLS-1$
+ case TokenNamecatch :
+ return "catch"; //$NON-NLS-1$
case TokenNameclone :
//$NON-NLS-1$
return "clone";
return "endswitch"; //$NON-NLS-1$
case TokenNameendwhile :
return "endwhile"; //$NON-NLS-1$
+ case TokenNameexit:
+ return "exit";
case TokenNameextends :
return "extends"; //$NON-NLS-1$
// case TokenNamefalse :
return "include"; //$NON-NLS-1$
case TokenNameinclude_once :
return "include_once"; //$NON-NLS-1$
+ case TokenNameinstanceof :
+ return "instanceof"; //$NON-NLS-1$
case TokenNameinterface :
return "interface"; //$NON-NLS-1$
case TokenNameisset :
return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
case TokenNameDoubleLiteral :
return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
- case TokenNameStringLiteral :
- return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
- case TokenNameStringConstant :
+ case TokenNameStringDoubleQuote :
+ return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
+ case TokenNameStringSingleQuote :
return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
case TokenNameStringInterpolated :
return "StringInterpolated(" + new String(getCurrentTokenSource())
+ ")"; //$NON-NLS-1$ //$NON-NLS-2$
+ case TokenNameEncapsedString0 :
+ return "`"; //$NON-NLS-1$
+ case TokenNameEncapsedString1 :
+ return "\'"; //$NON-NLS-1$
+ case TokenNameEncapsedString2 :
+ return "\""; //$NON-NLS-1$
+ case TokenNameSTRING :
+ return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
case TokenNameHEREDOC :
return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
case TokenNamePLUS_PLUS :
return "@";
case TokenNameDOLLAR :
return "$";
- // case TokenNameDOLLAR_LBRACE :
- // return "${";
+ case TokenNameDOLLAR_LBRACE :
+ return "${";
case TokenNameEOF :
return "EOF"; //$NON-NLS-1$
case TokenNameWHITESPACE :
return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
case TokenNameCOMMENT_PHPDOC :
return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
- // case TokenNameHTML :
- // return "HTML(" + new String(getCurrentTokenSource()) + ")";
- // //$NON-NLS-1$
+ // case TokenNameHTML :
+ // return "HTML(" + new String(getCurrentTokenSource()) + ")";
+ // //$NON-NLS-1$
case TokenNameFILE :
return "__FILE__"; //$NON-NLS-1$
case TokenNameLINE :
+ new String(getCurrentTokenSource()); //$NON-NLS-1$
}
}
+
+ /**
+ * Creates a scanner with both tokenizeComments and tokenizeWhiteSpace
+ * set to false.
+ */
+ public Scanner() {
+ this(false, false);
+ }
+ /**
+ * Creates a scanner that does not check for non-externalized string
+ * literals (false is passed for checkNonExternalizedStringLiterals).
+ *
+ * @param tokenizeComments passed through unchanged to the three-argument constructor
+ * @param tokenizeWhiteSpace passed through unchanged to the three-argument constructor
+ */
+ public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
+ this(tokenizeComments, tokenizeWhiteSpace, false);
+ }
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
boolean checkNonExternalizedStringLiterals) {
this(tokenizeComments, tokenizeWhiteSpace,
}
 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
 boolean checkNonExternalizedStringLiterals, boolean assertMode) {
+ // Delegates to the seven-argument constructor with tokenizeStrings set to
+ // false and with null for both taskTags and taskPriorities.
+ this(tokenizeComments, tokenizeWhiteSpace,
+ checkNonExternalizedStringLiterals, assertMode, false, null, null);
+ }
+ /**
+ * Creates a scanner and stores every option in the field of the same name.
+ * eofPosition is initialised to Integer.MAX_VALUE and encapsedStringStack
+ * is left null by this constructor.
+ *
+ * @param tokenizeComments stored in this.tokenizeComments
+ * @param tokenizeWhiteSpace stored in this.tokenizeWhiteSpace
+ * @param checkNonExternalizedStringLiterals stored in this.checkNonExternalizedStringLiterals
+ * @param assertMode stored in this.assertMode
+ * @param tokenizeStrings stored in this.tokenizeStrings
+ * @param taskTags stored in this.taskTags; may be null (the four-argument constructor passes null)
+ * @param taskPriorities stored in this.taskPriorities; may be null (the four-argument constructor passes null)
+ */
+ public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
+ boolean checkNonExternalizedStringLiterals, boolean assertMode,
+ boolean tokenizeStrings,
+ char[][] taskTags,
+ char[][] taskPriorities) {
 this.eofPosition = Integer.MAX_VALUE;
 this.tokenizeComments = tokenizeComments;
 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
+ this.tokenizeStrings = tokenizeStrings;
 this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
 this.assertMode = assertMode;
+ // NOTE(review): a Stack is assigned to encapsedStringStack elsewhere in this file; confirm nothing reads it before that happens.
+ this.encapsedStringStack = null;
+ this.taskTags = taskTags;
+ this.taskPriorities = taskPriorities;
 }
private void checkNonExternalizeString() throws InvalidInputException {
if (currentLine == null)