1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
17 import net.sourceforge.phpdt.core.compiler.CharOperation;
18 import net.sourceforge.phpdt.core.compiler.IScanner;
19 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
20 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
21 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
23 public class Scanner implements IScanner, ITerminalSymbols {
26 - getNextToken() which returns the type of the current token
27 (this value is not memorized by the scanner)
28 - getCurrentTokenSource() which provides the token "REAL" source
29 (i.e. all unicode escapes have been transformed into the correct char)
30 - sourceStart gives the position into the stream
31 - currentPosition-1 gives the sourceEnd position into the stream
35 private boolean assertMode;
36 public boolean useAssertAsAnIndentifier = false;
37 //flag indicating if processed source contains occurrences of keyword assert
38 public boolean containsAssertKeyword = false;
40 public boolean recordLineSeparator;
41 public boolean phpMode = false;
43 public char currentCharacter;
44 public int startPosition;
45 public int currentPosition;
46 public int initialPosition, eofPosition;
47 // after this position, EOF tokens are generated instead of real tokens from the source
49 public boolean tokenizeComments;
50 public boolean tokenizeWhiteSpace;
52 //source should be viewed as a window (aka a part)
53 //of a entire very large stream
57 public char[] withoutUnicodeBuffer;
58 public int withoutUnicodePtr;
59 //when == 0 ==> no unicode in the current token
60 public boolean unicodeAsBackSlash = false;
62 public boolean scanningFloatLiteral = false;
64 //support for /** comments
65 //public char[][] comments = new char[10][];
66 public int[] commentStops = new int[10];
67 public int[] commentStarts = new int[10];
68 public int commentPtr = -1; // no comment test with commentPtr value -1
70 //diet parsing support - jump over some method body when requested
71 public boolean diet = false;
73 //support for the poor-line-debuggers ....
74 //remember the position of the cr/lf
75 public int[] lineEnds = new int[250];
76 public int linePtr = -1;
77 public boolean wasAcr = false;
79 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
81 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
82 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
83 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
84 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
85 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
86 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
87 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
89 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
90 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
91 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
92 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
94 //----------------optimized identifier management------------------
95 static final char[] charArray_a = new char[] { 'a' },
96 charArray_b = new char[] { 'b' },
97 charArray_c = new char[] { 'c' },
98 charArray_d = new char[] { 'd' },
99 charArray_e = new char[] { 'e' },
100 charArray_f = new char[] { 'f' },
101 charArray_g = new char[] { 'g' },
102 charArray_h = new char[] { 'h' },
103 charArray_i = new char[] { 'i' },
104 charArray_j = new char[] { 'j' },
105 charArray_k = new char[] { 'k' },
106 charArray_l = new char[] { 'l' },
107 charArray_m = new char[] { 'm' },
108 charArray_n = new char[] { 'n' },
109 charArray_o = new char[] { 'o' },
110 charArray_p = new char[] { 'p' },
111 charArray_q = new char[] { 'q' },
112 charArray_r = new char[] { 'r' },
113 charArray_s = new char[] { 's' },
114 charArray_t = new char[] { 't' },
115 charArray_u = new char[] { 'u' },
116 charArray_v = new char[] { 'v' },
117 charArray_w = new char[] { 'w' },
118 charArray_x = new char[] { 'x' },
119 charArray_y = new char[] { 'y' },
120 charArray_z = new char[] { 'z' };
122 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
123 static final int TableSize = 30, InternalTableSize = 6;
125 public static final int OptimizedLength = 6;
127 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
128 // support for detecting non-externalized string literals
129 int currentLineNr = -1;
130 int previousLineNr = -1;
131 NLSLine currentLine = null;
132 List lines = new ArrayList();
133 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
134 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
135 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
136 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
137 public StringLiteral[] nonNLSStrings = null;
138 public boolean checkNonExternalizedStringLiterals = true;
139 public boolean wasNonExternalizedStringLiteral = false;
142 for (int i = 0; i < 6; i++) {
143 for (int j = 0; j < TableSize; j++) {
144 for (int k = 0; k < InternalTableSize; k++) {
145 charArray_length[i][j][k] = initCharArray;
150 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
152 public static final int RoundBracket = 0;
153 public static final int SquareBracket = 1;
154 public static final int CurlyBracket = 2;
155 public static final int BracketKinds = 3;
158 public char[][] foundTaskTags = null;
159 public char[][] foundTaskMessages;
160 public char[][] foundTaskPriorities = null;
161 public int[][] foundTaskPositions;
162 public int foundTaskCount = 0;
163 public char[][] taskTags = null;
164 public char[][] taskPriorities = null;
166 public static final boolean DEBUG = false;
171 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
// Convenience constructor: forwards both flags unchanged to the
// three-argument constructor and supplies false for its third parameter.
// NOTE(review): the three-argument constructor is not visible in this
// excerpt, so the meaning of the third flag cannot be stated here - confirm.
172 this(tokenizeComments, tokenizeWhiteSpace, false);
176 * Determines if the specified character is
177 * permissible as the first character in a PHP identifier
179 public static boolean isPHPIdentifierStart(char ch) {
// Accepts ch when Character.isLetter(ch) is true, when ch is an underscore,
// or when ch lies in the inclusive range 0x7F..0xFF. Digits are rejected here.
// NOTE(review): Character.isLetter also accepts letters above 0xFF, which is
// presumably wider than PHP's byte-oriented identifier rule - confirm intent.
180 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
184 * Determines if the specified character may be part of a PHP identifier as
185 * other than the first character
187 public static boolean isPHPIdentifierPart(char ch) {
// Same test as isPHPIdentifierStart except that digits are also accepted:
// Character.isLetterOrDigit(ch), an underscore, or the inclusive range 0x7F..0xFF.
188 return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
191 public final boolean atEnd() {
// Returns true exactly when currentPosition equals source.length.
192 // This check is not meaningful when source is
193 // only a part (a window) of the real input stream
195 return source.length == currentPosition;
197 public char[] getCurrentIdentifierSource() {
// Produces the characters of the current token, i.e. the source range
// starting at startPosition with length (currentPosition - startPosition).
198 //return the token REAL source (aka unicodes are precomputed)
// The unicode-buffer path below is disabled (commented out), so the visible
// code always reads from source.
201 // if (withoutUnicodePtr != 0)
202 // //0 is used as a fast test flag so the real first char is in position 1
204 // withoutUnicodeBuffer,
206 // result = new char[withoutUnicodePtr],
208 // withoutUnicodePtr);
210 int length = currentPosition - startPosition;
// Short tokens are dispatched to the optimizedCurrentTokenSourceN() helpers.
// NOTE(review): the case labels are not present in this excerpt; presumably
// they are 1..6, matching OptimizedLength - confirm.
211 switch (length) { // see OptimizedLength
213 return optimizedCurrentTokenSource1();
215 return optimizedCurrentTokenSource2();
217 return optimizedCurrentTokenSource3();
219 return optimizedCurrentTokenSource4();
221 return optimizedCurrentTokenSource5();
223 return optimizedCurrentTokenSource6();
// Remaining lengths: copy the token range out of source into a fresh array.
// The declaration of result and the final return are not in this excerpt.
226 System.arraycopy(source, startPosition, result = new char[length], 0, length);
230 public int getCurrentTokenEndPosition() {
// currentPosition points one past the last consumed character, so the
// inclusive end index of the current token is currentPosition - 1.
231 return this.currentPosition - 1;
234 public final char[] getCurrentTokenSource() {
// Copies the current token, source[startPosition .. currentPosition - 1],
// into a newly allocated char array.
235 // Return the token REAL source (aka unicodes are precomputed)
// The unicode-buffer path below is disabled (commented out).
238 // if (withoutUnicodePtr != 0)
239 // // 0 is used as a fast test flag so the real first char is in position 1
241 // withoutUnicodeBuffer,
243 // result = new char[withoutUnicodePtr],
245 // withoutUnicodePtr);
// length is assigned inside the allocation expression and then reused as the
// copy count; the declarations and the return are not in this excerpt.
248 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
253 public final char[] getCurrentTokenSource(int startPos) {
// Variant of getCurrentTokenSource() with a caller-supplied start index:
// copies source[startPos .. currentPosition - 1] into a new char array.
// startPos is not validated in the visible lines.
254 // Return the token REAL source (aka unicodes are precomputed)
// The unicode-buffer path below is disabled (commented out).
257 // if (withoutUnicodePtr != 0)
258 // // 0 is used as a fast test flag so the real first char is in position 1
260 // withoutUnicodeBuffer,
262 // result = new char[withoutUnicodePtr],
264 // withoutUnicodePtr);
// length is assigned inside the allocation expression and reused as the copy count.
267 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
272 public final char[] getCurrentTokenSourceString() {
// Produces the content of the current string token without its first and
// last character (the delimiters).
273 //return the token REAL source (aka unicodes are precomputed).
274 //REMOVE the two " that are at the beginning and the end.
// When withoutUnicodePtr is non-zero the token text has been buffered in
// withoutUnicodeBuffer (starting at index 1), so it is read from there.
277 if (withoutUnicodePtr != 0)
278 //0 is used as a fast test flag so the real first char is in position 1
279 System.arraycopy(withoutUnicodeBuffer, 2,
280 //2 is 1 (real start) + 1 (to jump over the ")
281 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
// Unbuffered case: skip the opening delimiter (+1) and drop both delimiters
// from the length (-2).
// NOTE(review): the 'else' joining the two copies is not present in this
// excerpt - presumably this statement is the else branch; confirm.
284 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
288 public int getCurrentTokenStartPosition() {
// Returns the index in source at which the current token begins.
289 return this.startPosition;
292 public final char[] getCurrentStringLiteralSource() {
// Copies the current token minus its first and last character straight from
// source; unlike getCurrentTokenSourceString, the visible code never consults
// withoutUnicodeBuffer.
293 // Return the token REAL source (aka unicodes are precomputed)
// Start one past startPosition and shorten the length by two to drop both delimiters.
298 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
304 * Search the source position corresponding to the end of a given line number
306 * Line numbers are 1-based, and relative to the scanner initialPosition.
307 * Character positions are 0-based.
309 * In case the given line number is inconsistent, answers -1.
311 public final int getLineEnd(int lineNumber) {
// Looks up the end position of the given 1-based line in lineEnds, which is
// indexed from 0 (hence lineNumber - 1 below).
// NOTE(review): the statements governed by the guards below are not present
// in this excerpt; per the method's Javadoc, inconsistent input answers -1.
// NOTE(review): the guards compare against lineEnds.length, which is the
// allocated capacity of the array, not the number of recorded line ends
// (tracked by linePtr) - verify that entries beyond linePtr cannot be returned.
313 if (lineEnds == null)
315 if (lineNumber >= lineEnds.length)
320 if (lineNumber == lineEnds.length - 1)
322 return lineEnds[lineNumber - 1];
323 // next line start one character behind the lineEnd of the previous line
326 * Search the source position corresponding to the beginning of a given line number
328 * Line numbers are 1-based, and relative to the scanner initialPosition.
329 * Character positions are 0-based.
331 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
333 * In case the given line number is inconsistent, answers -1.
335 public final int getLineStart(int lineNumber) {
// Computes the start position of the given 1-based line: either
// initialPosition, or one character past the recorded end of the previous
// line (lineEnds[lineNumber - 2] + 1).
// NOTE(review): the condition selecting initialPosition is not present in
// this excerpt; presumably it is the first-line case - confirm.
// NOTE(review): as in getLineEnd, the bound is lineEnds.length (capacity),
// not linePtr - verify.
337 if (lineEnds == null)
339 if (lineNumber >= lineEnds.length)
345 return initialPosition;
346 return lineEnds[lineNumber - 2] + 1;
347 // the next line starts one character after the lineEnd of the previous line
349 public final boolean getNextChar(char testedChar) {
// Conditionally consumes one character: reads source[currentPosition] and
// advances; if the character read is not testedChar, currentPosition is
// rewound to its value on entry.
// NOTE(review): in the visible lines only currentPosition is restored on a
// mismatch; currentCharacter keeps the character that was read.
// The return statements are not present in this excerpt.
351 //handle the case of unicode.
352 //when a unicode appears then we must use a buffer that holds char internal values
353 //At the end of this method currentCharacter holds the new visited char
354 //and currentPosition points right next after it
355 //Both previous lines are true if the currentCharacter is == to the testedChar
356 //On false, no side effect has occurred.
358 //ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored on mismatch or overrun.
360 int temp = currentPosition;
362 currentCharacter = source[currentPosition++];
// Disabled \\u escape handling follows (kept commented out in this scanner).
363 // if (((currentCharacter = source[currentPosition++]) == '\\')
364 // && (source[currentPosition] == 'u')) {
365 // //-------------unicode traitement ------------
366 // int c1, c2, c3, c4;
367 // int unicodeSize = 6;
368 // currentPosition++;
369 // while (source[currentPosition] == 'u') {
370 // currentPosition++;
374 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
376 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
378 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
380 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
382 // currentPosition = temp;
386 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
387 // if (currentCharacter != testedChar) {
388 // currentPosition = temp;
391 // unicodeAsBackSlash = currentCharacter == '\\';
393 // //need the unicode buffer
394 // if (withoutUnicodePtr == 0) {
395 // //buffer all the entries that have been left aside....
396 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
400 // withoutUnicodeBuffer,
402 // withoutUnicodePtr);
404 // //fill the buffer with the char
405 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
408 // } //-------------end unicode traitement--------------
// Mismatch: undo the advance.
410 if (currentCharacter != testedChar) {
411 currentPosition = temp;
414 unicodeAsBackSlash = false;
415 // if (withoutUnicodePtr != 0)
416 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source is treated like a mismatch: state is reset
// and the position restored instead of propagating the exception.
419 } catch (IndexOutOfBoundsException e) {
420 unicodeAsBackSlash = false;
421 currentPosition = temp;
425 public final int getNextChar(char testedChar1, char testedChar2) {
// Two-candidate variant of getNextChar(char): reads one character and keeps
// the advance only if it equals testedChar1 or testedChar2; otherwise
// currentPosition is rewound to its value on entry.
// NOTE(review): the return statements are not present in this excerpt; the
// result codes below are taken from the original comment - confirm.
426 //Result: 0 : testedChar1 matched, 1 : testedChar2 matched, -1 : anything else
427 //test can be done with (x==0) for the first and (x>0) for the second
428 //handle the case of unicode.
429 //when a unicode appears then we must use a buffer that holds char internal values
430 //At the end of this method currentCharacter holds the new visited char
431 //and currentPosition points right next after it
432 //Both previous lines are true if the currentCharacter is == to the testedChar1/2
433 //On false, no side effect has occurred.
435 //ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored on mismatch or overrun.
437 int temp = currentPosition;
440 currentCharacter = source[currentPosition++];
// Disabled \\u escape handling follows (kept commented out in this scanner).
441 // if (((currentCharacter = source[currentPosition++]) == '\\')
442 // && (source[currentPosition] == 'u')) {
443 // //-------------unicode traitement ------------
444 // int c1, c2, c3, c4;
445 // int unicodeSize = 6;
446 // currentPosition++;
447 // while (source[currentPosition] == 'u') {
448 // currentPosition++;
452 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
454 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
456 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
458 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
460 // currentPosition = temp;
464 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
465 // if (currentCharacter == testedChar1)
467 // else if (currentCharacter == testedChar2)
470 // currentPosition = temp;
474 // //need the unicode buffer
475 // if (withoutUnicodePtr == 0) {
476 // //buffer all the entries that have been left aside....
477 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
481 // withoutUnicodeBuffer,
483 // withoutUnicodePtr);
485 // //fill the buffer with the char
486 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
488 // } //-------------end unicode traitement--------------
// Classify the character read; the neither-matches path rewinds the position.
490 if (currentCharacter == testedChar1)
492 else if (currentCharacter == testedChar2)
495 currentPosition = temp;
499 // if (withoutUnicodePtr != 0)
500 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source restores the position instead of propagating.
503 } catch (IndexOutOfBoundsException e) {
504 currentPosition = temp;
508 public final boolean getNextCharAsDigit() {
// Conditionally consumes one character: the advance is kept only when
// Character.isDigit accepts the character read; otherwise currentPosition is
// rewound to its value on entry. The return statements are not present in
// this excerpt.
510 //handle the case of unicode.
511 //when a unicode appears then we must use a buffer that holds char internal values
512 //At the end of this method currentCharacter holds the new visited char
513 //and currentPosition points right next after it
514 //Both previous lines are true if the currentCharacter is a digit
515 //On false, no side effect has occurred.
517 //ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored on mismatch or overrun.
519 int temp = currentPosition;
521 currentCharacter = source[currentPosition++];
// Disabled \\u escape handling follows (kept commented out in this scanner).
522 // if (((currentCharacter = source[currentPosition++]) == '\\')
523 // && (source[currentPosition] == 'u')) {
524 // //-------------unicode traitement ------------
525 // int c1, c2, c3, c4;
526 // int unicodeSize = 6;
527 // currentPosition++;
528 // while (source[currentPosition] == 'u') {
529 // currentPosition++;
533 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
535 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
537 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
539 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
541 // currentPosition = temp;
545 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
546 // if (!Character.isDigit(currentCharacter)) {
547 // currentPosition = temp;
551 // //need the unicode buffer
552 // if (withoutUnicodePtr == 0) {
553 // //buffer all the entries that have been left aside....
554 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
558 // withoutUnicodeBuffer,
560 // withoutUnicodePtr);
562 // //fill the buffer with the char
563 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
565 // } //-------------end unicode traitement--------------
// Not a digit: undo the advance.
567 if (!Character.isDigit(currentCharacter)) {
568 currentPosition = temp;
571 // if (withoutUnicodePtr != 0)
572 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source restores the position instead of propagating.
575 } catch (IndexOutOfBoundsException e) {
576 currentPosition = temp;
580 public final boolean getNextCharAsDigit(int radix) {
// Radix-aware variant of getNextCharAsDigit(): the advance is kept only when
// Character.digit(currentCharacter, radix) is not -1, i.e. the character is a
// valid digit in the given radix; otherwise currentPosition is rewound.
// The return statements are not present in this excerpt.
582 //handle the case of unicode.
583 //when a unicode appears then we must use a buffer that holds char internal values
584 //At the end of this method currentCharacter holds the new visited char
585 //and currentPosition points right next after it
586 //Both previous lines are true if the currentCharacter is a digit base on radix
587 //On false, no side effect has occurred.
589 //ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored on mismatch or overrun.
591 int temp = currentPosition;
593 currentCharacter = source[currentPosition++];
// Disabled \\u escape handling follows (kept commented out in this scanner).
594 // if (((currentCharacter = source[currentPosition++]) == '\\')
595 // && (source[currentPosition] == 'u')) {
596 // //-------------unicode traitement ------------
597 // int c1, c2, c3, c4;
598 // int unicodeSize = 6;
599 // currentPosition++;
600 // while (source[currentPosition] == 'u') {
601 // currentPosition++;
605 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
607 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
609 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
611 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
613 // currentPosition = temp;
617 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
618 // if (Character.digit(currentCharacter, radix) == -1) {
619 // currentPosition = temp;
623 // //need the unicode buffer
624 // if (withoutUnicodePtr == 0) {
625 // //buffer all the entries that have been left aside....
626 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
630 // withoutUnicodeBuffer,
632 // withoutUnicodePtr);
634 // //fill the buffer with the char
635 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
637 // } //-------------end unicode traitement--------------
// Not a digit in this radix: undo the advance.
639 if (Character.digit(currentCharacter, radix) == -1) {
640 currentPosition = temp;
643 // if (withoutUnicodePtr != 0)
644 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source restores the position instead of propagating.
647 } catch (IndexOutOfBoundsException e) {
648 currentPosition = temp;
652 public boolean getNextCharAsJavaIdentifierPart() {
// Conditionally consumes one character: the advance is kept only when the
// character read passes isPHPIdentifierPart; otherwise currentPosition is
// rewound to its value on entry.
// Despite the method name, the test applied is the PHP identifier rule
// (isPHPIdentifierPart), not Character.isJavaIdentifierPart.
// The return statements are not present in this excerpt.
654 //handle the case of unicode.
655 //when a unicode appears then we must use a buffer that holds char internal values
656 //At the end of this method currentCharacter holds the new visited char
657 //and currentPosition points right next after it
658 //Both previous lines are true if the currentCharacter is a PHP identifier part
659 //On false, no side effect has occurred.
661 //ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored on mismatch or overrun.
663 int temp = currentPosition;
665 currentCharacter = source[currentPosition++];
// Disabled \\u escape handling follows (kept commented out in this scanner).
666 // if (((currentCharacter = source[currentPosition++]) == '\\')
667 // && (source[currentPosition] == 'u')) {
668 // //-------------unicode traitement ------------
669 // int c1, c2, c3, c4;
670 // int unicodeSize = 6;
671 // currentPosition++;
672 // while (source[currentPosition] == 'u') {
673 // currentPosition++;
677 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
679 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
681 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
683 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
685 // currentPosition = temp;
689 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
690 // if (!isPHPIdentifierPart(currentCharacter)) {
691 // currentPosition = temp;
695 // //need the unicode buffer
696 // if (withoutUnicodePtr == 0) {
697 // //buffer all the entries that have been left aside....
698 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
702 // withoutUnicodeBuffer,
704 // withoutUnicodePtr);
706 // //fill the buffer with the char
707 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
709 // } //-------------end unicode traitement--------------
// Not an identifier character: undo the advance.
711 if (!isPHPIdentifierPart(currentCharacter)) {
712 currentPosition = temp;
716 // if (withoutUnicodePtr != 0)
717 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source restores the position instead of propagating.
720 } catch (IndexOutOfBoundsException e) {
721 currentPosition = temp;
726 public int getNextToken() throws InvalidInputException {
727 int htmlPosition = currentPosition;
730 currentCharacter = source[currentPosition++];
731 if (currentCharacter == '<') {
732 if (getNextChar('?')) {
733 currentCharacter = source[currentPosition++];
734 if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
736 startPosition = currentPosition;
738 if (tokenizeWhiteSpace) {
739 // && (whiteStart != currentPosition - 1)) {
740 // reposition scanner in case we are interested by spaces as tokens
741 startPosition = htmlPosition;
742 return TokenNameHTML;
745 boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
747 int test = getNextChar('H', 'h');
749 test = getNextChar('P', 'p');
752 startPosition = currentPosition;
755 if (tokenizeWhiteSpace) {
756 // && (whiteStart != currentPosition - 1)) {
757 // reposition scanner in case we are interested by spaces as tokens
758 startPosition = htmlPosition;
759 return TokenNameHTML;
768 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
769 if (recordLineSeparator) {
776 } //-----------------end switch while try--------------------
777 catch (IndexOutOfBoundsException e) {
778 if (tokenizeWhiteSpace) {
779 // && (whiteStart != currentPosition - 1)) {
780 // reposition scanner in case we are interested by spaces as tokens
781 startPosition = htmlPosition;
789 jumpOverMethodBody();
791 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
794 while (true) { //loop for jumping over comments
795 withoutUnicodePtr = 0;
796 //start with a new token (even comment written with unicode )
798 // ---------Consume white space and handles startPosition---------
799 int whiteStart = currentPosition;
800 boolean isWhiteSpace;
802 startPosition = currentPosition;
803 currentCharacter = source[currentPosition++];
804 // if (((currentCharacter = source[currentPosition++]) == '\\')
805 // && (source[currentPosition] == 'u')) {
806 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
808 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
809 checkNonExternalizeString();
810 if (recordLineSeparator) {
816 isWhiteSpace = (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
818 } while (isWhiteSpace);
819 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
820 // reposition scanner in case we are interested by spaces as tokens
822 startPosition = whiteStart;
823 return TokenNameWHITESPACE;
825 //little trick to get out in the middle of a source compuation
826 if (currentPosition > eofPosition)
829 // ---------Identify the next token-------------
831 switch (currentCharacter) {
833 return TokenNameLPAREN;
835 return TokenNameRPAREN;
837 return TokenNameLBRACE;
839 return TokenNameRBRACE;
841 return TokenNameLBRACKET;
843 return TokenNameRBRACKET;
845 return TokenNameSEMICOLON;
847 return TokenNameCOMMA;
850 if (getNextCharAsDigit())
851 return scanNumber(true);
856 if ((test = getNextChar('+', '=')) == 0)
857 return TokenNamePLUS_PLUS;
859 return TokenNamePLUS_EQUAL;
860 return TokenNamePLUS;
865 if ((test = getNextChar('-', '=')) == 0)
866 return TokenNameMINUS_MINUS;
868 return TokenNameMINUS_EQUAL;
869 if (getNextChar('>'))
870 return TokenNameMINUS_GREATER;
872 return TokenNameMINUS;
875 if (getNextChar('='))
876 return TokenNameTWIDDLE_EQUAL;
877 return TokenNameTWIDDLE;
879 if (getNextChar('=')) {
880 if (getNextChar('=')) {
881 return TokenNameNOT_EQUAL_EQUAL;
883 return TokenNameNOT_EQUAL;
887 if (getNextChar('='))
888 return TokenNameMULTIPLY_EQUAL;
889 return TokenNameMULTIPLY;
891 if (getNextChar('='))
892 return TokenNameREMAINDER_EQUAL;
893 return TokenNameREMAINDER;
897 if ((test = getNextChar('=', '<')) == 0)
898 return TokenNameLESS_EQUAL;
900 if (getNextChar('='))
901 return TokenNameLEFT_SHIFT_EQUAL;
902 if (getNextChar('<')) {
903 int heredocStart = currentPosition;
904 int heredocLength = 0;
905 currentCharacter = source[currentPosition++];
906 if (isPHPIdentifierStart(currentCharacter)) {
907 currentCharacter = source[currentPosition++];
909 return TokenNameERROR;
911 while (isPHPIdentifierPart(currentCharacter)) {
912 currentCharacter = source[currentPosition++];
915 heredocLength = currentPosition - heredocStart - 1;
917 // heredoc end-tag determination
918 boolean endTag = true;
921 ch = source[currentPosition++];
922 if (ch == '\r' || ch == '\n') {
923 if (recordLineSeparator) {
928 for (int i = 0; i < heredocLength; i++) {
929 if (source[currentPosition + i] != source[heredocStart + i]) {
935 currentPosition += heredocLength - 1;
936 currentCharacter = source[currentPosition++];
937 break; // do...while loop
945 return TokenNameHEREDOC;
947 return TokenNameLEFT_SHIFT;
949 return TokenNameLESS;
954 if ((test = getNextChar('=', '>')) == 0)
955 return TokenNameGREATER_EQUAL;
957 if ((test = getNextChar('=', '>')) == 0)
958 return TokenNameRIGHT_SHIFT_EQUAL;
959 return TokenNameRIGHT_SHIFT;
961 return TokenNameGREATER;
964 if (getNextChar('=')) {
965 if (getNextChar('=')) {
966 return TokenNameEQUAL_EQUAL_EQUAL;
968 return TokenNameEQUAL_EQUAL;
970 if (getNextChar('>'))
971 return TokenNameEQUAL_GREATER;
972 return TokenNameEQUAL;
976 if ((test = getNextChar('&', '=')) == 0)
977 return TokenNameAND_AND;
979 return TokenNameAND_EQUAL;
985 if ((test = getNextChar('|', '=')) == 0)
986 return TokenNameOR_OR;
988 return TokenNameOR_EQUAL;
992 if (getNextChar('='))
993 return TokenNameXOR_EQUAL;
996 if (getNextChar('>')) {
998 return TokenNameStopPHP;
1000 return TokenNameQUESTION;
1002 if (getNextChar(':'))
1003 return TokenNameCOLON_COLON;
1004 return TokenNameCOLON;
1010 // if ((test = getNextChar('\n', '\r')) == 0) {
1011 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1014 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1015 // for (int lookAhead = 0;
1018 // if (currentPosition + lookAhead
1019 // == source.length)
1021 // if (source[currentPosition + lookAhead]
1024 // if (source[currentPosition + lookAhead]
1026 // currentPosition += lookAhead + 1;
1030 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1033 // if (getNextChar('\'')) {
1034 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1035 // for (int lookAhead = 0;
1038 // if (currentPosition + lookAhead
1039 // == source.length)
1041 // if (source[currentPosition + lookAhead]
1044 // if (source[currentPosition + lookAhead]
1046 // currentPosition += lookAhead + 1;
1050 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1052 // if (getNextChar('\\'))
1053 // scanEscapeCharacter();
1054 // else { // consume next character
1055 // unicodeAsBackSlash = false;
1056 // if (((currentCharacter = source[currentPosition++])
1058 // && (source[currentPosition] == 'u')) {
1059 // getNextUnicodeChar();
1061 // if (withoutUnicodePtr != 0) {
1062 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1063 // currentCharacter;
1067 // // if (getNextChar('\''))
1068 // // return TokenNameCharacterLiteral;
1069 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1070 // for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1071 // if (currentPosition + lookAhead == source.length)
1073 // if (source[currentPosition + lookAhead] == '\n')
1075 // if (source[currentPosition + lookAhead] == '\'') {
1076 // currentPosition += lookAhead + 1;
1080 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1083 // consume next character
1084 unicodeAsBackSlash = false;
1085 currentCharacter = source[currentPosition++];
1086 // if (((currentCharacter = source[currentPosition++]) == '\\')
1087 // && (source[currentPosition] == 'u')) {
1088 // getNextUnicodeChar();
1090 // if (withoutUnicodePtr != 0) {
1091 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1092 // currentCharacter;
1096 while (currentCharacter != '\'') {
1098 /**** in PHP \r and \n are valid in string literals ****/
1099 // if ((currentCharacter == '\n')
1100 // || (currentCharacter == '\r')) {
1101 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1102 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1103 // if (currentPosition + lookAhead == source.length)
1105 // if (source[currentPosition + lookAhead] == '\n')
1107 // if (source[currentPosition + lookAhead] == '\"') {
1108 // currentPosition += lookAhead + 1;
1112 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1114 if (currentCharacter == '\\') {
1115 int escapeSize = currentPosition;
1116 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1117 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1118 scanSingleQuotedEscapeCharacter();
1119 escapeSize = currentPosition - escapeSize;
1120 if (withoutUnicodePtr == 0) {
1121 //buffer all the entries that have been left aside....
1122 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1123 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1124 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1125 } else { //overwrite the / in the buffer
1126 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1127 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1128 withoutUnicodePtr--;
1132 // consume next character
1133 unicodeAsBackSlash = false;
1134 currentCharacter = source[currentPosition++];
1135 // if (((currentCharacter = source[currentPosition++]) == '\\')
1136 // && (source[currentPosition] == 'u')) {
1137 // getNextUnicodeChar();
1139 if (withoutUnicodePtr != 0) {
1140 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1145 } catch (IndexOutOfBoundsException e) {
1146 throw new InvalidInputException(UNTERMINATED_STRING);
1147 } catch (InvalidInputException e) {
1148 if (e.getMessage().equals(INVALID_ESCAPE)) {
1149 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1150 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1151 if (currentPosition + lookAhead == source.length)
1153 if (source[currentPosition + lookAhead] == '\n')
1155 if (source[currentPosition + lookAhead] == '\'') {
1156 currentPosition += lookAhead + 1;
1164 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1165 if (currentLine == null) {
1166 currentLine = new NLSLine();
1167 lines.add(currentLine);
1169 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1171 return TokenNameStringConstant;
1174 // consume next character
1175 unicodeAsBackSlash = false;
1176 currentCharacter = source[currentPosition++];
1177 // if (((currentCharacter = source[currentPosition++]) == '\\')
1178 // && (source[currentPosition] == 'u')) {
1179 // getNextUnicodeChar();
1181 // if (withoutUnicodePtr != 0) {
1182 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1183 // currentCharacter;
1187 while (currentCharacter != '"') {
1189 /**** in PHP \r and \n are valid in string literals ****/
1190 // if ((currentCharacter == '\n')
1191 // || (currentCharacter == '\r')) {
1192 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1193 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1194 // if (currentPosition + lookAhead == source.length)
1196 // if (source[currentPosition + lookAhead] == '\n')
1198 // if (source[currentPosition + lookAhead] == '\"') {
1199 // currentPosition += lookAhead + 1;
1203 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1205 if (currentCharacter == '\\') {
1206 int escapeSize = currentPosition;
1207 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1208 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1209 scanDoubleQuotedEscapeCharacter();
1210 escapeSize = currentPosition - escapeSize;
1211 if (withoutUnicodePtr == 0) {
1212 //buffer all the entries that have been left aside....
1213 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1214 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1215 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1216 } else { //overwrite the / in the buffer
1217 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1218 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1219 withoutUnicodePtr--;
1223 // consume next character
1224 unicodeAsBackSlash = false;
1225 currentCharacter = source[currentPosition++];
1226 // if (((currentCharacter = source[currentPosition++]) == '\\')
1227 // && (source[currentPosition] == 'u')) {
1228 // getNextUnicodeChar();
1230 if (withoutUnicodePtr != 0) {
1231 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1236 } catch (IndexOutOfBoundsException e) {
1237 throw new InvalidInputException(UNTERMINATED_STRING);
1238 } catch (InvalidInputException e) {
1239 if (e.getMessage().equals(INVALID_ESCAPE)) {
1240 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1241 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1242 if (currentPosition + lookAhead == source.length)
1244 if (source[currentPosition + lookAhead] == '\n')
1246 if (source[currentPosition + lookAhead] == '\"') {
1247 currentPosition += lookAhead + 1;
1255 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1256 if (currentLine == null) {
1257 currentLine = new NLSLine();
1258 lines.add(currentLine);
1260 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1262 return TokenNameStringLiteral;
1265 // consume next character
1266 unicodeAsBackSlash = false;
1267 currentCharacter = source[currentPosition++];
1268 // if (((currentCharacter = source[currentPosition++]) == '\\')
1269 // && (source[currentPosition] == 'u')) {
1270 // getNextUnicodeChar();
1272 // if (withoutUnicodePtr != 0) {
1273 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1274 // currentCharacter;
1278 while (currentCharacter != '`') {
1280 /**** in PHP \r and \n are valid in string literals ****/
1281 // if ((currentCharacter == '\n')
1282 // || (currentCharacter == '\r')) {
1283 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1284 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1285 // if (currentPosition + lookAhead == source.length)
1287 // if (source[currentPosition + lookAhead] == '\n')
1289 // if (source[currentPosition + lookAhead] == '\"') {
1290 // currentPosition += lookAhead + 1;
1294 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1296 if (currentCharacter == '\\') {
1297 int escapeSize = currentPosition;
1298 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1299 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1300 scanDoubleQuotedEscapeCharacter();
1301 escapeSize = currentPosition - escapeSize;
1302 if (withoutUnicodePtr == 0) {
1303 //buffer all the entries that have been left aside....
1304 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1305 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1306 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1307 } else { //overwrite the / in the buffer
1308 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1309 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1310 withoutUnicodePtr--;
1314 // consume next character
1315 unicodeAsBackSlash = false;
1316 currentCharacter = source[currentPosition++];
1317 // if (((currentCharacter = source[currentPosition++]) == '\\')
1318 // && (source[currentPosition] == 'u')) {
1319 // getNextUnicodeChar();
1321 if (withoutUnicodePtr != 0) {
1322 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1327 } catch (IndexOutOfBoundsException e) {
1328 throw new InvalidInputException(UNTERMINATED_STRING);
1329 } catch (InvalidInputException e) {
1330 if (e.getMessage().equals(INVALID_ESCAPE)) {
1331 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1332 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1333 if (currentPosition + lookAhead == source.length)
1335 if (source[currentPosition + lookAhead] == '\n')
1337 if (source[currentPosition + lookAhead] == '`') {
1338 currentPosition += lookAhead + 1;
1346 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1347 if (currentLine == null) {
1348 currentLine = new NLSLine();
1349 lines.add(currentLine);
1351 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1353 return TokenNameStringInterpolated;
1358 if ((currentCharacter == '#') || (test = getNextChar('/', '*')) == 0) {
1360 int endPositionForLineComment = 0;
1361 try { //get the next char
1362 currentCharacter = source[currentPosition++];
1363 // if (((currentCharacter = source[currentPosition++])
1365 // && (source[currentPosition] == 'u')) {
1366 // //-------------unicode traitement ------------
1367 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1368 // currentPosition++;
1369 // while (source[currentPosition] == 'u') {
1370 // currentPosition++;
1373 // Character.getNumericValue(source[currentPosition++]))
1377 // Character.getNumericValue(source[currentPosition++]))
1381 // Character.getNumericValue(source[currentPosition++]))
1385 // Character.getNumericValue(source[currentPosition++]))
1388 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1390 // currentCharacter =
1391 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1395 //handle the \\u case manually into comment
1396 // if (currentCharacter == '\\') {
1397 // if (source[currentPosition] == '\\')
1398 // currentPosition++;
1399 // } //jump over the \\
1400 boolean isUnicode = false;
1401 while (currentCharacter != '\r' && currentCharacter != '\n') {
1402 if (currentCharacter == '?') {
1403 if (getNextChar('>')) {
1404 startPosition = currentPosition - 2;
1406 return TokenNameStopPHP;
1412 currentCharacter = source[currentPosition++];
1413 // if (((currentCharacter = source[currentPosition++])
1415 // && (source[currentPosition] == 'u')) {
1416 // isUnicode = true;
1417 // //-------------unicode traitement ------------
1418 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1419 // currentPosition++;
1420 // while (source[currentPosition] == 'u') {
1421 // currentPosition++;
1424 // Character.getNumericValue(source[currentPosition++]))
1428 // Character.getNumericValue(
1429 // source[currentPosition++]))
1433 // Character.getNumericValue(
1434 // source[currentPosition++]))
1438 // Character.getNumericValue(
1439 // source[currentPosition++]))
1442 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1444 // currentCharacter =
1445 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1448 //handle the \\u case manually into comment
1449 // if (currentCharacter == '\\') {
1450 // if (source[currentPosition] == '\\')
1451 // currentPosition++;
1452 // } //jump over the \\
1455 endPositionForLineComment = currentPosition - 6;
1457 endPositionForLineComment = currentPosition - 1;
1459 recordComment(false);
1460 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1461 checkNonExternalizeString();
1462 if (recordLineSeparator) {
1464 pushUnicodeLineSeparator();
1466 pushLineSeparator();
1472 if (tokenizeComments) {
1474 currentPosition = endPositionForLineComment;
1475 // reset one character behind
1477 return TokenNameCOMMENT_LINE;
1479 } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1480 if (tokenizeComments) {
1482 // reset one character behind
1483 return TokenNameCOMMENT_LINE;
1489 //traditional and annotation comment
1490 boolean isJavadoc = false, star = false;
1491 // consume next character
1492 unicodeAsBackSlash = false;
1493 currentCharacter = source[currentPosition++];
1494 // if (((currentCharacter = source[currentPosition++]) == '\\')
1495 // && (source[currentPosition] == 'u')) {
1496 // getNextUnicodeChar();
1498 // if (withoutUnicodePtr != 0) {
1499 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1500 // currentCharacter;
1504 if (currentCharacter == '*') {
1508 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1509 checkNonExternalizeString();
1510 if (recordLineSeparator) {
1511 pushLineSeparator();
1516 try { //get the next char
1517 currentCharacter = source[currentPosition++];
1518 // if (((currentCharacter = source[currentPosition++])
1520 // && (source[currentPosition] == 'u')) {
1521 // //-------------unicode traitement ------------
1522 // getNextUnicodeChar();
1524 //handle the \\u case manually into comment
1525 // if (currentCharacter == '\\') {
1526 // if (source[currentPosition] == '\\')
1527 // currentPosition++;
1528 // //jump over the \\
1530 // empty comment is not a javadoc /**/
1531 if (currentCharacter == '/') {
1534 //loop until end of comment */
1535 while ((currentCharacter != '/') || (!star)) {
1536 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1537 checkNonExternalizeString();
1538 if (recordLineSeparator) {
1539 pushLineSeparator();
1544 star = currentCharacter == '*';
1546 currentCharacter = source[currentPosition++];
1547 // if (((currentCharacter = source[currentPosition++])
1549 // && (source[currentPosition] == 'u')) {
1550 // //-------------unicode traitement ------------
1551 // getNextUnicodeChar();
1553 //handle the \\u case manually into comment
1554 // if (currentCharacter == '\\') {
1555 // if (source[currentPosition] == '\\')
1556 // currentPosition++;
1557 // } //jump over the \\
1559 recordComment(isJavadoc);
1560 if (tokenizeComments) {
1562 return TokenNameCOMMENT_PHPDOC;
1563 return TokenNameCOMMENT_BLOCK;
1565 } catch (IndexOutOfBoundsException e) {
1566 throw new InvalidInputException(UNTERMINATED_COMMENT);
1570 if (getNextChar('='))
1571 return TokenNameDIVIDE_EQUAL;
1572 return TokenNameDIVIDE;
1576 return TokenNameEOF;
1577 //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1578 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1581 if (currentCharacter == '$') {
1582 while ((currentCharacter = source[currentPosition++]) == '$') {
1584 if (currentCharacter == '{')
1585 return TokenNameDOLLAR_LBRACE;
1586 if (isPHPIdentifierStart(currentCharacter))
1587 return scanIdentifierOrKeyword(true);
1588 return TokenNameERROR;
1590 if (isPHPIdentifierStart(currentCharacter))
1591 return scanIdentifierOrKeyword(false);
1592 if (Character.isDigit(currentCharacter))
1593 return scanNumber(false);
1594 return TokenNameERROR;
1597 } //-----------------end switch while try--------------------
1598 catch (IndexOutOfBoundsException e) {
1601 return TokenNameEOF;
1604 // public final void getNextUnicodeChar()
1605 // throws IndexOutOfBoundsException, InvalidInputException {
1607 // //handle the case of unicode.
1608 // //when a unicode appears then we must use a buffer that holds char internal values
1609 // //At the end of this method currentCharacter holds the new visited char
1610 // //and currentPosition points right next after it
1612 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1614 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1615 // currentPosition++;
1616 // while (source[currentPosition] == 'u') {
1617 // currentPosition++;
1621 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1623 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1625 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1627 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1629 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1631 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1632 // //need the unicode buffer
1633 // if (withoutUnicodePtr == 0) {
1634 // //buffer all the entries that have been left aside....
1635 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1636 // System.arraycopy(
1639 // withoutUnicodeBuffer,
1641 // withoutUnicodePtr);
1643 // //fill the buffer with the char
1644 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1646 // unicodeAsBackSlash = currentCharacter == '\\';
1648 /* Tokenize a method body, assuming that curly brackets are properly balanced.
1650 public final void jumpOverMethodBody() {
// Steps over the tokens of a method body without returning any of them.
// Each pass of the outer loop first consumes white space (recording line
// ends when recordLineSeparator is set) and then dispatches on
// currentCharacter to step over one quoted literal, comment, identifier or
// number. IndexOutOfBoundsException and InvalidInputException raised while
// scanning are caught inside this method.
// NOTE(review): the case labels, break statements and catch bodies are not
// visible in this excerpt; comments below describe the visible statements only.
1652 this.wasAcr = false;
1655 while (true) { //loop for jumping over comments
1656 // ---------Consume white space and handles startPosition---------
1657 boolean isWhiteSpace;
1659 startPosition = currentPosition;
1660 currentCharacter = source[currentPosition++];
1661 // if (((currentCharacter = source[currentPosition++]) == '\\')
1662 // && (source[currentPosition] == 'u')) {
1663 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
// line ends met while skipping white space are still pushed to the line table
1665 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1666 pushLineSeparator();
1667 isWhiteSpace = Character.isWhitespace(currentCharacter);
1669 } while (isWhiteSpace);
1671 // -------consume token until } is found---------
1672 switch (currentCharacter) {
// NOTE(review): presumably the single-quote case - probes for a backslash and,
// if one is present, lets scanDoubleQuotedEscapeCharacter() consume the escape;
// an InvalidInputException from the escape scanner is caught right here.
1684 test = getNextChar('\\');
1687 scanDoubleQuotedEscapeCharacter();
1688 } catch (InvalidInputException ex) {
1691 // try { // consume next character
1692 unicodeAsBackSlash = false;
1693 currentCharacter = source[currentPosition++];
1694 // if (((currentCharacter = source[currentPosition++]) == '\\')
1695 // && (source[currentPosition] == 'u')) {
1696 // getNextUnicodeChar();
// mirror the character into the unicode-free buffer once that buffer is in use
1698 if (withoutUnicodePtr != 0) {
1699 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1702 // } catch (InvalidInputException ex) {
// NOTE(review): presumably the double-quote case - read the first character of
// the literal, then loop until the closing '"' is reached.
1710 // try { // consume next character
1711 unicodeAsBackSlash = false;
1712 currentCharacter = source[currentPosition++];
1713 // if (((currentCharacter = source[currentPosition++]) == '\\')
1714 // && (source[currentPosition] == 'u')) {
1715 // getNextUnicodeChar();
1717 if (withoutUnicodePtr != 0) {
1718 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1721 // } catch (InvalidInputException ex) {
1723 while (currentCharacter != '"') {
1724 if (currentCharacter == '\r') {
1725 if (source[currentPosition] == '\n')
1728 // the string cannot go further than the line
1730 if (currentCharacter == '\n') {
1732 // the string cannot go further than the line
1734 if (currentCharacter == '\\') {
1736 scanDoubleQuotedEscapeCharacter();
1737 } catch (InvalidInputException ex) {
1740 // try { // consume next character
1741 unicodeAsBackSlash = false;
1742 currentCharacter = source[currentPosition++];
1743 // if (((currentCharacter = source[currentPosition++]) == '\\')
1744 // && (source[currentPosition] == 'u')) {
1745 // getNextUnicodeChar();
1747 if (withoutUnicodePtr != 0) {
1748 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1751 // } catch (InvalidInputException ex) {
1754 } catch (IndexOutOfBoundsException e) {
// getNextChar('/', '*') tells the two comment openers apart; the branch taken
// on result 0 skips to the end of the line (line comment).
1761 if ((test = getNextChar('/', '*')) == 0) {
1765 currentCharacter = source[currentPosition++];
1766 // if (((currentCharacter = source[currentPosition++]) == '\\')
1767 // && (source[currentPosition] == 'u')) {
1768 // //-------------unicode handling ------------
1769 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1770 // currentPosition++;
1771 // while (source[currentPosition] == 'u') {
1772 // currentPosition++;
1775 // Character.getNumericValue(source[currentPosition++]))
1779 // Character.getNumericValue(source[currentPosition++]))
1783 // Character.getNumericValue(source[currentPosition++]))
1787 // Character.getNumericValue(source[currentPosition++]))
1790 // //error don't care of the value
1791 // currentCharacter = 'A';
1792 // } //something different from \n and \r
1794 // currentCharacter =
1795 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1799 while (currentCharacter != '\r' && currentCharacter != '\n') {
1801 currentCharacter = source[currentPosition++];
1802 // if (((currentCharacter = source[currentPosition++])
1804 // && (source[currentPosition] == 'u')) {
1805 // //-------------unicode handling ------------
1806 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1807 // currentPosition++;
1808 // while (source[currentPosition] == 'u') {
1809 // currentPosition++;
1812 // Character.getNumericValue(source[currentPosition++]))
1816 // Character.getNumericValue(source[currentPosition++]))
1820 // Character.getNumericValue(source[currentPosition++]))
1824 // Character.getNumericValue(source[currentPosition++]))
1827 // //error don't care of the value
1828 // currentCharacter = 'A';
1829 // } //something different from \n and \r
1831 // currentCharacter =
1832 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// the line break that ends the line comment is recorded as a line end
1836 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1837 pushLineSeparator();
1838 } catch (IndexOutOfBoundsException e) {
1839 } //an eof will then be generated
1843 //traditional and annotation comment
// 'star' remembers whether the previously read character was '*', so that the
// loop further down can recognise the closing "*/"
1844 boolean star = false;
1845 // try { // consume next character
1846 unicodeAsBackSlash = false;
1847 currentCharacter = source[currentPosition++];
1848 // if (((currentCharacter = source[currentPosition++]) == '\\')
1849 // && (source[currentPosition] == 'u')) {
1850 // getNextUnicodeChar();
1852 if (withoutUnicodePtr != 0) {
1853 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1856 // } catch (InvalidInputException ex) {
1858 if (currentCharacter == '*') {
1861 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1862 pushLineSeparator();
1863 try { //get the next char
1864 currentCharacter = source[currentPosition++];
1865 // if (((currentCharacter = source[currentPosition++]) == '\\')
1866 // && (source[currentPosition] == 'u')) {
1867 // //-------------unicode handling ------------
1868 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1869 // currentPosition++;
1870 // while (source[currentPosition] == 'u') {
1871 // currentPosition++;
1874 // Character.getNumericValue(source[currentPosition++]))
1878 // Character.getNumericValue(source[currentPosition++]))
1882 // Character.getNumericValue(source[currentPosition++]))
1886 // Character.getNumericValue(source[currentPosition++]))
1889 // //error don't care of the value
1890 // currentCharacter = 'A';
1891 // } //something different from * and /
1893 // currentCharacter =
1894 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1897 //loop until end of comment */
1898 while ((currentCharacter != '/') || (!star)) {
1899 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1900 pushLineSeparator();
1901 star = currentCharacter == '*';
1903 currentCharacter = source[currentPosition++];
1904 // if (((currentCharacter = source[currentPosition++])
1906 // && (source[currentPosition] == 'u')) {
1907 // //-------------unicode handling ------------
1908 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1909 // currentPosition++;
1910 // while (source[currentPosition] == 'u') {
1911 // currentPosition++;
1914 // Character.getNumericValue(source[currentPosition++]))
1918 // Character.getNumericValue(source[currentPosition++]))
1922 // Character.getNumericValue(source[currentPosition++]))
1926 // Character.getNumericValue(source[currentPosition++]))
1929 // //error don't care of the value
1930 // currentCharacter = 'A';
1931 // } //something different from * and /
1933 // currentCharacter =
1934 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1938 } catch (IndexOutOfBoundsException e) {
// identifiers, keywords and '$'-prefixed names are scanned and the resulting
// token is discarded; the boolean argument is true when the name starts with '$'
1947 if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
1949 scanIdentifierOrKeyword((currentCharacter == '$'));
1950 } catch (InvalidInputException ex) {
// NOTE(review): digit branch - the scanning call itself is not visible here
1954 if (Character.isDigit(currentCharacter)) {
1957 } catch (InvalidInputException ex) {
1963 //-----------------end switch while try--------------------
// running off the end of source or hitting invalid input ends the jump
// NOTE(review): handler bodies are not visible in this excerpt
1964 } catch (IndexOutOfBoundsException e) {
1965 } catch (InvalidInputException e) {
1969 // public final boolean jumpOverUnicodeWhiteSpace()
1970 // throws InvalidInputException {
1972 // //handle the case of unicode. Jump over the next whiteSpace
1973 // //making startPosition pointing on the next available char
1974 // //On false, the currentCharacter is filled up with a potential
1978 // this.wasAcr = false;
1979 // int c1, c2, c3, c4;
1980 // int unicodeSize = 6;
1981 // currentPosition++;
1982 // while (source[currentPosition] == 'u') {
1983 // currentPosition++;
1987 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1989 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1991 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1993 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1995 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1998 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1999 // if (recordLineSeparator
2000 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2001 // pushLineSeparator();
2002 // if (Character.isWhitespace(currentCharacter))
2005 // //buffer the new char which is not a white space
2006 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2007 // //withoutUnicodePtr == 1 is true here
2009 // } catch (IndexOutOfBoundsException e) {
2010 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2013 public final int[] getLineEnds() {
2014 //return a bounded copy of this.lineEnds
2017 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
/**
 * Accessor for the scanner's source characters.
 * NOTE(review): the method body is not visible in this excerpt; presumably it
 * returns the {@code source} array itself rather than a copy - confirm before
 * relying on either behavior.
 */
2021 public char[] getSource() {
/**
 * Produces the char[] for the one-character token located at
 * {@code startPosition}.
 * NOTE(review): the lines between reading the character and the final return
 * are not visible in this excerpt; going by the existing comments they
 * presumably return shared, pre-built arrays for common characters. The
 * visible fallback allocates a fresh one-element array.
 */
2024 final char[] optimizedCurrentTokenSource1() {
2025 //return always the same char[] build only once
2027 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2028 char charOne = source[startPosition];
// fallback: no shared array was returned above, so build a new one
2083 return new char[] { charOne };
/**
 * Produces the char[] for the two-character token starting at
 * {@code startPosition}, looking for an already built array with the same
 * characters in the cache bucket {@code charArray_length[0][hash]} before a
 * new array is created and stored there.
 * NOTE(review): the return statements and some short declarations
 * ({@code c0}, {@code c1}, {@code i}, {@code r}) are not visible in this excerpt.
 */
2087 final char[] optimizedCurrentTokenSource2() {
2088 //try to return the same char[] build only once
// both characters are read once and folded into a bucket index below TableSize
2091 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2092 char[][] table = charArray_length[0][hash];
// first probe run: walk up to the end of the bucket comparing every character
2094 while (++i < InternalTableSize) {
2095 char[] charArray = table[i];
2096 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2099 //---------other side---------
// second probe run: entries up to and including index newEntry2
2101 int max = newEntry2;
2102 while (++i <= max) {
2103 char[] charArray = table[i];
2104 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2107 //--------add the entry-------
// no match: advance the insertion slot and store the new array
// NOTE(review): the statement guarded by this bounds test is not visible here
// (presumably a wrap-around of max)
2108 if (++max >= InternalTableSize)
2111 table[max] = (r = new char[] { c0, c1 });
/**
 * Produces the char[] for the three-character token starting at
 * {@code startPosition}, looking for an already built array with the same
 * characters in the cache bucket {@code charArray_length[1][hash]} before a
 * new array is created and stored there.
 * NOTE(review): the return statements, the declarations of {@code c0..c2},
 * {@code i}, {@code r} and the start/end of the hash statement are not
 * visible in this excerpt.
 */
2116 final char[] optimizedCurrentTokenSource3() {
2117 //try to return the same char[] build only once
// hash input: the three characters, each shifted by a further 6 bits
2121 (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2123 char[][] table = charArray_length[1][hash];
// first probe run: walk up to the end of the bucket comparing every character
2125 while (++i < InternalTableSize) {
2126 char[] charArray = table[i];
2127 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2130 //---------other side---------
// second probe run: entries up to and including index newEntry3
2132 int max = newEntry3;
2133 while (++i <= max) {
2134 char[] charArray = table[i];
2135 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2138 //--------add the entry-------
// no match: advance the insertion slot and store the new array
// NOTE(review): the statement guarded by this bounds test is not visible here
2139 if (++max >= InternalTableSize)
2142 table[max] = (r = new char[] { c0, c1, c2 });
/**
 * Produces the char[] for the four-character token starting at
 * {@code startPosition}, looking for an already built array with the same
 * characters in the cache bucket {@code charArray_length[2][hash]} before a
 * new array is created and stored there.
 * NOTE(review): the return statements, the declarations of {@code i},
 * {@code r}, {@code hash} and the end of the hash statement are not visible
 * in this excerpt.
 */
2147 final char[] optimizedCurrentTokenSource4() {
2148 //try to return the same char[] build only once
2150 char c0, c1, c2, c3;
// hash input: the first character is widened to long before shifting, so the
// sum is computed as a long and narrowed with (int) when indexing below
2152 ((((long) (c0 = source[startPosition])) << 18)
2153 + ((c1 = source[startPosition + 1]) << 12)
2154 + ((c2 = source[startPosition + 2]) << 6)
2155 + (c3 = source[startPosition + 3]))
2157 char[][] table = charArray_length[2][(int) hash];
// first probe run: walk up to the end of the bucket comparing every character
2159 while (++i < InternalTableSize) {
2160 char[] charArray = table[i];
2161 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2164 //---------other side---------
// second probe run: entries up to and including index newEntry4
2166 int max = newEntry4;
2167 while (++i <= max) {
2168 char[] charArray = table[i];
2169 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2172 //--------add the entry-------
// no match: advance the insertion slot and store the new array
// NOTE(review): the statement guarded by this bounds test is not visible here
2173 if (++max >= InternalTableSize)
2176 table[max] = (r = new char[] { c0, c1, c2, c3 });
/**
 * Produces the char[] for the five-character token starting at
 * {@code startPosition}, looking for an already built array with the same
 * characters in the cache bucket {@code charArray_length[3][hash]} before a
 * new array is created and stored there.
 * NOTE(review): the return statements, the declarations of {@code i},
 * {@code r}, {@code hash} and the end of the hash statement are not visible
 * in this excerpt.
 */
2182 final char[] optimizedCurrentTokenSource5() {
2183 //try to return the same char[] build only once
2185 char c0, c1, c2, c3, c4;
// hash input: the two leading characters are widened to long before shifting;
// the result is narrowed with (int) when indexing below
2187 ((((long) (c0 = source[startPosition])) << 24)
2188 + (((long) (c1 = source[startPosition + 1])) << 18)
2189 + ((c2 = source[startPosition + 2]) << 12)
2190 + ((c3 = source[startPosition + 3]) << 6)
2191 + (c4 = source[startPosition + 4]))
2193 char[][] table = charArray_length[3][(int) hash];
// first probe run: walk up to the end of the bucket comparing every character
2195 while (++i < InternalTableSize) {
2196 char[] charArray = table[i];
2197 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2200 //---------other side---------
// second probe run: entries up to and including index newEntry5
2202 int max = newEntry5;
2203 while (++i <= max) {
2204 char[] charArray = table[i];
2205 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2208 //--------add the entry-------
// no match: advance the insertion slot and store the new array
// NOTE(review): the statement guarded by this bounds test is not visible here
2209 if (++max >= InternalTableSize)
2212 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
/**
 * Produces the char[] for the six-character token starting at
 * {@code startPosition}, looking for an already built array with the same
 * characters in the cache bucket {@code charArray_length[4][hash]} before a
 * new array is created and stored there.
 * NOTE(review): the return statements, the declarations of {@code i},
 * {@code r}, {@code hash} and the end of the hash statement are not visible
 * in this excerpt.
 */
2218 final char[] optimizedCurrentTokenSource6() {
2219 //try to return the same char[] build only once
2221 char c0, c1, c2, c3, c4, c5;
// hash input: the three leading characters are widened to long before
// shifting; the result is narrowed with (int) when indexing below
2223 ((((long) (c0 = source[startPosition])) << 32)
2224 + (((long) (c1 = source[startPosition + 1])) << 24)
2225 + (((long) (c2 = source[startPosition + 2])) << 18)
2226 + ((c3 = source[startPosition + 3]) << 12)
2227 + ((c4 = source[startPosition + 4]) << 6)
2228 + (c5 = source[startPosition + 5]))
2230 char[][] table = charArray_length[4][(int) hash];
// first probe run: walk up to the end of the bucket comparing every character
2232 while (++i < InternalTableSize) {
2233 char[] charArray = table[i];
2234 if ((c0 == charArray[0])
2235 && (c1 == charArray[1])
2236 && (c2 == charArray[2])
2237 && (c3 == charArray[3])
2238 && (c4 == charArray[4])
2239 && (c5 == charArray[5]))
2242 //---------other side---------
// second probe run: entries up to and including index newEntry6
2244 int max = newEntry6;
2245 while (++i <= max) {
2246 char[] charArray = table[i];
2247 if ((c0 == charArray[0])
2248 && (c1 == charArray[1])
2249 && (c2 == charArray[2])
2250 && (c3 == charArray[3])
2251 && (c4 == charArray[4])
2252 && (c5 == charArray[5]))
2255 //--------add the entry-------
// no match: advance the insertion slot and store the new array
// NOTE(review): the statement guarded by this bounds test is not visible here
2256 if (++max >= InternalTableSize)
2259 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
/**
 * Records a line end in {@code lineEnds} for the line separator held in
 * {@code currentCharacter} ('\r' or '\n'), which sits at
 * {@code currentPosition - 1}. A '\r' directly followed by '\n' yields a
 * single entry located at the '\n'. When {@code lineEnds} is full it is
 * replaced by a copy that is 250 entries longer.
 * NOTE(review): several short statements (early returns, {@code try}
 * openers, {@code wasAcr} updates) are not visible in this excerpt.
 *
 * @throws InvalidInputException declared by the signature; no throwing
 *         statement is visible in this excerpt
 */
2264 public final void pushLineSeparator() throws InvalidInputException {
2265 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2266 final int INCREMENT = 250;
2268 if (this.checkNonExternalizedStringLiterals) {
2269 // reinitialize the current line for non externalize strings purpose
2272 //currentCharacter is at position currentPosition-1
2275 if (currentCharacter == '\r') {
2276 int separatorPos = currentPosition - 1;
// guard for a separator at or before the last recorded line end
// NOTE(review): the guarded statement is not visible here
2277 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2279 //System.out.println("CR-" + separatorPos);
// linePtr is bumped before the store, so after growing the table in the
// handler the same index can be written again
2281 lineEnds[++linePtr] = separatorPos;
2282 } catch (IndexOutOfBoundsException e) {
2283 //linePtr value is correct
2284 int oldLength = lineEnds.length;
2285 int[] old = lineEnds;
2286 lineEnds = new int[oldLength + INCREMENT];
2287 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2288 lineEnds[linePtr] = separatorPos;
2290 // look-ahead for merged cr+lf
// when '\n' follows, the entry just written is moved onto the '\n'
2292 if (source[currentPosition] == '\n') {
2293 //System.out.println("look-ahead LF-" + currentPosition);
2294 lineEnds[linePtr] = currentPosition;
// reading source[currentPosition] can run past the end of the source
2300 } catch (IndexOutOfBoundsException e) {
2305 if (currentCharacter == '\n') {
2306 //must merge eventual cr followed by lf
// a '\r' recorded immediately before this '\n' is merged: its entry is moved
// onto the '\n' instead of adding a second line end
2307 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2308 //System.out.println("merge LF-" + (currentPosition - 1));
2309 lineEnds[linePtr] = currentPosition - 1;
2311 int separatorPos = currentPosition - 1;
// same guard and grow-on-demand store as in the '\r' branch
2312 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2314 // System.out.println("LF-" + separatorPos);
2316 lineEnds[++linePtr] = separatorPos;
2317 } catch (IndexOutOfBoundsException e) {
2318 //linePtr value is correct
2319 int oldLength = lineEnds.length;
2320 int[] old = lineEnds;
2321 lineEnds = new int[oldLength + INCREMENT];
2322 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2323 lineEnds[linePtr] = separatorPos;
/**
 * Variant of the line-end recording used when the '\r' or '\n' in
 * {@code currentCharacter} was read as a unicode character: positions are
 * taken relative to {@code currentPosition - 6} instead of
 * {@code currentPosition - 1}. When {@code lineEnds} is full it is replaced
 * by a copy that is 250 entries longer.
 * NOTE(review): the offset 6 presumably matches the length of a backslash-u
 * escape with four hex digits - confirm. Several short statements (early
 * returns, {@code try} openers, {@code wasAcr} updates) are not visible in
 * this excerpt.
 */
2330 public final void pushUnicodeLineSeparator() {
2331 // isUnicode means that the \r or \n has been read as a unicode character
2333 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2335 final int INCREMENT = 250;
2336 //currentCharacter is at position currentPosition-1
2338 if (this.checkNonExternalizedStringLiterals) {
2339 // reinitialize the current line for non externalize strings purpose
2344 if (currentCharacter == '\r') {
2345 int separatorPos = currentPosition - 6;
// guard for a separator at or before the last recorded line end
// NOTE(review): the guarded statement is not visible here
2346 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2348 //System.out.println("CR-" + separatorPos);
// linePtr is bumped before the store, so after growing the table in the
// handler the same index can be written again
2350 lineEnds[++linePtr] = separatorPos;
2351 } catch (IndexOutOfBoundsException e) {
2352 //linePtr value is correct
2353 int oldLength = lineEnds.length;
2354 int[] old = lineEnds;
2355 lineEnds = new int[oldLength + INCREMENT];
2356 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2357 lineEnds[linePtr] = separatorPos;
2359 // look-ahead for merged cr+lf
// NOTE(review): unlike pushLineSeparator, no IndexOutOfBoundsException handler
// is visible around this look-ahead - confirm what happens when the separator
// is the last thing in source
2360 if (source[currentPosition] == '\n') {
2361 //System.out.println("look-ahead LF-" + currentPosition);
2362 lineEnds[linePtr] = currentPosition;
2370 if (currentCharacter == '\n') {
2371 //must merge eventual cr followed by lf
// a '\r' recorded immediately before this '\n' is merged: its entry is moved
// instead of adding a second line end
2372 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2373 //System.out.println("merge LF-" + (currentPosition - 1));
2374 lineEnds[linePtr] = currentPosition - 6;
2376 int separatorPos = currentPosition - 6;
// same guard and grow-on-demand store as in the '\r' branch
2377 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2379 // System.out.println("LF-" + separatorPos);
2381 lineEnds[++linePtr] = separatorPos;
2382 } catch (IndexOutOfBoundsException e) {
2383 //linePtr value is correct
2384 int oldLength = lineEnds.length;
2385 int[] old = lineEnds;
2386 lineEnds = new int[oldLength + INCREMENT];
2387 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2388 lineEnds[linePtr] = separatorPos;
2395 public final void recordComment(boolean isJavadoc) {
2397 // a new annotation comment is recorded
2399 commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2400 } catch (IndexOutOfBoundsException e) {
2401 int oldStackLength = commentStops.length;
2402 int[] oldStack = commentStops;
2403 commentStops = new int[oldStackLength + 30];
2404 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2405 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2406 //grows the positions buffers too
2407 int[] old = commentStarts;
2408 commentStarts = new int[oldStackLength + 30];
2409 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2412 //the buffer is of a correct size here
2413 commentStarts[commentPtr] = startPosition;
2415 public void resetTo(int begin, int end) {
2416 //reset the scanner to a given position where it may rescan again
2419 initialPosition = startPosition = currentPosition = begin;
2420 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2421 commentPtr = -1; // reset comment stack
2424 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2425 // the string with "\\u" is a legal string of two chars \ and u
2426 //thus we use a direct access to the source (for regular cases).
2428 // if (unicodeAsBackSlash) {
2429 // // consume next character
2430 // unicodeAsBackSlash = false;
2431 // if (((currentCharacter = source[currentPosition++]) == '\\')
2432 // && (source[currentPosition] == 'u')) {
2433 // getNextUnicodeChar();
2435 // if (withoutUnicodePtr != 0) {
2436 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2440 currentCharacter = source[currentPosition++];
2441 switch (currentCharacter) {
2443 currentCharacter = '\'';
2446 currentCharacter = '\\';
2449 currentCharacter = '\\';
2454 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2455 // the string with "\\u" is a legal string of two chars \ and u
2456 //thus we use a direct access to the source (for regular cases).
2458 // if (unicodeAsBackSlash) {
2459 // // consume next character
2460 // unicodeAsBackSlash = false;
2461 // if (((currentCharacter = source[currentPosition++]) == '\\')
2462 // && (source[currentPosition] == 'u')) {
2463 // getNextUnicodeChar();
2465 // if (withoutUnicodePtr != 0) {
2466 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2470 currentCharacter = source[currentPosition++];
2471 switch (currentCharacter) {
2473 // currentCharacter = '\b';
2476 currentCharacter = '\t';
2479 currentCharacter = '\n';
2482 // currentCharacter = '\f';
2485 currentCharacter = '\r';
2488 currentCharacter = '\"';
2491 currentCharacter = '\'';
2494 currentCharacter = '\\';
2497 currentCharacter = '$';
2500 // -----------octal escape--------------
2502 // OctalDigit OctalDigit
2503 // ZeroToThree OctalDigit OctalDigit
2505 int number = Character.getNumericValue(currentCharacter);
2506 if (number >= 0 && number <= 7) {
2507 boolean zeroToThreeNot = number > 3;
2508 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2509 int digit = Character.getNumericValue(currentCharacter);
2510 if (digit >= 0 && digit <= 7) {
2511 number = (number * 8) + digit;
2512 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2513 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2516 digit = Character.getNumericValue(currentCharacter);
2517 if (digit >= 0 && digit <= 7) {
2518 // has read \ZeroToThree OctalDigit OctalDigit
2519 number = (number * 8) + digit;
2520 } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2524 } else { // has read \OctalDigit NonDigit--> ignore last character
2527 } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2530 } else { // has read \OctalDigit --> ignore last character
2534 throw new InvalidInputException(INVALID_ESCAPE);
2535 currentCharacter = (char) number;
2538 // throw new InvalidInputException(INVALID_ESCAPE);
2542 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2543 // return scanIdentifierOrKeyword( false );
2546 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2549 //first dispatch on the first char.
2550 //then the length. If there are several
2551 //keywors with the same length AND the same first char, then do another
2552 //disptach on the second char :-)...cool....but fast !
2554 useAssertAsAnIndentifier = false;
2556 while (getNextCharAsJavaIdentifierPart()) {
2560 if (new String(getCurrentTokenSource()).equals("$this")) {
2561 return TokenNamethis;
2563 return TokenNameVariable;
2568 // if (withoutUnicodePtr == 0)
2570 //quick test on length == 1 but not on length > 12 while most identifier
2571 //have a length which is <= 12...but there are lots of identifier with
2575 if ((length = currentPosition - startPosition) == 1)
2576 return TokenNameIdentifier;
2578 data = new char[length];
2579 index = startPosition;
2580 for (int i = 0; i < length; i++) {
2581 data[i] = Character.toLowerCase(source[index + i]);
2585 // if ((length = withoutUnicodePtr) == 1)
2586 // return TokenNameIdentifier;
2587 // // data = withoutUnicodeBuffer;
2588 // data = new char[withoutUnicodeBuffer.length];
2589 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2590 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2595 firstLetter = data[index];
2596 switch (firstLetter) {
2598 case 'a' : // as and array
2601 if ((data[++index] == 's')) {
2604 return TokenNameIdentifier;
2607 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2608 return TokenNameAND;
2610 return TokenNameIdentifier;
2613 // if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2614 // return TokenNamearray;
2616 // return TokenNameIdentifier;
2618 return TokenNameIdentifier;
2623 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
2624 return TokenNamebreak;
2626 return TokenNameIdentifier;
2628 return TokenNameIdentifier;
2631 case 'c' : //case class continue
2634 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
2635 return TokenNamecase;
2637 return TokenNameIdentifier;
2639 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
2640 return TokenNameclass;
2642 return TokenNameIdentifier;
2644 if ((data[++index] == 'o')
2645 && (data[++index] == 'n')
2646 && (data[++index] == 't')
2647 && (data[++index] == 'i')
2648 && (data[++index] == 'n')
2649 && (data[++index] == 'u')
2650 && (data[++index] == 'e'))
2651 return TokenNamecontinue;
2653 return TokenNameIdentifier;
2655 return TokenNameIdentifier;
2658 case 'd' : //define default do
2661 if ((data[++index] == 'o'))
2664 return TokenNameIdentifier;
2666 if ((data[++index] == 'e')
2667 && (data[++index] == 'f')
2668 && (data[++index] == 'i')
2669 && (data[++index] == 'n')
2670 && (data[++index] == 'e'))
2671 return TokenNamedefine;
2673 return TokenNameIdentifier;
2675 if ((data[++index] == 'e')
2676 && (data[++index] == 'f')
2677 && (data[++index] == 'a')
2678 && (data[++index] == 'u')
2679 && (data[++index] == 'l')
2680 && (data[++index] == 't'))
2681 return TokenNamedefault;
2683 return TokenNameIdentifier;
2685 return TokenNameIdentifier;
2687 case 'e' : //echo else elseif extends
2690 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
2691 return TokenNameecho;
2692 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2693 return TokenNameelse;
2695 return TokenNameIdentifier;
2697 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
2698 return TokenNameendif;
2700 return TokenNameIdentifier;
2702 if ((data[++index] == 'n')
2703 && (data[++index] == 'd')
2704 && (data[++index] == 'f')
2705 && (data[++index] == 'o')
2706 && (data[++index] == 'r'))
2707 return TokenNameendfor;
2709 (data[index] == 'l')
2710 && (data[++index] == 's')
2711 && (data[++index] == 'e')
2712 && (data[++index] == 'i')
2713 && (data[++index] == 'f'))
2714 return TokenNameelseif;
2716 return TokenNameIdentifier;
2718 if ((data[++index] == 'x')
2719 && (data[++index] == 't')
2720 && (data[++index] == 'e')
2721 && (data[++index] == 'n')
2722 && (data[++index] == 'd')
2723 && (data[++index] == 's'))
2724 return TokenNameextends;
2726 return TokenNameIdentifier;
2727 case 8 : // endwhile
2728 if ((data[++index] == 'n')
2729 && (data[++index] == 'd')
2730 && (data[++index] == 'w')
2731 && (data[++index] == 'h')
2732 && (data[++index] == 'i')
2733 && (data[++index] == 'l')
2734 && (data[++index] == 'e'))
2735 return TokenNameendwhile;
2737 return TokenNameIdentifier;
2738 case 9 : // endswitch
2739 if ((data[++index] == 'n')
2740 && (data[++index] == 'd')
2741 && (data[++index] == 's')
2742 && (data[++index] == 'w')
2743 && (data[++index] == 'i')
2744 && (data[++index] == 't')
2745 && (data[++index] == 'c')
2746 && (data[++index] == 'h'))
2747 return TokenNameendswitch;
2749 return TokenNameIdentifier;
2750 case 10 : // endforeach
2751 if ((data[++index] == 'n')
2752 && (data[++index] == 'd')
2753 && (data[++index] == 'f')
2754 && (data[++index] == 'o')
2755 && (data[++index] == 'r')
2756 && (data[++index] == 'e')
2757 && (data[++index] == 'a')
2758 && (data[++index] == 'c')
2759 && (data[++index] == 'h'))
2760 return TokenNameendforeach;
2762 return TokenNameIdentifier;
2765 return TokenNameIdentifier;
2768 case 'f' : //for false function
2771 if ((data[++index] == 'o') && (data[++index] == 'r'))
2772 return TokenNamefor;
2774 return TokenNameIdentifier;
2776 if ((data[++index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2777 return TokenNamefalse;
2779 return TokenNameIdentifier;
2780 case 7 : // function
2781 if ((data[++index] == 'o')
2782 && (data[++index] == 'r')
2783 && (data[++index] == 'e')
2784 && (data[++index] == 'a')
2785 && (data[++index] == 'c')
2786 && (data[++index] == 'h'))
2787 return TokenNameforeach;
2789 return TokenNameIdentifier;
2790 case 8 : // function
2791 if ((data[++index] == 'u')
2792 && (data[++index] == 'n')
2793 && (data[++index] == 'c')
2794 && (data[++index] == 't')
2795 && (data[++index] == 'i')
2796 && (data[++index] == 'o')
2797 && (data[++index] == 'n'))
2798 return TokenNamefunction;
2800 return TokenNameIdentifier;
2802 return TokenNameIdentifier;
2806 if ((data[++index] == 'l')
2807 && (data[++index] == 'o')
2808 && (data[++index] == 'b')
2809 && (data[++index] == 'a')
2810 && (data[++index] == 'l')) {
2811 return TokenNameglobal;
2814 return TokenNameIdentifier;
2819 if (data[++index] == 'f')
2822 return TokenNameIdentifier;
2824 // if ((data[++index] == 'n') && (data[++index] == 't'))
2825 // return TokenNameint;
2827 // return TokenNameIdentifier;
2829 if ((data[++index] == 'n')
2830 && (data[++index] == 'c')
2831 && (data[++index] == 'l')
2832 && (data[++index] == 'u')
2833 && (data[++index] == 'd')
2834 && (data[++index] == 'e'))
2835 return TokenNameinclude;
2837 return TokenNameIdentifier;
2839 if ((data[++index] == 'n')
2840 && (data[++index] == 'c')
2841 && (data[++index] == 'l')
2842 && (data[++index] == 'u')
2843 && (data[++index] == 'd')
2844 && (data[++index] == 'e')
2845 && (data[++index] == '_')
2846 && (data[++index] == 'o')
2847 && (data[++index] == 'n')
2848 && (data[++index] == 'c')
2849 && (data[++index] == 'e'))
2850 return TokenNameinclude_once;
2852 return TokenNameIdentifier;
2854 return TokenNameIdentifier;
2859 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
2860 return TokenNamelist;
2863 return TokenNameIdentifier;
2865 case 'n' : // new null
2868 if ((data[++index] == 'e') && (data[++index] == 'w'))
2869 return TokenNamenew;
2871 return TokenNameIdentifier;
2873 if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l'))
2874 return TokenNamenull;
2876 return TokenNameIdentifier;
2879 return TokenNameIdentifier;
2881 case 'o' : // or old_function
2883 if (data[++index] == 'r') {
2887 // if (length == 12) {
2888 // if ((data[++index] == 'l')
2889 // && (data[++index] == 'd')
2890 // && (data[++index] == '_')
2891 // && (data[++index] == 'f')
2892 // && (data[++index] == 'u')
2893 // && (data[++index] == 'n')
2894 // && (data[++index] == 'c')
2895 // && (data[++index] == 't')
2896 // && (data[++index] == 'i')
2897 // && (data[++index] == 'o')
2898 // && (data[++index] == 'n')) {
2899 // return TokenNameold_function;
2902 return TokenNameIdentifier;
2906 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
2907 return TokenNameprint;
2910 return TokenNameIdentifier;
2911 case 'r' : //return require require_once
2913 if ((data[++index] == 'e')
2914 && (data[++index] == 't')
2915 && (data[++index] == 'u')
2916 && (data[++index] == 'r')
2917 && (data[++index] == 'n')) {
2918 return TokenNamereturn;
2920 } else if (length == 7) {
2921 if ((data[++index] == 'e')
2922 && (data[++index] == 'q')
2923 && (data[++index] == 'u')
2924 && (data[++index] == 'i')
2925 && (data[++index] == 'r')
2926 && (data[++index] == 'e')) {
2927 return TokenNamerequire;
2929 } else if (length == 12) {
2930 if ((data[++index] == 'e')
2931 && (data[++index] == 'q')
2932 && (data[++index] == 'u')
2933 && (data[++index] == 'i')
2934 && (data[++index] == 'r')
2935 && (data[++index] == 'e')
2936 && (data[++index] == '_')
2937 && (data[++index] == 'o')
2938 && (data[++index] == 'n')
2939 && (data[++index] == 'c')
2940 && (data[++index] == 'e')) {
2941 return TokenNamerequire_once;
2944 return TokenNameIdentifier;
2946 case 's' : //static switch
2949 if (data[++index] == 't')
2950 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
2951 return TokenNamestatic;
2953 return TokenNameIdentifier;
2955 (data[index] == 'w')
2956 && (data[++index] == 'i')
2957 && (data[++index] == 't')
2958 && (data[++index] == 'c')
2959 && (data[++index] == 'h'))
2960 return TokenNameswitch;
2962 return TokenNameIdentifier;
2964 return TokenNameIdentifier;
2971 if ((data[++index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e'))
2972 return TokenNametrue;
2974 return TokenNameIdentifier;
2975 // if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
2976 // return TokenNamethis;
2979 return TokenNameIdentifier;
2985 if ((data[++index] == 'a') && (data[++index] == 'r'))
2986 return TokenNamevar;
2988 return TokenNameIdentifier;
2991 return TokenNameIdentifier;
2997 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
2998 return TokenNamewhile;
3000 return TokenNameIdentifier;
3001 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
3002 //return TokenNamewidefp ;
3004 //return TokenNameIdentifier;
3006 return TokenNameIdentifier;
3012 if ((data[++index] == 'o') && (data[++index] == 'r'))
3013 return TokenNameXOR;
3015 return TokenNameIdentifier;
3018 return TokenNameIdentifier;
3021 return TokenNameIdentifier;
3024 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3026 //when entering this method the currentCharacter is the firt
3027 //digit of the number , i.e. it may be preceeded by a . when
3030 boolean floating = dotPrefix;
3031 if ((!dotPrefix) && (currentCharacter == '0')) {
3032 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3033 //force the first char of the hexa number do exist...
3034 // consume next character
3035 unicodeAsBackSlash = false;
3036 currentCharacter = source[currentPosition++];
3037 // if (((currentCharacter = source[currentPosition++]) == '\\')
3038 // && (source[currentPosition] == 'u')) {
3039 // getNextUnicodeChar();
3041 // if (withoutUnicodePtr != 0) {
3042 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3045 if (Character.digit(currentCharacter, 16) == -1)
3046 throw new InvalidInputException(INVALID_HEXA);
3048 while (getNextCharAsDigit(16)) {
3050 // if (getNextChar('l', 'L') >= 0)
3051 // return TokenNameLongLiteral;
3053 return TokenNameIntegerLiteral;
3056 //there is x or X in the number
3057 //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3058 if (getNextCharAsDigit()) {
3059 //-------------potential octal-----------------
3060 while (getNextCharAsDigit()) {
3063 // if (getNextChar('l', 'L') >= 0) {
3064 // return TokenNameLongLiteral;
3067 // if (getNextChar('f', 'F') >= 0) {
3068 // return TokenNameFloatingPointLiteral;
3071 if (getNextChar('d', 'D') >= 0) {
3072 return TokenNameDoubleLiteral;
3073 } else { //make the distinction between octal and float ....
3074 if (getNextChar('.')) { //bingo ! ....
3075 while (getNextCharAsDigit()) {
3077 if (getNextChar('e', 'E') >= 0) {
3078 // consume next character
3079 unicodeAsBackSlash = false;
3080 currentCharacter = source[currentPosition++];
3081 // if (((currentCharacter = source[currentPosition++]) == '\\')
3082 // && (source[currentPosition] == 'u')) {
3083 // getNextUnicodeChar();
3085 // if (withoutUnicodePtr != 0) {
3086 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3090 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3091 // consume next character
3092 unicodeAsBackSlash = false;
3093 currentCharacter = source[currentPosition++];
3094 // if (((currentCharacter = source[currentPosition++]) == '\\')
3095 // && (source[currentPosition] == 'u')) {
3096 // getNextUnicodeChar();
3098 // if (withoutUnicodePtr != 0) {
3099 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3100 // currentCharacter;
3104 if (!Character.isDigit(currentCharacter))
3105 throw new InvalidInputException(INVALID_FLOAT);
3106 while (getNextCharAsDigit()) {
3109 // if (getNextChar('f', 'F') >= 0)
3110 // return TokenNameFloatingPointLiteral;
3111 getNextChar('d', 'D'); //jump over potential d or D
3112 return TokenNameDoubleLiteral;
3114 return TokenNameIntegerLiteral;
3122 while (getNextCharAsDigit()) {
3125 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3126 // return TokenNameLongLiteral;
3128 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3129 while (getNextCharAsDigit()) {
3134 //if floating is true both exponant and suffix may be optional
3136 if (getNextChar('e', 'E') >= 0) {
3138 // consume next character
3139 unicodeAsBackSlash = false;
3140 currentCharacter = source[currentPosition++];
3141 // if (((currentCharacter = source[currentPosition++]) == '\\')
3142 // && (source[currentPosition] == 'u')) {
3143 // getNextUnicodeChar();
3145 // if (withoutUnicodePtr != 0) {
3146 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3150 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
3151 unicodeAsBackSlash = false;
3152 currentCharacter = source[currentPosition++];
3153 // if (((currentCharacter = source[currentPosition++]) == '\\')
3154 // && (source[currentPosition] == 'u')) {
3155 // getNextUnicodeChar();
3157 // if (withoutUnicodePtr != 0) {
3158 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3162 if (!Character.isDigit(currentCharacter))
3163 throw new InvalidInputException(INVALID_FLOAT);
3164 while (getNextCharAsDigit()) {
3168 if (getNextChar('d', 'D') >= 0)
3169 return TokenNameDoubleLiteral;
3170 // if (getNextChar('f', 'F') >= 0)
3171 // return TokenNameFloatingPointLiteral;
3173 //the long flag has been tested before
3175 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3178 * Search the line number corresponding to a specific position
3181 public final int getLineNumber(int position) {
3183 if (lineEnds == null)
3185 int length = linePtr + 1;
3188 int g = 0, d = length - 1;
3192 if (position < lineEnds[m]) {
3194 } else if (position > lineEnds[m]) {
3200 if (position < lineEnds[m]) {
3206 public void setPHPMode(boolean mode) {
3210 public final void setSource(char[] source) {
3211 //the source-buffer is set to sourceString
3213 if (source == null) {
3214 this.source = new char[0];
3216 this.source = source;
3219 initialPosition = currentPosition = 0;
3220 containsAssertKeyword = false;
3221 withoutUnicodeBuffer = new char[this.source.length];
3225 public String toString() {
3226 if (startPosition == source.length)
3227 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3228 if (currentPosition > source.length)
3229 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3231 char front[] = new char[startPosition];
3232 System.arraycopy(source, 0, front, 0, startPosition);
3234 int middleLength = (currentPosition - 1) - startPosition + 1;
3236 if (middleLength > -1) {
3237 middle = new char[middleLength];
3238 System.arraycopy(source, startPosition, middle, 0, middleLength);
3240 middle = new char[0];
3243 char end[] = new char[source.length - (currentPosition - 1)];
3244 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3246 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3247 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
// Returns a printable description of the token kind 'act', for debugging.
// Tokens that carry text (identifiers, variables, literals, comments, whitespace, HTML)
// are rendered as Kind(<current token source>); keywords and operators are rendered
// as their fixed spelling.
// NOTE(review): several short lines of this method (the switch header, a few 'case'
// labels and returns, the 'default' label) are not visible in this listing; the comments
// below only describe what the visible lines show.
3250 public final String toStringAction(int act) {
// --- scanner status and tokens that carry their own text ---
3253 case TokenNameERROR :
3254 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3255 case TokenNameStopPHP :
3256 return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3257 case TokenNameIdentifier :
3258 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3259 case TokenNameVariable :
3260 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// --- keywords, rendered as their spelling ---
3262 return "as"; //$NON-NLS-1$
3263 case TokenNamebreak :
3264 return "break"; //$NON-NLS-1$
3265 case TokenNamecase :
3266 return "case"; //$NON-NLS-1$
3267 case TokenNameclass :
3268 return "class"; //$NON-NLS-1$
3269 case TokenNamecontinue :
3270 return "continue"; //$NON-NLS-1$
3271 case TokenNamedefault :
3272 return "default"; //$NON-NLS-1$
3273 case TokenNamedefine :
3274 return "define"; //$NON-NLS-1$
3276 return "do"; //$NON-NLS-1$
3277 case TokenNameecho :
3278 return "echo"; //$NON-NLS-1$
3279 case TokenNameelse :
3280 return "else"; //$NON-NLS-1$
3281 case TokenNameelseif :
3282 return "elseif"; //$NON-NLS-1$
3283 case TokenNameendfor :
3284 return "endfor"; //$NON-NLS-1$
3285 case TokenNameendforeach :
3286 return "endforeach"; //$NON-NLS-1$
3287 case TokenNameendif :
3288 return "endif"; //$NON-NLS-1$
3289 case TokenNameendswitch :
3290 return "endswitch"; //$NON-NLS-1$
3291 case TokenNameendwhile :
3292 return "endwhile"; //$NON-NLS-1$
3293 case TokenNameextends :
3294 return "extends"; //$NON-NLS-1$
3295 case TokenNamefalse :
3296 return "false"; //$NON-NLS-1$
3298 return "for"; //$NON-NLS-1$
3299 case TokenNameforeach :
3300 return "foreach"; //$NON-NLS-1$
3301 case TokenNamefunction :
3302 return "function"; //$NON-NLS-1$
3303 case TokenNameglobal :
3304 return "global"; //$NON-NLS-1$
3306 return "if"; //$NON-NLS-1$
3307 case TokenNameinclude :
3308 return "include"; //$NON-NLS-1$
3309 case TokenNameinclude_once :
3310 return "include_once"; //$NON-NLS-1$
3311 case TokenNamelist :
3312 return "list"; //$NON-NLS-1$
3314 return "new"; //$NON-NLS-1$
3315 case TokenNamenull :
3316 return "null"; //$NON-NLS-1$
3317 case TokenNameprint :
3318 return "print"; //$NON-NLS-1$
3319 case TokenNamerequire :
3320 return "require"; //$NON-NLS-1$
3321 case TokenNamerequire_once :
3322 return "require_once"; //$NON-NLS-1$
3323 case TokenNamereturn :
3324 return "return"; //$NON-NLS-1$
3325 case TokenNamestatic :
3326 return "static"; //$NON-NLS-1$
3327 case TokenNameswitch :
3328 return "switch"; //$NON-NLS-1$
3329 case TokenNametrue :
3330 return "true"; //$NON-NLS-1$
3332 return "var"; //$NON-NLS-1$
3333 case TokenNamewhile :
3334 return "while"; //$NON-NLS-1$
3335 case TokenNamethis :
3336 return "$this"; //$NON-NLS-1$
// --- literals, rendered with the current token source ---
3337 case TokenNameIntegerLiteral :
3338 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3339 case TokenNameDoubleLiteral :
3340 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3341 case TokenNameStringLiteral :
3342 return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3343 case TokenNameStringConstant :
3344 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3345 case TokenNameStringInterpolated :
3346 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3347 case TokenNameHEREDOC :
3348 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// --- operators and punctuation, rendered as their spelling ---
3350 case TokenNamePLUS_PLUS :
3351 return "++"; //$NON-NLS-1$
3352 case TokenNameMINUS_MINUS :
3353 return "--"; //$NON-NLS-1$
3354 case TokenNameEQUAL_EQUAL :
3355 return "=="; //$NON-NLS-1$
3356 case TokenNameEQUAL_EQUAL_EQUAL :
3357 return "==="; //$NON-NLS-1$
3358 case TokenNameEQUAL_GREATER :
3359 return "=>"; //$NON-NLS-1$
3360 case TokenNameLESS_EQUAL :
3361 return "<="; //$NON-NLS-1$
3362 case TokenNameGREATER_EQUAL :
3363 return ">="; //$NON-NLS-1$
3364 case TokenNameNOT_EQUAL :
3365 return "!="; //$NON-NLS-1$
3366 case TokenNameNOT_EQUAL_EQUAL :
3367 return "!=="; //$NON-NLS-1$
3368 case TokenNameLEFT_SHIFT :
3369 return "<<"; //$NON-NLS-1$
3370 case TokenNameRIGHT_SHIFT :
3371 return ">>"; //$NON-NLS-1$
3372 case TokenNamePLUS_EQUAL :
3373 return "+="; //$NON-NLS-1$
3374 case TokenNameMINUS_EQUAL :
3375 return "-="; //$NON-NLS-1$
3376 case TokenNameMULTIPLY_EQUAL :
3377 return "*="; //$NON-NLS-1$
3378 case TokenNameDIVIDE_EQUAL :
3379 return "/="; //$NON-NLS-1$
3380 case TokenNameAND_EQUAL :
3381 return "&="; //$NON-NLS-1$
3382 case TokenNameOR_EQUAL :
3383 return "|="; //$NON-NLS-1$
3384 case TokenNameXOR_EQUAL :
3385 return "^="; //$NON-NLS-1$
3386 case TokenNameREMAINDER_EQUAL :
3387 return "%="; //$NON-NLS-1$
3388 case TokenNameLEFT_SHIFT_EQUAL :
3389 return "<<="; //$NON-NLS-1$
3390 case TokenNameRIGHT_SHIFT_EQUAL :
3391 return ">>="; //$NON-NLS-1$
3392 case TokenNameOR_OR :
3393 return "||"; //$NON-NLS-1$
3394 case TokenNameAND_AND :
3395 return "&&"; //$NON-NLS-1$
3396 case TokenNamePLUS :
3397 return "+"; //$NON-NLS-1$
3398 case TokenNameMINUS :
3399 return "-"; //$NON-NLS-1$
3400 case TokenNameMINUS_GREATER :
3403 return "!"; //$NON-NLS-1$
3404 case TokenNameREMAINDER :
3405 return "%"; //$NON-NLS-1$
3407 return "^"; //$NON-NLS-1$
3409 return "&"; //$NON-NLS-1$
3410 case TokenNameMULTIPLY :
3411 return "*"; //$NON-NLS-1$
3413 return "|"; //$NON-NLS-1$
3414 case TokenNameTWIDDLE :
3415 return "~"; //$NON-NLS-1$
3416 case TokenNameTWIDDLE_EQUAL :
3417 return "~="; //$NON-NLS-1$
3418 case TokenNameDIVIDE :
3419 return "/"; //$NON-NLS-1$
3420 case TokenNameGREATER :
3421 return ">"; //$NON-NLS-1$
3422 case TokenNameLESS :
3423 return "<"; //$NON-NLS-1$
3424 case TokenNameLPAREN :
3425 return "("; //$NON-NLS-1$
3426 case TokenNameRPAREN :
3427 return ")"; //$NON-NLS-1$
3428 case TokenNameLBRACE :
3429 return "{"; //$NON-NLS-1$
3430 case TokenNameRBRACE :
3431 return "}"; //$NON-NLS-1$
3432 case TokenNameLBRACKET :
3433 return "["; //$NON-NLS-1$
3434 case TokenNameRBRACKET :
3435 return "]"; //$NON-NLS-1$
3436 case TokenNameSEMICOLON :
3437 return ";"; //$NON-NLS-1$
3438 case TokenNameQUESTION :
3439 return "?"; //$NON-NLS-1$
3440 case TokenNameCOLON :
3441 return ":"; //$NON-NLS-1$
3442 case TokenNameCOMMA :
3443 return ","; //$NON-NLS-1$
3445 return "."; //$NON-NLS-1$
3446 case TokenNameEQUAL :
3447 return "="; //$NON-NLS-1$
3450 case TokenNameDOLLAR_LBRACE :
// --- end of input, whitespace, comments and embedded HTML ---
3453 return "EOF"; //$NON-NLS-1$
3454 case TokenNameWHITESPACE :
3455 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3456 case TokenNameCOMMENT_LINE :
3457 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3458 case TokenNameCOMMENT_BLOCK :
3459 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3460 case TokenNameCOMMENT_PHPDOC :
3461 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3462 case TokenNameHTML :
3463 return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// any other value: report the numeric token kind together with the current token source
3465 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner with assert mode switched off.
 *
 * @param tokenizeComments                   stored in <code>tokenizeComments</code>
 * @param tokenizeWhiteSpace                 stored in <code>tokenizeWhiteSpace</code>
 * @param checkNonExternalizedStringLiterals stored in
 *                                           <code>checkNonExternalizedStringLiterals</code>
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
  this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
}

/**
 * Creates a scanner. The end-of-file position starts at Integer.MAX_VALUE
 * until resetTo(int, int) narrows the scanned range.
 *
 * @param tokenizeComments                   stored in <code>tokenizeComments</code>
 * @param tokenizeWhiteSpace                 stored in <code>tokenizeWhiteSpace</code>
 * @param checkNonExternalizedStringLiterals stored in
 *                                           <code>checkNonExternalizedStringLiterals</code>
 * @param assertMode                         stored in <code>assertMode</code>
 */
public Scanner(
  boolean tokenizeComments,
  boolean tokenizeWhiteSpace,
  boolean checkNonExternalizedStringLiterals,
  boolean assertMode) {

  // scanning options
  this.assertMode = assertMode;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeComments = tokenizeComments;

  // no explicit end of file yet
  this.eofPosition = Integer.MAX_VALUE;
}
3485 private void checkNonExternalizeString() throws InvalidInputException {
3486 if (currentLine == null)
3488 parseTags(currentLine);
3491 private void parseTags(NLSLine line) throws InvalidInputException {
3492 String s = new String(getCurrentTokenSource());
3493 int pos = s.indexOf(TAG_PREFIX);
3494 int lineLength = line.size();
3496 int start = pos + TAG_PREFIX_LENGTH;
3497 int end = s.indexOf(TAG_POSTFIX, start);
3498 String index = s.substring(start, end);
3501 i = Integer.parseInt(index) - 1;
3502 // Tags are one based not zero based.
3503 } catch (NumberFormatException e) {
3504 i = -1; // we don't want to consider this as a valid NLS tag
3506 if (line.exists(i)) {
3509 pos = s.indexOf(TAG_PREFIX, start);
3512 this.nonNLSStrings = new StringLiteral[lineLength];
3513 int nonNLSCounter = 0;
3514 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3515 StringLiteral literal = (StringLiteral) iterator.next();
3516 if (literal != null) {
3517 this.nonNLSStrings[nonNLSCounter++] = literal;
3520 if (nonNLSCounter == 0) {
3521 this.nonNLSStrings = null;
3525 this.wasNonExternalizedStringLiteral = true;
3526 if (nonNLSCounter != lineLength) {
3527 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
/**
 * Decodes the escape sequence at the current scan position and leaves the
 * resulting character in {@code currentCharacter}.
 * <p>
 * The visible code produces the single-character results '\b', '\t', '\n',
 * '\f', '\r', '\"', '\'' and '\\', and accumulates octal escapes of up to
 * three digits into one character.
 * <p>
 * NOTE(review): this listing appears to have lost its short lines (case
 * labels, break statements, else keywords, closing braces). The comments
 * below describe only what the visible lines establish.
 *
 * @throws InvalidInputException with {@code INVALID_ESCAPE}; both visible
 *         throw sites come after the start of the octal-escape section
 */
3533 public final void scanEscapeCharacter() throws InvalidInputException {
3534 // the string with "\\u" is a legal string of two chars \ and u
3535 //thus we use a direct access to the source (for regular cases).
// Flag set: reset it. In the visible lines of this branch the character
// already held in currentCharacter is reused rather than re-read.
3537 if (unicodeAsBackSlash) {
3538 // consume next character
3539 unicodeAsBackSlash = false;
3540 // if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
3541 // getNextUnicodeChar();
// Append the character to withoutUnicodeBuffer (pre-incrementing the
// pointer) only when withoutUnicodePtr is non-zero.
3543 if (withoutUnicodePtr != 0) {
3544 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Read the next source character and advance the position.
// NOTE(review): presumably the else-branch of the flag test above; the
// else keyword is not visible in this listing - confirm.
3548 currentCharacter = source[currentPosition++];
// Replace the escape letter by the character it denotes.
// NOTE(review): the case labels and break statements are not visible here;
// only the assigned values are.
3549 switch (currentCharacter) {
3551 currentCharacter = '\b';
3554 currentCharacter = '\t';
3557 currentCharacter = '\n';
3560 currentCharacter = '\f';
3563 currentCharacter = '\r';
3566 currentCharacter = '\"';
3569 currentCharacter = '\'';
3572 currentCharacter = '\\';
3575 // -----------octal escape--------------
3577 // OctalDigit OctalDigit
3578 // ZeroToThree OctalDigit OctalDigit
// Only values 0..7 pass the range test below, so letters and other
// non-octal characters are not treated as the start of an octal escape.
3580 int number = Character.getNumericValue(currentCharacter);
3581 if (number >= 0 && number <= 7) {
// true when the first digit is 4..7: a third digit then does not belong
// to this escape (see the "NotZeroToThree" case below).
3582 boolean zeroToThreeNot = number > 3;
// Consume the following character; it contributes only if it is an octal digit.
3583 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3584 int digit = Character.getNumericValue(currentCharacter);
3585 if (digit >= 0 && digit <= 7) {
// accumulate the second digit in base 8
3586 number = (number * 8) + digit;
// Consume one more character as a candidate third digit.
3587 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3588 if (zeroToThreeNot) {// has read \NotZeroToThree OctalDigit Digit --> ignore last character
3591 digit = Character.getNumericValue(currentCharacter);
3592 if (digit >= 0 && digit <= 7){ // has read \ZeroToThree OctalDigit OctalDigit
3593 number = (number * 8) + digit;
3594 } else {// has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
3598 } else { // has read \OctalDigit NonDigit--> ignore last character
3601 } else { // has read \OctalDigit NonOctalDigit--> ignore last character
3604 } else { // has read \OctalDigit --> ignore last character
// NOTE(review): the statements that "ignore last character" in the
// branches above, and the condition guarding the throw below, are not
// visible in this listing - confirm against the complete file.
3608 throw new InvalidInputException(INVALID_ESCAPE);
// The escape is replaced by the character whose code is the accumulated value.
3609 currentCharacter = (char) number;
// NOTE(review): presumably the else-branch of the 0..7 range test, i.e. the
// character is not a recognised escape; the else keyword is not visible - confirm.
3611 throw new InvalidInputException(INVALID_ESCAPE);
3615 // check presence of task: tags
/**
 * Scans the source between the given positions for occurrences of the
 * entries of {@code taskTags} and records every hit in the parallel arrays
 * {@code foundTaskTags}, {@code foundTaskPriorities},
 * {@code foundTaskPositions} and {@code foundTaskMessages}.
 * <p>
 * A first loop locates the tags; a second loop extracts the message text
 * that follows each newly found tag.
 * <p>
 * NOTE(review): this listing appears to have lost its short lines (some
 * declarations, loop-body statements, closing braces). The comments below
 * describe only what the visible lines establish.
 *
 * @param commentStart index in {@code source} where the scan starts (inclusive)
 * @param commentEnd index in {@code source} where the scan stops (exclusive);
 *        the scan also stops at {@code eofPosition}
 */
3616 public void checkTaskTag(int commentStart, int commentEnd) {
3618 // only look for newer task: tags
// The test is true when the most recently recorded task starts at or after
// commentStart. NOTE(review): the if-body (presumably a return) is not
// visible in this listing - confirm.
3619 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount-1][0] >= commentStart) {
// Remember the first slot filled by this call; the message loop below starts there.
3622 int foundTaskIndex = this.foundTaskCount;
// First pass: try every tag at every position i of the range.
3623 nextChar: for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
3626 char[] priority = null;
3628 // check for tag occurrence
3629 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++){
3630 tag = this.taskTags[itag];
// The priority is taken from taskPriorities at the tag's index when that
// array exists and is long enough. NOTE(review): the assignment target and
// the fallback value of this conditional are not visible here.
3632 this.taskPriorities != null && itag < this.taskPriorities.length ?
3633 this.taskPriorities[itag] :
3635 int tagLength = tag.length;
// Compare the tag character by character with the source starting at i.
// NOTE(review): i+t is not checked against the source length or commentEnd,
// so a partial tag match at the very end of the source could index past the
// array - confirm and add a bound check if so.
3636 for (int t = 0; t < tagLength; t++){
3637 if (this.source[i+t] != tag[t]) continue nextTag;
// Allocate the four parallel arrays on first use (capacity 5), or double
// them when the current capacity is exhausted.
3640 if (this.foundTaskTags == null){
3641 this.foundTaskTags = new char[5][];
3642 this.foundTaskMessages = new char[5][];
3643 this.foundTaskPriorities = new char[5][];
3644 this.foundTaskPositions = new int[5][];
3645 } else if (this.foundTaskCount == this.foundTaskTags.length) {
3646 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount*2][], 0, this.foundTaskCount);
3647 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount*2][], 0, this.foundTaskCount);
3648 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount*2][], 0, this.foundTaskCount);
3649 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount*2][], 0, this.foundTaskCount);
// Record the hit. The position initially spans just the tag and the message
// is a placeholder; both are updated by the second loop.
3651 this.foundTaskTags[this.foundTaskCount] = tag;
3652 this.foundTaskPriorities[this.foundTaskCount] = priority;
3653 this.foundTaskPositions[this.foundTaskCount] = new int[]{ i, i+tagLength-1 };
3654 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
3655 this.foundTaskCount++;
3657 i += tagLength-1; // will be incremented when looping
// Second pass: determine the message text for each task found by this call.
3661 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
3662 // retrieve message start and end positions
3663 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
3664 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd-1; // at most beginning of next task
3665 if (max_value < msgStart) max_value = msgStart; // would only occur if tag is before EOF.
// Look for the first line break between msgStart and max_value.
// NOTE(review): what happens on a match is not visible in this listing.
3669 for (int j = msgStart; j < max_value; j++){
3670 if ((c = this.source[j]) == '\n' || c == '\r'){
// Walk backwards from max_value looking for a '*'.
// NOTE(review): the enclosing condition and the match action are not visible here.
3677 for (int j = max_value; j > msgStart; j--){
3678 if ((c = this.source[j]) == '*') {
// Fall back to max_value when no end position was determined.
3683 if (end == -1) end = max_value;
3686 if (msgStart == end) continue; // empty
// Strip whitespace from both ends of the message.
// NOTE(review): each loop reads the array before testing msgStart <= end,
// so that test does not protect the access - confirm whether the operands
// should be swapped.
3689 while (CharOperation.isWhitespace(source[end]) && msgStart <= end) end--;
3690 while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end) msgStart++;
3692 // update the end position of the task
3693 this.foundTaskPositions[i][1] = end;
3695 // get the message source
3696 final int messageLength = end-msgStart+1;
3697 char[] message = new char[messageLength];
// Copy the trimmed range out of the source as this task's message.
3699 System.arraycopy(source, msgStart, message, 0, messageLength);
3700 this.foundTaskMessages[i] = message;