1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
17 import net.sourceforge.phpdt.core.compiler.IScanner;
18 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
19 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
20 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
22 public class Scanner implements IScanner, ITerminalSymbols {
25 - getNextToken() which return the current type of the token
26 (this value is not memorized by the scanner)
27 - getCurrentTokenSource() which provides with the token "REAL" source
28 (aka all unicode have been transformed into a correct char)
29 - sourceStart gives the position into the stream
30 - currentPosition-1 gives the sourceEnd position into the stream
34 private boolean assertMode;
35 public boolean useAssertAsAnIndentifier = false;
36 //flag indicating if processed source contains occurrences of keyword assert
37 public boolean containsAssertKeyword = false;
39 public boolean recordLineSeparator;
40 public boolean phpMode = false;
42 public char currentCharacter;
43 public int startPosition;
44 public int currentPosition;
45 public int initialPosition, eofPosition;
46 // after this position, EOF tokens are generated instead of real tokens from the source
48 public boolean tokenizeComments;
49 public boolean tokenizeWhiteSpace;
51 //source should be viewed as a window (i.e. a part)
52 //of an entire, very large stream
56 public char[] withoutUnicodeBuffer;
57 public int withoutUnicodePtr;
58 //when == 0 ==> no unicode in the current token
59 public boolean unicodeAsBackSlash = false;
61 public boolean scanningFloatLiteral = false;
63 //support for /** comments
64 //public char[][] comments = new char[10][];
65 public int[] commentStops = new int[10];
66 public int[] commentStarts = new int[10];
67 public int commentPtr = -1; // no comment test with commentPtr value -1
69 //diet parsing support - jump over some method body when requested
70 public boolean diet = false;
72 //support for the poor-line-debuggers ....
73 //remember the position of the cr/lf
74 public int[] lineEnds = new int[250];
75 public int linePtr = -1;
76 public boolean wasAcr = false;
78 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
80 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
81 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
82 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
83 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
84 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
85 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
86 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
88 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
89 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
90 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
91 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
93 //----------------optimized identifier management------------------
94 static final char[] charArray_a = new char[] { 'a' },
95 charArray_b = new char[] { 'b' },
96 charArray_c = new char[] { 'c' },
97 charArray_d = new char[] { 'd' },
98 charArray_e = new char[] { 'e' },
99 charArray_f = new char[] { 'f' },
100 charArray_g = new char[] { 'g' },
101 charArray_h = new char[] { 'h' },
102 charArray_i = new char[] { 'i' },
103 charArray_j = new char[] { 'j' },
104 charArray_k = new char[] { 'k' },
105 charArray_l = new char[] { 'l' },
106 charArray_m = new char[] { 'm' },
107 charArray_n = new char[] { 'n' },
108 charArray_o = new char[] { 'o' },
109 charArray_p = new char[] { 'p' },
110 charArray_q = new char[] { 'q' },
111 charArray_r = new char[] { 'r' },
112 charArray_s = new char[] { 's' },
113 charArray_t = new char[] { 't' },
114 charArray_u = new char[] { 'u' },
115 charArray_v = new char[] { 'v' },
116 charArray_w = new char[] { 'w' },
117 charArray_x = new char[] { 'x' },
118 charArray_y = new char[] { 'y' },
119 charArray_z = new char[] { 'z' };
121 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
122 static final int TableSize = 30, InternalTableSize = 6;
124 public static final int OptimizedLength = 6;
126 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
127 // support for detecting non-externalized string literals
128 int currentLineNr = -1;
129 int previousLineNr = -1;
130 NLSLine currentLine = null;
131 List lines = new ArrayList();
132 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
133 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
134 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
135 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
136 public StringLiteral[] nonNLSStrings = null;
137 public boolean checkNonExternalizedStringLiterals = true;
138 public boolean wasNonExternalizedStringLiteral = false;
// Instance initializer: pre-fills every slot of the optimized identifier
// cache with the shared placeholder array initCharArray.
{
    for (int i = 0; i < OptimizedLength; i++) {
        for (int j = 0; j < TableSize; j++) {
            for (int k = 0; k < InternalTableSize; k++) {
                charArray_length[i][j][k] = initCharArray;
            }
        }
    }
}
149 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
151 public static final int RoundBracket = 0;
152 public static final int SquareBracket = 1;
153 public static final int CurlyBracket = 2;
154 public static final int BracketKinds = 3;
156 public static final boolean DEBUG = false;
/**
 * Creates a scanner by delegating to the three-argument constructor,
 * passing {@code false} as its third argument.
 *
 * @param tokenizeComments forwarded unchanged to the three-argument constructor
 * @param tokenizeWhiteSpace forwarded unchanged to the three-argument constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
    this(tokenizeComments, tokenizeWhiteSpace, false);
}
166 * Determines if the specified character is
167 * permissible as the first character in a PHP identifier
169 public static boolean isPHPIdentifierStart(char ch) {
170 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
174 * Determines if the specified character may be part of a PHP identifier as
175 * other than the first character
177 public static boolean isPHPIdentifierPart(char ch) {
178 return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
181 public final boolean atEnd() {
182 // This code is not relevant if source is
183 // Only a part of the real stream input
185 return source.length == currentPosition;
187 public char[] getCurrentIdentifierSource() {
188 //return the token REAL source (aka unicodes are precomputed)
191 // if (withoutUnicodePtr != 0)
192 // //0 is used as a fast test flag so the real first char is in position 1
194 // withoutUnicodeBuffer,
196 // result = new char[withoutUnicodePtr],
198 // withoutUnicodePtr);
200 int length = currentPosition - startPosition;
201 switch (length) { // see OptimizedLength
203 return optimizedCurrentTokenSource1();
205 return optimizedCurrentTokenSource2();
207 return optimizedCurrentTokenSource3();
209 return optimizedCurrentTokenSource4();
211 return optimizedCurrentTokenSource5();
213 return optimizedCurrentTokenSource6();
216 System.arraycopy(source, startPosition, result = new char[length], 0, length);
220 public int getCurrentTokenEndPosition() {
221 return this.currentPosition - 1;
224 public final char[] getCurrentTokenSource() {
225 // Return the token REAL source (aka unicodes are precomputed)
228 // if (withoutUnicodePtr != 0)
229 // // 0 is used as a fast test flag so the real first char is in position 1
231 // withoutUnicodeBuffer,
233 // result = new char[withoutUnicodePtr],
235 // withoutUnicodePtr);
238 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
243 public final char[] getCurrentTokenSource(int startPos) {
244 // Return the token REAL source (aka unicodes are precomputed)
247 // if (withoutUnicodePtr != 0)
248 // // 0 is used as a fast test flag so the real first char is in position 1
250 // withoutUnicodeBuffer,
252 // result = new char[withoutUnicodePtr],
254 // withoutUnicodePtr);
257 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
262 public final char[] getCurrentTokenSourceString() {
263 //return the token REAL source (aka unicodes are precomputed).
264 //REMOVE the two " that are at the beginning and the end.
267 if (withoutUnicodePtr != 0)
268 //0 is used as a fast test flag so the real first char is in position 1
269 System.arraycopy(withoutUnicodeBuffer, 2,
270 //2 is 1 (real start) + 1 (to jump over the ")
271 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
274 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
278 public int getCurrentTokenStartPosition() {
279 return this.startPosition;
282 public final char[] getCurrentStringLiteralSource() {
283 // Return the token REAL source (aka unicodes are precomputed)
288 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
294 * Search the source position corresponding to the end of a given line number
296 * Line numbers are 1-based, and relative to the scanner initialPosition.
297 * Character positions are 0-based.
299 * In case the given line number is inconsistent, answers -1.
301 public final int getLineEnd(int lineNumber) {
303 if (lineEnds == null)
305 if (lineNumber >= lineEnds.length)
310 if (lineNumber == lineEnds.length - 1)
312 return lineEnds[lineNumber - 1];
313 // next line start one character behind the lineEnd of the previous line
316 * Search the source position corresponding to the beginning of a given line number
318 * Line numbers are 1-based, and relative to the scanner initialPosition.
319 * Character positions are 0-based.
321 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
323 * In case the given line number is inconsistent, answers -1.
325 public final int getLineStart(int lineNumber) {
327 if (lineEnds == null)
329 if (lineNumber >= lineEnds.length)
335 return initialPosition;
336 return lineEnds[lineNumber - 2] + 1;
337 // next line start one character behind the lineEnd of the previous line
339 public final boolean getNextChar(char testedChar) {
341 //handle the case of unicode.
342 //when a unicode appears then we must use a buffer that holds char internal values
343 //At the end of this method currentCharacter holds the new visited char
344 //and currentPosition points right next after it
345 //Both previous lines are true if the currentCharacter is == to the testedChar
346 //On false, no side effect has occured.
348 //ALL getNextChar.... ARE OPTIMIZED COPIES
350 int temp = currentPosition;
352 currentCharacter = source[currentPosition++];
353 // if (((currentCharacter = source[currentPosition++]) == '\\')
354 // && (source[currentPosition] == 'u')) {
355 // //-------------unicode traitement ------------
356 // int c1, c2, c3, c4;
357 // int unicodeSize = 6;
358 // currentPosition++;
359 // while (source[currentPosition] == 'u') {
360 // currentPosition++;
364 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
366 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
368 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
370 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
372 // currentPosition = temp;
376 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
377 // if (currentCharacter != testedChar) {
378 // currentPosition = temp;
381 // unicodeAsBackSlash = currentCharacter == '\\';
383 // //need the unicode buffer
384 // if (withoutUnicodePtr == 0) {
385 // //buffer all the entries that have been left aside....
386 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
390 // withoutUnicodeBuffer,
392 // withoutUnicodePtr);
394 // //fill the buffer with the char
395 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
398 // } //-------------end unicode traitement--------------
400 if (currentCharacter != testedChar) {
401 currentPosition = temp;
404 unicodeAsBackSlash = false;
405 // if (withoutUnicodePtr != 0)
406 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
409 } catch (IndexOutOfBoundsException e) {
410 unicodeAsBackSlash = false;
411 currentPosition = temp;
415 public final int getNextChar(char testedChar1, char testedChar2) {
416 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
417 //test can be done with (x==0) for the first and (x>0) for the second
418 //handle the case of unicode.
419 //when a unicode appears then we must use a buffer that holds char internal values
420 //At the end of this method currentCharacter holds the new visited char
421 //and currentPosition points right next after it
422 //Both previous lines are true if the currentCharacter is == to the testedChar1/2
423 //On false, no side effect has occured.
425 //ALL getNextChar.... ARE OPTIMIZED COPIES
427 int temp = currentPosition;
430 currentCharacter = source[currentPosition++];
431 // if (((currentCharacter = source[currentPosition++]) == '\\')
432 // && (source[currentPosition] == 'u')) {
433 // //-------------unicode traitement ------------
434 // int c1, c2, c3, c4;
435 // int unicodeSize = 6;
436 // currentPosition++;
437 // while (source[currentPosition] == 'u') {
438 // currentPosition++;
442 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
444 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
446 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
448 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
450 // currentPosition = temp;
454 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
455 // if (currentCharacter == testedChar1)
457 // else if (currentCharacter == testedChar2)
460 // currentPosition = temp;
464 // //need the unicode buffer
465 // if (withoutUnicodePtr == 0) {
466 // //buffer all the entries that have been left aside....
467 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
471 // withoutUnicodeBuffer,
473 // withoutUnicodePtr);
475 // //fill the buffer with the char
476 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
478 // } //-------------end unicode traitement--------------
480 if (currentCharacter == testedChar1)
482 else if (currentCharacter == testedChar2)
485 currentPosition = temp;
489 // if (withoutUnicodePtr != 0)
490 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
493 } catch (IndexOutOfBoundsException e) {
494 currentPosition = temp;
498 public final boolean getNextCharAsDigit() {
500 //handle the case of unicode.
501 //when a unicode appears then we must use a buffer that holds char internal values
502 //At the end of this method currentCharacter holds the new visited char
503 //and currentPosition points right next after it
504 //Both previous lines are true if the currentCharacter is a digit
505 //On false, no side effect has occured.
507 //ALL getNextChar.... ARE OPTIMIZED COPIES
509 int temp = currentPosition;
511 currentCharacter = source[currentPosition++];
512 // if (((currentCharacter = source[currentPosition++]) == '\\')
513 // && (source[currentPosition] == 'u')) {
514 // //-------------unicode traitement ------------
515 // int c1, c2, c3, c4;
516 // int unicodeSize = 6;
517 // currentPosition++;
518 // while (source[currentPosition] == 'u') {
519 // currentPosition++;
523 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
525 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
527 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
529 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
531 // currentPosition = temp;
535 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
536 // if (!Character.isDigit(currentCharacter)) {
537 // currentPosition = temp;
541 // //need the unicode buffer
542 // if (withoutUnicodePtr == 0) {
543 // //buffer all the entries that have been left aside....
544 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
548 // withoutUnicodeBuffer,
550 // withoutUnicodePtr);
552 // //fill the buffer with the char
553 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
555 // } //-------------end unicode traitement--------------
557 if (!Character.isDigit(currentCharacter)) {
558 currentPosition = temp;
561 // if (withoutUnicodePtr != 0)
562 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
565 } catch (IndexOutOfBoundsException e) {
566 currentPosition = temp;
570 public final boolean getNextCharAsDigit(int radix) {
572 //handle the case of unicode.
573 //when a unicode appears then we must use a buffer that holds char internal values
574 //At the end of this method currentCharacter holds the new visited char
575 //and currentPosition points right next after it
576 //Both previous lines are true if the currentCharacter is a digit base on radix
577 //On false, no side effect has occured.
579 //ALL getNextChar.... ARE OPTIMIZED COPIES
581 int temp = currentPosition;
583 currentCharacter = source[currentPosition++];
584 // if (((currentCharacter = source[currentPosition++]) == '\\')
585 // && (source[currentPosition] == 'u')) {
586 // //-------------unicode traitement ------------
587 // int c1, c2, c3, c4;
588 // int unicodeSize = 6;
589 // currentPosition++;
590 // while (source[currentPosition] == 'u') {
591 // currentPosition++;
595 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
597 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
599 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
601 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
603 // currentPosition = temp;
607 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
608 // if (Character.digit(currentCharacter, radix) == -1) {
609 // currentPosition = temp;
613 // //need the unicode buffer
614 // if (withoutUnicodePtr == 0) {
615 // //buffer all the entries that have been left aside....
616 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
620 // withoutUnicodeBuffer,
622 // withoutUnicodePtr);
624 // //fill the buffer with the char
625 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
627 // } //-------------end unicode traitement--------------
629 if (Character.digit(currentCharacter, radix) == -1) {
630 currentPosition = temp;
633 // if (withoutUnicodePtr != 0)
634 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
637 } catch (IndexOutOfBoundsException e) {
638 currentPosition = temp;
642 public boolean getNextCharAsJavaIdentifierPart() {
644 //handle the case of unicode.
645 //when a unicode appears then we must use a buffer that holds char internal values
646 //At the end of this method currentCharacter holds the new visited char
647 //and currentPosition points right next after it
648 //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
649 //On false, no side effect has occured.
651 //ALL getNextChar.... ARE OPTIMIZED COPIES
653 int temp = currentPosition;
655 currentCharacter = source[currentPosition++];
656 // if (((currentCharacter = source[currentPosition++]) == '\\')
657 // && (source[currentPosition] == 'u')) {
658 // //-------------unicode traitement ------------
659 // int c1, c2, c3, c4;
660 // int unicodeSize = 6;
661 // currentPosition++;
662 // while (source[currentPosition] == 'u') {
663 // currentPosition++;
667 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
669 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
671 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
673 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
675 // currentPosition = temp;
679 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
680 // if (!isPHPIdentifierPart(currentCharacter)) {
681 // currentPosition = temp;
685 // //need the unicode buffer
686 // if (withoutUnicodePtr == 0) {
687 // //buffer all the entries that have been left aside....
688 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
692 // withoutUnicodeBuffer,
694 // withoutUnicodePtr);
696 // //fill the buffer with the char
697 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
699 // } //-------------end unicode traitement--------------
701 if (!isPHPIdentifierPart(currentCharacter)) {
702 currentPosition = temp;
706 // if (withoutUnicodePtr != 0)
707 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
710 } catch (IndexOutOfBoundsException e) {
711 currentPosition = temp;
716 public int getNextToken() throws InvalidInputException {
717 int htmlPosition = currentPosition;
720 currentCharacter = source[currentPosition++];
721 if (currentCharacter == '<') {
722 if (getNextChar('?')) {
723 currentCharacter = source[currentPosition++];
724 if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
726 startPosition = currentPosition;
728 if (tokenizeWhiteSpace) {
729 // && (whiteStart != currentPosition - 1)) {
730 // reposition scanner in case we are interested by spaces as tokens
731 startPosition = htmlPosition;
732 return TokenNameHTML;
735 boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
737 int test = getNextChar('H', 'h');
739 test = getNextChar('P', 'p');
742 startPosition = currentPosition;
745 if (tokenizeWhiteSpace) {
746 // && (whiteStart != currentPosition - 1)) {
747 // reposition scanner in case we are interested by spaces as tokens
748 startPosition = htmlPosition;
749 return TokenNameHTML;
758 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
759 if (recordLineSeparator) {
766 } //-----------------end switch while try--------------------
767 catch (IndexOutOfBoundsException e) {
768 if (tokenizeWhiteSpace) {
769 // && (whiteStart != currentPosition - 1)) {
770 // reposition scanner in case we are interested by spaces as tokens
771 startPosition = htmlPosition;
779 jumpOverMethodBody();
781 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
784 while (true) { //loop for jumping over comments
785 withoutUnicodePtr = 0;
786 //start with a new token (even comment written with unicode )
788 // ---------Consume white space and handles startPosition---------
789 int whiteStart = currentPosition;
790 boolean isWhiteSpace;
792 startPosition = currentPosition;
793 currentCharacter = source[currentPosition++];
794 // if (((currentCharacter = source[currentPosition++]) == '\\')
795 // && (source[currentPosition] == 'u')) {
796 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
798 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
799 checkNonExternalizeString();
800 if (recordLineSeparator) {
806 isWhiteSpace = (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
808 } while (isWhiteSpace);
809 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
810 // reposition scanner in case we are interested by spaces as tokens
812 startPosition = whiteStart;
813 return TokenNameWHITESPACE;
815 //little trick to get out in the middle of a source computation
816 if (currentPosition > eofPosition)
819 // ---------Identify the next token-------------
821 switch (currentCharacter) {
823 return TokenNameLPAREN;
825 return TokenNameRPAREN;
827 return TokenNameLBRACE;
829 return TokenNameRBRACE;
831 return TokenNameLBRACKET;
833 return TokenNameRBRACKET;
835 return TokenNameSEMICOLON;
837 return TokenNameCOMMA;
840 if (getNextCharAsDigit())
841 return scanNumber(true);
846 if ((test = getNextChar('+', '=')) == 0)
847 return TokenNamePLUS_PLUS;
849 return TokenNamePLUS_EQUAL;
850 return TokenNamePLUS;
855 if ((test = getNextChar('-', '=')) == 0)
856 return TokenNameMINUS_MINUS;
858 return TokenNameMINUS_EQUAL;
859 if (getNextChar('>'))
860 return TokenNameMINUS_GREATER;
862 return TokenNameMINUS;
865 if (getNextChar('='))
866 return TokenNameTWIDDLE_EQUAL;
867 return TokenNameTWIDDLE;
869 if (getNextChar('=')) {
870 if (getNextChar('=')) {
871 return TokenNameNOT_EQUAL_EQUAL;
873 return TokenNameNOT_EQUAL;
877 if (getNextChar('='))
878 return TokenNameMULTIPLY_EQUAL;
879 return TokenNameMULTIPLY;
881 if (getNextChar('='))
882 return TokenNameREMAINDER_EQUAL;
883 return TokenNameREMAINDER;
887 if ((test = getNextChar('=', '<')) == 0)
888 return TokenNameLESS_EQUAL;
890 if (getNextChar('='))
891 return TokenNameLEFT_SHIFT_EQUAL;
892 if (getNextChar('<')) {
893 int heredocStart = currentPosition;
894 int heredocLength = 0;
895 currentCharacter = source[currentPosition++];
896 if (isPHPIdentifierStart(currentCharacter)) {
897 currentCharacter = source[currentPosition++];
899 return TokenNameERROR;
901 while (isPHPIdentifierPart(currentCharacter)) {
902 currentCharacter = source[currentPosition++];
905 heredocLength = currentPosition - heredocStart - 1;
907 // heredoc end-tag determination
908 boolean endTag = true;
911 ch = source[currentPosition++];
912 if (ch == '\r' || ch == '\n') {
913 if (recordLineSeparator) {
918 for (int i = 0; i < heredocLength; i++) {
919 if (source[currentPosition + i] != source[heredocStart + i]) {
925 currentPosition += heredocLength - 1;
926 currentCharacter = source[currentPosition++];
927 break; // do...while loop
935 return TokenNameHEREDOC;
937 return TokenNameLEFT_SHIFT;
939 return TokenNameLESS;
944 if ((test = getNextChar('=', '>')) == 0)
945 return TokenNameGREATER_EQUAL;
947 if ((test = getNextChar('=', '>')) == 0)
948 return TokenNameRIGHT_SHIFT_EQUAL;
949 return TokenNameRIGHT_SHIFT;
951 return TokenNameGREATER;
954 if (getNextChar('=')) {
955 if (getNextChar('=')) {
956 return TokenNameEQUAL_EQUAL_EQUAL;
958 return TokenNameEQUAL_EQUAL;
960 if (getNextChar('>'))
961 return TokenNameEQUAL_GREATER;
962 return TokenNameEQUAL;
966 if ((test = getNextChar('&', '=')) == 0)
967 return TokenNameAND_AND;
969 return TokenNameAND_EQUAL;
975 if ((test = getNextChar('|', '=')) == 0)
976 return TokenNameOR_OR;
978 return TokenNameOR_EQUAL;
982 if (getNextChar('='))
983 return TokenNameXOR_EQUAL;
986 if (getNextChar('>')) {
988 return TokenNameStopPHP;
990 return TokenNameQUESTION;
992 if (getNextChar(':'))
993 return TokenNameCOLON_COLON;
994 return TokenNameCOLON;
1000 // if ((test = getNextChar('\n', '\r')) == 0) {
1001 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1004 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1005 // for (int lookAhead = 0;
1008 // if (currentPosition + lookAhead
1009 // == source.length)
1011 // if (source[currentPosition + lookAhead]
1014 // if (source[currentPosition + lookAhead]
1016 // currentPosition += lookAhead + 1;
1020 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1023 // if (getNextChar('\'')) {
1024 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1025 // for (int lookAhead = 0;
1028 // if (currentPosition + lookAhead
1029 // == source.length)
1031 // if (source[currentPosition + lookAhead]
1034 // if (source[currentPosition + lookAhead]
1036 // currentPosition += lookAhead + 1;
1040 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1042 // if (getNextChar('\\'))
1043 // scanEscapeCharacter();
1044 // else { // consume next character
1045 // unicodeAsBackSlash = false;
1046 // if (((currentCharacter = source[currentPosition++])
1048 // && (source[currentPosition] == 'u')) {
1049 // getNextUnicodeChar();
1051 // if (withoutUnicodePtr != 0) {
1052 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1053 // currentCharacter;
1057 // // if (getNextChar('\''))
1058 // // return TokenNameCharacterLiteral;
1059 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1060 // for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1061 // if (currentPosition + lookAhead == source.length)
1063 // if (source[currentPosition + lookAhead] == '\n')
1065 // if (source[currentPosition + lookAhead] == '\'') {
1066 // currentPosition += lookAhead + 1;
1070 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1073 // consume next character
1074 unicodeAsBackSlash = false;
1075 currentCharacter = source[currentPosition++];
1076 // if (((currentCharacter = source[currentPosition++]) == '\\')
1077 // && (source[currentPosition] == 'u')) {
1078 // getNextUnicodeChar();
1080 // if (withoutUnicodePtr != 0) {
1081 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1082 // currentCharacter;
1086 while (currentCharacter != '\'') {
1088 /**** in PHP \r and \n are valid in string literals ****/
1089 // if ((currentCharacter == '\n')
1090 // || (currentCharacter == '\r')) {
1091 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1092 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1093 // if (currentPosition + lookAhead == source.length)
1095 // if (source[currentPosition + lookAhead] == '\n')
1097 // if (source[currentPosition + lookAhead] == '\"') {
1098 // currentPosition += lookAhead + 1;
1102 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1104 if (currentCharacter == '\\') {
1105 int escapeSize = currentPosition;
1106 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1107 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1108 scanSingleQuotedEscapeCharacter();
1109 escapeSize = currentPosition - escapeSize;
1110 if (withoutUnicodePtr == 0) {
1111 //buffer all the entries that have been left aside....
1112 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1113 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1114 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1115 } else { //overwrite the / in the buffer
1116 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1117 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1118 withoutUnicodePtr--;
1122 // consume next character
1123 unicodeAsBackSlash = false;
1124 currentCharacter = source[currentPosition++];
1125 // if (((currentCharacter = source[currentPosition++]) == '\\')
1126 // && (source[currentPosition] == 'u')) {
1127 // getNextUnicodeChar();
1129 if (withoutUnicodePtr != 0) {
1130 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1135 } catch (IndexOutOfBoundsException e) {
1136 throw new InvalidInputException(UNTERMINATED_STRING);
1137 } catch (InvalidInputException e) {
1138 if (e.getMessage().equals(INVALID_ESCAPE)) {
1139 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1140 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1141 if (currentPosition + lookAhead == source.length)
1143 if (source[currentPosition + lookAhead] == '\n')
1145 if (source[currentPosition + lookAhead] == '\'') {
1146 currentPosition += lookAhead + 1;
1154 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1155 if (currentLine == null) {
1156 currentLine = new NLSLine();
1157 lines.add(currentLine);
1159 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1161 return TokenNameStringConstant;
1164 // consume next character
1165 unicodeAsBackSlash = false;
1166 currentCharacter = source[currentPosition++];
1167 // if (((currentCharacter = source[currentPosition++]) == '\\')
1168 // && (source[currentPosition] == 'u')) {
1169 // getNextUnicodeChar();
1171 // if (withoutUnicodePtr != 0) {
1172 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1173 // currentCharacter;
1177 while (currentCharacter != '"') {
1179 /**** in PHP \r and \n are valid in string literals ****/
1180 // if ((currentCharacter == '\n')
1181 // || (currentCharacter == '\r')) {
1182 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1183 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1184 // if (currentPosition + lookAhead == source.length)
1186 // if (source[currentPosition + lookAhead] == '\n')
1188 // if (source[currentPosition + lookAhead] == '\"') {
1189 // currentPosition += lookAhead + 1;
1193 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1195 if (currentCharacter == '\\') {
1196 int escapeSize = currentPosition;
1197 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1198 //scanEscapeCharacter has a side effect on this value, and we need the previous value a few lines below
1199 scanDoubleQuotedEscapeCharacter();
1200 escapeSize = currentPosition - escapeSize;
1201 if (withoutUnicodePtr == 0) {
1202 //buffer all the entries that have been left aside....
1203 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1204 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1205 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1206 } else { //overwrite the / in the buffer
1207 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1208 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1209 withoutUnicodePtr--;
1213 // consume next character
1214 unicodeAsBackSlash = false;
1215 currentCharacter = source[currentPosition++];
1216 // if (((currentCharacter = source[currentPosition++]) == '\\')
1217 // && (source[currentPosition] == 'u')) {
1218 // getNextUnicodeChar();
1220 if (withoutUnicodePtr != 0) {
1221 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1226 } catch (IndexOutOfBoundsException e) {
1227 throw new InvalidInputException(UNTERMINATED_STRING);
1228 } catch (InvalidInputException e) {
1229 if (e.getMessage().equals(INVALID_ESCAPE)) {
1230 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1231 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1232 if (currentPosition + lookAhead == source.length)
1234 if (source[currentPosition + lookAhead] == '\n')
1236 if (source[currentPosition + lookAhead] == '\"') {
1237 currentPosition += lookAhead + 1;
1245 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1246 if (currentLine == null) {
1247 currentLine = new NLSLine();
1248 lines.add(currentLine);
1250 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1252 return TokenNameStringLiteral;
1255 // consume next character
1256 unicodeAsBackSlash = false;
1257 currentCharacter = source[currentPosition++];
1258 // if (((currentCharacter = source[currentPosition++]) == '\\')
1259 // && (source[currentPosition] == 'u')) {
1260 // getNextUnicodeChar();
1262 // if (withoutUnicodePtr != 0) {
1263 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1264 // currentCharacter;
1268 while (currentCharacter != '`') {
1270 /**** in PHP \r and \n are valid in string literals ****/
1271 // if ((currentCharacter == '\n')
1272 // || (currentCharacter == '\r')) {
1273 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1274 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1275 // if (currentPosition + lookAhead == source.length)
1277 // if (source[currentPosition + lookAhead] == '\n')
1279 // if (source[currentPosition + lookAhead] == '\"') {
1280 // currentPosition += lookAhead + 1;
1284 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1286 if (currentCharacter == '\\') {
1287 int escapeSize = currentPosition;
1288 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1289 //scanEscapeCharacter has a side effect on this value, and we need the previous value a few lines below
1290 scanDoubleQuotedEscapeCharacter();
1291 escapeSize = currentPosition - escapeSize;
1292 if (withoutUnicodePtr == 0) {
1293 //buffer all the entries that have been left aside....
1294 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1295 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1296 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1297 } else { //overwrite the / in the buffer
1298 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1299 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1300 withoutUnicodePtr--;
1304 // consume next character
1305 unicodeAsBackSlash = false;
1306 currentCharacter = source[currentPosition++];
1307 // if (((currentCharacter = source[currentPosition++]) == '\\')
1308 // && (source[currentPosition] == 'u')) {
1309 // getNextUnicodeChar();
1311 if (withoutUnicodePtr != 0) {
1312 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1317 } catch (IndexOutOfBoundsException e) {
1318 throw new InvalidInputException(UNTERMINATED_STRING);
1319 } catch (InvalidInputException e) {
1320 if (e.getMessage().equals(INVALID_ESCAPE)) {
1321 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1322 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1323 if (currentPosition + lookAhead == source.length)
1325 if (source[currentPosition + lookAhead] == '\n')
1327 if (source[currentPosition + lookAhead] == '`') {
1328 currentPosition += lookAhead + 1;
1336 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1337 if (currentLine == null) {
1338 currentLine = new NLSLine();
1339 lines.add(currentLine);
1341 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1343 return TokenNameStringInterpolated;
1348 if ((currentCharacter == '#') || (test = getNextChar('/', '*')) == 0) {
1350 int endPositionForLineComment = 0;
1351 try { //get the next char
1352 currentCharacter = source[currentPosition++];
1353 // if (((currentCharacter = source[currentPosition++])
1355 // && (source[currentPosition] == 'u')) {
1356 // //-------------unicode traitement ------------
1357 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1358 // currentPosition++;
1359 // while (source[currentPosition] == 'u') {
1360 // currentPosition++;
1363 // Character.getNumericValue(source[currentPosition++]))
1367 // Character.getNumericValue(source[currentPosition++]))
1371 // Character.getNumericValue(source[currentPosition++]))
1375 // Character.getNumericValue(source[currentPosition++]))
1378 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1380 // currentCharacter =
1381 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1385 //handle the \\u case manually into comment
1386 // if (currentCharacter == '\\') {
1387 // if (source[currentPosition] == '\\')
1388 // currentPosition++;
1389 // } //jump over the \\
1390 boolean isUnicode = false;
1391 while (currentCharacter != '\r' && currentCharacter != '\n') {
1392 if (currentCharacter == '?') {
1393 if (getNextChar('>')) {
1394 startPosition = currentPosition - 2;
1396 return TokenNameStopPHP;
1402 currentCharacter = source[currentPosition++];
1403 // if (((currentCharacter = source[currentPosition++])
1405 // && (source[currentPosition] == 'u')) {
1406 // isUnicode = true;
1407 // //-------------unicode traitement ------------
1408 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1409 // currentPosition++;
1410 // while (source[currentPosition] == 'u') {
1411 // currentPosition++;
1414 // Character.getNumericValue(source[currentPosition++]))
1418 // Character.getNumericValue(
1419 // source[currentPosition++]))
1423 // Character.getNumericValue(
1424 // source[currentPosition++]))
1428 // Character.getNumericValue(
1429 // source[currentPosition++]))
1432 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1434 // currentCharacter =
1435 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1438 //handle the \\u case manually into comment
1439 // if (currentCharacter == '\\') {
1440 // if (source[currentPosition] == '\\')
1441 // currentPosition++;
1442 // } //jump over the \\
1445 endPositionForLineComment = currentPosition - 6;
1447 endPositionForLineComment = currentPosition - 1;
1449 recordComment(false);
1450 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1451 checkNonExternalizeString();
1452 if (recordLineSeparator) {
1454 pushUnicodeLineSeparator();
1456 pushLineSeparator();
1462 if (tokenizeComments) {
1464 currentPosition = endPositionForLineComment;
1465 // reset one character behind
1467 return TokenNameCOMMENT_LINE;
1469 } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1470 if (tokenizeComments) {
1472 // reset one character behind
1473 return TokenNameCOMMENT_LINE;
1479 //traditional and annotation comment
1480 boolean isJavadoc = false, star = false;
1481 // consume next character
1482 unicodeAsBackSlash = false;
1483 currentCharacter = source[currentPosition++];
1484 // if (((currentCharacter = source[currentPosition++]) == '\\')
1485 // && (source[currentPosition] == 'u')) {
1486 // getNextUnicodeChar();
1488 // if (withoutUnicodePtr != 0) {
1489 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1490 // currentCharacter;
1494 if (currentCharacter == '*') {
1498 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1499 checkNonExternalizeString();
1500 if (recordLineSeparator) {
1501 pushLineSeparator();
1506 try { //get the next char
1507 currentCharacter = source[currentPosition++];
1508 // if (((currentCharacter = source[currentPosition++])
1510 // && (source[currentPosition] == 'u')) {
1511 // //-------------unicode traitement ------------
1512 // getNextUnicodeChar();
1514 //handle the \\u case manually into comment
1515 // if (currentCharacter == '\\') {
1516 // if (source[currentPosition] == '\\')
1517 // currentPosition++;
1518 // //jump over the \\
1520 // empty comment is not a javadoc /**/
1521 if (currentCharacter == '/') {
1524 //loop until end of comment */
1525 while ((currentCharacter != '/') || (!star)) {
1526 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1527 checkNonExternalizeString();
1528 if (recordLineSeparator) {
1529 pushLineSeparator();
1534 star = currentCharacter == '*';
1536 currentCharacter = source[currentPosition++];
1537 // if (((currentCharacter = source[currentPosition++])
1539 // && (source[currentPosition] == 'u')) {
1540 // //-------------unicode traitement ------------
1541 // getNextUnicodeChar();
1543 //handle the \\u case manually into comment
1544 // if (currentCharacter == '\\') {
1545 // if (source[currentPosition] == '\\')
1546 // currentPosition++;
1547 // } //jump over the \\
1549 recordComment(isJavadoc);
1550 if (tokenizeComments) {
1552 return TokenNameCOMMENT_PHPDOC;
1553 return TokenNameCOMMENT_BLOCK;
1555 } catch (IndexOutOfBoundsException e) {
1556 throw new InvalidInputException(UNTERMINATED_COMMENT);
1560 if (getNextChar('='))
1561 return TokenNameDIVIDE_EQUAL;
1562 return TokenNameDIVIDE;
1566 return TokenNameEOF;
1567 //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1568 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1571 if (currentCharacter == '$') {
1572 while ((currentCharacter = source[currentPosition++]) == '$') {
1574 if (currentCharacter == '{')
1575 return TokenNameDOLLAR_LBRACE;
1576 if (isPHPIdentifierStart(currentCharacter))
1577 return scanIdentifierOrKeyword(true);
1578 return TokenNameERROR;
1580 if (isPHPIdentifierStart(currentCharacter))
1581 return scanIdentifierOrKeyword(false);
1582 if (Character.isDigit(currentCharacter))
1583 return scanNumber(false);
1584 return TokenNameERROR;
1587 } //-----------------end switch while try--------------------
1588 catch (IndexOutOfBoundsException e) {
1591 return TokenNameEOF;
1594 // public final void getNextUnicodeChar()
1595 // throws IndexOutOfBoundsException, InvalidInputException {
1597 // //handle the case of unicode.
1598 // //when a unicode appears then we must use a buffer that holds char internal values
1599 // //At the end of this method currentCharacter holds the new visited char
1600 // //and currentPosition points right next after it
1602 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1604 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1605 // currentPosition++;
1606 // while (source[currentPosition] == 'u') {
1607 // currentPosition++;
1611 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1613 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1615 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1617 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1619 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1621 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1622 // //need the unicode buffer
1623 // if (withoutUnicodePtr == 0) {
1624 // //buffer all the entries that have been left aside....
1625 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1626 // System.arraycopy(
1629 // withoutUnicodeBuffer,
1631 // withoutUnicodePtr);
1633 // //fill the buffer with the char
1634 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1636 // unicodeAsBackSlash = currentCharacter == '\\';
1638 /* Tokenize a method body, assuming that curly brackets are properly balanced.
1640 public final void jumpOverMethodBody() {
// Advances currentPosition through source without returning a token; wasAcr is cleared first.
// NOTE(review): this listing omits several original lines (case labels, braces, try/break
// statements), so the comments below describe only the statements that are visible here.
1642 this.wasAcr = false;
1645 while (true) { //loop for jumping over comments
1646 // ---------Consume white space and handle startPosition---------
1647 boolean isWhiteSpace;
// each pass marks the token start, reads one character, and records CR/LF positions
// when recordLineSeparator is set
1649 startPosition = currentPosition;
1650 currentCharacter = source[currentPosition++];
1651 // if (((currentCharacter = source[currentPosition++]) == '\\')
1652 // && (source[currentPosition] == 'u')) {
1653 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1655 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1656 pushLineSeparator();
1657 isWhiteSpace = Character.isWhitespace(currentCharacter);
1659 } while (isWhiteSpace);
1661 // -------consume token until } is found---------
1662 switch (currentCharacter) {
// NOTE(review): the case labels are not visible; this part presumably handles a quoted
// literal whose content may start with a backslash escape -- confirm against the full file
1674 test = getNextChar('\\');
1677 scanDoubleQuotedEscapeCharacter();
1678 } catch (InvalidInputException ex) {
1681 // try { // consume next character
1682 unicodeAsBackSlash = false;
1683 currentCharacter = source[currentPosition++];
1684 // if (((currentCharacter = source[currentPosition++]) == '\\')
1685 // && (source[currentPosition] == 'u')) {
1686 // getNextUnicodeChar();
1688 if (withoutUnicodePtr != 0) {
1689 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1692 // } catch (InvalidInputException ex) {
1700 // try { // consume next character
1701 unicodeAsBackSlash = false;
1702 currentCharacter = source[currentPosition++];
1703 // if (((currentCharacter = source[currentPosition++]) == '\\')
1704 // && (source[currentPosition] == 'u')) {
1705 // getNextUnicodeChar();
1707 if (withoutUnicodePtr != 0) {
1708 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1711 // } catch (InvalidInputException ex) {
// read characters until the closing double quote, treating a backslash as an escape introducer
1713 while (currentCharacter != '"') {
1714 if (currentCharacter == '\r') {
1715 if (source[currentPosition] == '\n')
1718 // the string cannot go further than the line
1720 if (currentCharacter == '\n') {
1722 // the string cannot go further than the line
1724 if (currentCharacter == '\\') {
1726 scanDoubleQuotedEscapeCharacter();
1727 } catch (InvalidInputException ex) {
1730 // try { // consume next character
1731 unicodeAsBackSlash = false;
1732 currentCharacter = source[currentPosition++];
1733 // if (((currentCharacter = source[currentPosition++]) == '\\')
1734 // && (source[currentPosition] == 'u')) {
1735 // getNextUnicodeChar();
1737 if (withoutUnicodePtr != 0) {
1738 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1741 // } catch (InvalidInputException ex) {
1744 } catch (IndexOutOfBoundsException e) {
// when getNextChar('/', '*') yields 0 the code below reads up to the next CR or LF
// (presumably a '//' line comment -- the enclosing case label is not visible)
1751 if ((test = getNextChar('/', '*')) == 0) {
1755 currentCharacter = source[currentPosition++];
1756 // if (((currentCharacter = source[currentPosition++]) == '\\')
1757 // && (source[currentPosition] == 'u')) {
1758 // //-------------unicode traitement ------------
1759 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1760 // currentPosition++;
1761 // while (source[currentPosition] == 'u') {
1762 // currentPosition++;
1765 // Character.getNumericValue(source[currentPosition++]))
1769 // Character.getNumericValue(source[currentPosition++]))
1773 // Character.getNumericValue(source[currentPosition++]))
1777 // Character.getNumericValue(source[currentPosition++]))
1780 // //error don't care of the value
1781 // currentCharacter = 'A';
1782 // } //something different from \n and \r
1784 // currentCharacter =
1785 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1789 while (currentCharacter != '\r' && currentCharacter != '\n') {
1791 currentCharacter = source[currentPosition++];
1792 // if (((currentCharacter = source[currentPosition++])
1794 // && (source[currentPosition] == 'u')) {
1795 // //-------------unicode traitement ------------
1796 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1797 // currentPosition++;
1798 // while (source[currentPosition] == 'u') {
1799 // currentPosition++;
1802 // Character.getNumericValue(source[currentPosition++]))
1806 // Character.getNumericValue(source[currentPosition++]))
1810 // Character.getNumericValue(source[currentPosition++]))
1814 // Character.getNumericValue(source[currentPosition++]))
1817 // //error don't care of the value
1818 // currentCharacter = 'A';
1819 // } //something different from \n and \r
1821 // currentCharacter =
1822 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1826 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1827 pushLineSeparator();
1828 } catch (IndexOutOfBoundsException e) {
1829 } //an eof will then be generated
1833 //traditional and annotation comment
1834 boolean star = false;
1835 // try { // consume next character
1836 unicodeAsBackSlash = false;
1837 currentCharacter = source[currentPosition++];
1838 // if (((currentCharacter = source[currentPosition++]) == '\\')
1839 // && (source[currentPosition] == 'u')) {
1840 // getNextUnicodeChar();
1842 if (withoutUnicodePtr != 0) {
1843 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1846 // } catch (InvalidInputException ex) {
1848 if (currentCharacter == '*') {
1851 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1852 pushLineSeparator();
1853 try { //get the next char
1854 currentCharacter = source[currentPosition++];
1855 // if (((currentCharacter = source[currentPosition++]) == '\\')
1856 // && (source[currentPosition] == 'u')) {
1857 // //-------------unicode traitement ------------
1858 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1859 // currentPosition++;
1860 // while (source[currentPosition] == 'u') {
1861 // currentPosition++;
1864 // Character.getNumericValue(source[currentPosition++]))
1868 // Character.getNumericValue(source[currentPosition++]))
1872 // Character.getNumericValue(source[currentPosition++]))
1876 // Character.getNumericValue(source[currentPosition++]))
1879 // //error don't care of the value
1880 // currentCharacter = 'A';
1881 // } //something different from * and /
1883 // currentCharacter =
1884 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1887 //loop until end of comment */
// the loop ends only on a '/' that immediately follows a '*' (tracked by the star flag)
1888 while ((currentCharacter != '/') || (!star)) {
1889 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1890 pushLineSeparator();
1891 star = currentCharacter == '*';
1893 currentCharacter = source[currentPosition++];
1894 // if (((currentCharacter = source[currentPosition++])
1896 // && (source[currentPosition] == 'u')) {
1897 // //-------------unicode traitement ------------
1898 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1899 // currentPosition++;
1900 // while (source[currentPosition] == 'u') {
1901 // currentPosition++;
1904 // Character.getNumericValue(source[currentPosition++]))
1908 // Character.getNumericValue(source[currentPosition++]))
1912 // Character.getNumericValue(source[currentPosition++]))
1916 // Character.getNumericValue(source[currentPosition++]))
1919 // //error don't care of the value
1920 // currentCharacter = 'A';
1921 // } //something different from * and /
1923 // currentCharacter =
1924 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1928 } catch (IndexOutOfBoundsException e) {
// identifiers, keywords and '$'-prefixed names are consumed by scanIdentifierOrKeyword;
// the boolean argument is true when the current character is '$'
1937 if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
1939 scanIdentifierOrKeyword((currentCharacter == '$'));
1940 } catch (InvalidInputException ex) {
1944 if (Character.isDigit(currentCharacter)) {
1947 } catch (InvalidInputException ex) {
1953 //-----------------end switch while try--------------------
// both exception types are caught here rather than declared on the method signature
1954 } catch (IndexOutOfBoundsException e) {
1955 } catch (InvalidInputException e) {
1959 // public final boolean jumpOverUnicodeWhiteSpace()
1960 // throws InvalidInputException {
1962 // //handle the case of unicode. Jump over the next whiteSpace
1963 // //making startPosition pointing on the next available char
1964 // //On false, the currentCharacter is filled up with a potential
1968 // this.wasAcr = false;
1969 // int c1, c2, c3, c4;
1970 // int unicodeSize = 6;
1971 // currentPosition++;
1972 // while (source[currentPosition] == 'u') {
1973 // currentPosition++;
1977 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1979 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1981 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1983 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1985 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1988 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1989 // if (recordLineSeparator
1990 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1991 // pushLineSeparator();
1992 // if (Character.isWhitespace(currentCharacter))
1995 // //buffer the new char which is not a white space
1996 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1997 // //withoutUnicodePtr == 1 is true here
1999 // } catch (IndexOutOfBoundsException e) {
2000 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
/**
 * Copies the first {@code linePtr + 1} entries of {@code lineEnds} into a freshly
 * allocated array, so callers do not see the unused tail of the internal buffer.
 * NOTE(review): the declaration of {@code copy} and the return statement are not
 * visible in this listing; presumably {@code copy} is what gets returned -- confirm.
 */
2003 public final int[] getLineEnds() {
2004 //return a bounded copy of this.lineEnds
// the destination array is created inline as the third argument of arraycopy
2007 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
/**
 * Accessor returning a {@code char[]}.
 * NOTE(review): the body is not visible in this listing; presumably it returns the
 * scanner's {@code source} buffer -- confirm against the full file.
 */
2011 public char[] getSource() {
/**
 * Produces the char[] for a one-character token, based on the character found at
 * {@code source[startPosition]}.
 * NOTE(review): most of the body is missing from this listing; per the existing comments
 * it presumably returns a shared array for common single characters and only reaches the
 * allocation at the end otherwise -- confirm against the full file.
 */
2014 final char[] optimizedCurrentTokenSource1() {
2015 //return always the same char[] built only once
2017 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2018 char charOne = source[startPosition];
// fallback visible here: allocate a new one-element array holding the character
2073 return new char[] { charOne };
/**
 * Looks up (or registers) the two-character token starting at {@code startPosition} in the
 * {@code charArray_length[0]} cache so that equal tokens can share one char[].
 * NOTE(review): local declarations, the return statements and the wrap-around of
 * {@code max} are not visible in this listing; the comments describe visible lines only.
 */
2077 final char[] optimizedCurrentTokenSource2() {
2078 //try to return the same char[] built only once
// bucket index: both characters folded together (6-bit shift) modulo TableSize
2081 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2082 char[][] table = charArray_length[0][hash];
// first scan over the bucket, comparing both characters of each cached entry
2084 while (++i < InternalTableSize) {
2085 char[] charArray = table[i];
2086 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2089 //---------other side---------
// second scan, bounded by the newEntry2 slot index
2091 int max = newEntry2;
2092 while (++i <= max) {
2093 char[] charArray = table[i];
2094 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2097 //--------add the entry-------
// no match: store a new array in the next slot (slot index checked against InternalTableSize)
2098 if (++max >= InternalTableSize)
2101 table[max] = (r = new char[] { c0, c1 });
/**
 * Looks up (or registers) the three-character token starting at {@code startPosition} in the
 * {@code charArray_length[1]} cache so that equal tokens can share one char[].
 * NOTE(review): local declarations, the modulo of the hash, the return statements and the
 * wrap-around of {@code max} are not visible in this listing.
 */
2106 final char[] optimizedCurrentTokenSource3() {
2107 //try to return the same char[] built only once
// hash input: the three characters folded together with 12- and 6-bit shifts
2111 (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2113 char[][] table = charArray_length[1][hash];
// first scan over the bucket, comparing all three characters of each cached entry
2115 while (++i < InternalTableSize) {
2116 char[] charArray = table[i];
2117 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2120 //---------other side---------
// second scan, bounded by the newEntry3 slot index
2122 int max = newEntry3;
2123 while (++i <= max) {
2124 char[] charArray = table[i];
2125 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2128 //--------add the entry-------
// no match: store a new array in the next slot (slot index checked against InternalTableSize)
2129 if (++max >= InternalTableSize)
2132 table[max] = (r = new char[] { c0, c1, c2 });
/**
 * Looks up (or registers) the four-character token starting at {@code startPosition} in the
 * {@code charArray_length[2]} cache so that equal tokens can share one char[].
 * NOTE(review): the hash declaration and its modulo, the return statements and the
 * wrap-around of {@code max} are not visible in this listing.
 */
2137 final char[] optimizedCurrentTokenSource4() {
2138 //try to return the same char[] built only once
2140 char c0, c1, c2, c3;
// hash input is computed as a long: the first character is widened before the 18-bit shift
2142 ((((long) (c0 = source[startPosition])) << 18)
2143 + ((c1 = source[startPosition + 1]) << 12)
2144 + ((c2 = source[startPosition + 2]) << 6)
2145 + (c3 = source[startPosition + 3]))
2147 char[][] table = charArray_length[2][(int) hash];
// first scan over the bucket, comparing all four characters of each cached entry
2149 while (++i < InternalTableSize) {
2150 char[] charArray = table[i];
2151 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2154 //---------other side---------
// second scan, bounded by the newEntry4 slot index
2156 int max = newEntry4;
2157 while (++i <= max) {
2158 char[] charArray = table[i];
2159 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2162 //--------add the entry-------
// no match: store a new array in the next slot (slot index checked against InternalTableSize)
2163 if (++max >= InternalTableSize)
2166 table[max] = (r = new char[] { c0, c1, c2, c3 });
/**
 * Looks up (or registers) the five-character token starting at {@code startPosition} in the
 * {@code charArray_length[3]} cache so that equal tokens can share one char[].
 * NOTE(review): the hash declaration and its modulo, the return statements and the
 * wrap-around of {@code max} are not visible in this listing.
 */
2172 final char[] optimizedCurrentTokenSource5() {
2173 //try to return the same char[] built only once
2175 char c0, c1, c2, c3, c4;
// hash input is computed as a long: the two leading characters are widened before shifting
2177 ((((long) (c0 = source[startPosition])) << 24)
2178 + (((long) (c1 = source[startPosition + 1])) << 18)
2179 + ((c2 = source[startPosition + 2]) << 12)
2180 + ((c3 = source[startPosition + 3]) << 6)
2181 + (c4 = source[startPosition + 4]))
2183 char[][] table = charArray_length[3][(int) hash];
// first scan over the bucket, comparing all five characters of each cached entry
2185 while (++i < InternalTableSize) {
2186 char[] charArray = table[i];
2187 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2190 //---------other side---------
// second scan, bounded by the newEntry5 slot index
2192 int max = newEntry5;
2193 while (++i <= max) {
2194 char[] charArray = table[i];
2195 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2198 //--------add the entry-------
// no match: store a new array in the next slot (slot index checked against InternalTableSize)
2199 if (++max >= InternalTableSize)
2202 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
/**
 * Looks up (or registers) the six-character token starting at {@code startPosition} in the
 * {@code charArray_length[4]} cache so that equal tokens can share one char[].
 * NOTE(review): the hash declaration and its modulo, the return statements and the
 * wrap-around of {@code max} are not visible in this listing.
 */
2208 final char[] optimizedCurrentTokenSource6() {
2209 //try to return the same char[] built only once
2211 char c0, c1, c2, c3, c4, c5;
// hash input is computed as a long: the three leading characters are widened before shifting
2213 ((((long) (c0 = source[startPosition])) << 32)
2214 + (((long) (c1 = source[startPosition + 1])) << 24)
2215 + (((long) (c2 = source[startPosition + 2])) << 18)
2216 + ((c3 = source[startPosition + 3]) << 12)
2217 + ((c4 = source[startPosition + 4]) << 6)
2218 + (c5 = source[startPosition + 5]))
2220 char[][] table = charArray_length[4][(int) hash];
// first scan over the bucket, comparing all six characters of each cached entry
2222 while (++i < InternalTableSize) {
2223 char[] charArray = table[i];
2224 if ((c0 == charArray[0])
2225 && (c1 == charArray[1])
2226 && (c2 == charArray[2])
2227 && (c3 == charArray[3])
2228 && (c4 == charArray[4])
2229 && (c5 == charArray[5]))
2232 //---------other side---------
// second scan, bounded by the newEntry6 slot index
2234 int max = newEntry6;
2235 while (++i <= max) {
2236 char[] charArray = table[i];
2237 if ((c0 == charArray[0])
2238 && (c1 == charArray[1])
2239 && (c2 == charArray[2])
2240 && (c3 == charArray[3])
2241 && (c4 == charArray[4])
2242 && (c5 == charArray[5]))
2245 //--------add the entry-------
// no match: store a new array in the next slot (slot index checked against InternalTableSize)
2246 if (++max >= InternalTableSize)
2249 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
/**
 * Records the position of the line separator held in {@code currentCharacter} (located at
 * {@code currentPosition - 1}) in {@code lineEnds}, growing the array by {@code INCREMENT}
 * entries when it is full. A CR immediately followed by LF is stored as a single line end.
 * NOTE(review): several lines (try openers, early exits, wasAcr updates) are missing from
 * this listing; the comments describe the visible statements only.
 *
 * @throws InvalidInputException declared on the signature; no throwing statement is
 *         visible in this listing
 */
2254 public final void pushLineSeparator() throws InvalidInputException {
2255 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2256 final int INCREMENT = 250;
2258 if (this.checkNonExternalizedStringLiterals) {
2259 // reinitialize the current line for non externalize strings purpose
2262 //currentCharacter is at position currentPosition-1
2265 if (currentCharacter == '\r') {
2266 int separatorPos = currentPosition - 1;
// guard for a separator at or before the last recorded line end (its action is not visible here)
2267 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2269 //System.out.println("CR-" + separatorPos);
2271 lineEnds[++linePtr] = separatorPos;
2272 } catch (IndexOutOfBoundsException e) {
2273 //linePtr value is correct
// array was full: linePtr was already incremented, so grow the array and store at linePtr
2274 int oldLength = lineEnds.length;
2275 int[] old = lineEnds;
2276 lineEnds = new int[oldLength + INCREMENT];
2277 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2278 lineEnds[linePtr] = separatorPos;
2280 // look-ahead for merged cr+lf
2282 if (source[currentPosition] == '\n') {
2283 //System.out.println("look-ahead LF-" + currentPosition);
// the LF position replaces the CR position as the end of this line
2284 lineEnds[linePtr] = currentPosition;
2290 } catch (IndexOutOfBoundsException e) {
2295 if (currentCharacter == '\n') {
2296 //must merge eventual cr followed by lf
2297 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2298 //System.out.println("merge LF-" + (currentPosition - 1));
2299 lineEnds[linePtr] = currentPosition - 1;
2301 int separatorPos = currentPosition - 1;
2302 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2304 // System.out.println("LF-" + separatorPos);
2306 lineEnds[++linePtr] = separatorPos;
2307 } catch (IndexOutOfBoundsException e) {
2308 //linePtr value is correct
2309 int oldLength = lineEnds.length;
2310 int[] old = lineEnds;
2311 lineEnds = new int[oldLength + INCREMENT];
2312 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2313 lineEnds[linePtr] = separatorPos;
/**
 * Variant of the line-separator bookkeeping used when the CR or LF in
 * {@code currentCharacter} was read as a unicode character: the recorded position is
 * {@code currentPosition - 6} instead of {@code currentPosition - 1}.
 * NOTE(review): the offset 6 presumably is the length of a backslash-u escape with four hex
 * digits -- confirm. Several lines (try openers, early exits, wasAcr updates) are missing from
 * this listing; the comments describe the visible statements only.
 */
2320 public final void pushUnicodeLineSeparator() {
2321 // isUnicode means that the \r or \n has been read as a unicode character
2323 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2325 final int INCREMENT = 250;
2326 //currentCharacter is at position currentPosition-1
2328 if (this.checkNonExternalizedStringLiterals) {
2329 // reinitialize the current line for non externalize strings purpose
2334 if (currentCharacter == '\r') {
2335 int separatorPos = currentPosition - 6;
// guard for a separator at or before the last recorded line end (its action is not visible here)
2336 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2338 //System.out.println("CR-" + separatorPos);
2340 lineEnds[++linePtr] = separatorPos;
2341 } catch (IndexOutOfBoundsException e) {
2342 //linePtr value is correct
// array was full: linePtr was already incremented, so grow the array and store at linePtr
2343 int oldLength = lineEnds.length;
2344 int[] old = lineEnds;
2345 lineEnds = new int[oldLength + INCREMENT];
2346 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2347 lineEnds[linePtr] = separatorPos;
2349 // look-ahead for merged cr+lf
2350 if (source[currentPosition] == '\n') {
2351 //System.out.println("look-ahead LF-" + currentPosition);
// the LF position replaces the CR position as the end of this line
2352 lineEnds[linePtr] = currentPosition;
2360 if (currentCharacter == '\n') {
2361 //must merge eventual cr followed by lf
2362 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2363 //System.out.println("merge LF-" + (currentPosition - 1));
2364 lineEnds[linePtr] = currentPosition - 6;
2366 int separatorPos = currentPosition - 6;
2367 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2369 // System.out.println("LF-" + separatorPos);
2371 lineEnds[++linePtr] = separatorPos;
2372 } catch (IndexOutOfBoundsException e) {
2373 //linePtr value is correct
2374 int oldLength = lineEnds.length;
2375 int[] old = lineEnds;
2376 lineEnds = new int[oldLength + INCREMENT];
2377 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2378 lineEnds[linePtr] = separatorPos;
/**
 * Pushes the comment that has just been scanned onto the comment stacks: its end goes into
 * {@code commentStops} and its start ({@code startPosition}) into {@code commentStarts},
 * both at index {@code commentPtr}.
 *
 * @param isJavadoc when true the stop position is stored as {@code currentPosition};
 *        otherwise it is stored negated, so the sign encodes the comment kind
 */
2385 public final void recordComment(boolean isJavadoc) {
2387 // a new annotation comment is recorded
2389 commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2390 } catch (IndexOutOfBoundsException e) {
// stack was full: commentPtr is already incremented, so grow by 30 and store at commentPtr
2391 int oldStackLength = commentStops.length;
2392 int[] oldStack = commentStops;
2393 commentStops = new int[oldStackLength + 30];
2394 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2395 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2396 //grows the positions buffers too
// commentStarts is resized using the old length of commentStops
2397 int[] old = commentStarts;
2398 commentStarts = new int[oldStackLength + 30];
2399 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2402 //the buffer is of a correct size here
2403 commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning can start again at {@code begin}.
 *
 * {@code initialPosition}, {@code startPosition} and {@code currentPosition}
 * are all set to {@code begin}; {@code eofPosition} is set to {@code end + 1},
 * and the comment stack is emptied.
 *
 * @param begin position at which scanning restarts
 * @param end   last position to scan; {@code Integer.MAX_VALUE} is kept
 *              unchanged so that {@code end + 1} cannot overflow
 */
2405 public void resetTo(int begin, int end) {
2406 //reset the scanner to a given position where it may rescan again
2409 initialPosition = startPosition = currentPosition = begin;
2410 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2411 commentPtr = -1; // reset comment stack
/**
 * Handles the character that follows a backslash inside a single-quoted
 * string.
 *
 * The next character is read directly from {@code source} (advancing
 * {@code currentPosition}) and {@code currentCharacter} is then replaced by
 * the value selected in the switch; the visible assignments produce either a
 * single quote or a backslash.
 * NOTE(review): which input character selects which assignment depends on
 * the case labels of the switch -- confirm against them.
 */
2414 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2415 // the string with "\\u" is a legal string of two chars \ and u
2416 //thus we use a direct access to the source (for regular cases).
// (disabled) unicode-escape handling inherited from the Java scanner
2418 // if (unicodeAsBackSlash) {
2419 // // consume next character
2420 // unicodeAsBackSlash = false;
2421 // if (((currentCharacter = source[currentPosition++]) == '\\')
2422 // && (source[currentPosition] == 'u')) {
2423 // getNextUnicodeChar();
2425 // if (withoutUnicodePtr != 0) {
2426 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// read the escaped character straight from the source buffer
2430 currentCharacter = source[currentPosition++];
2431 switch (currentCharacter) {
2433 currentCharacter = '\'';
2436 currentCharacter = '\\';
2439 currentCharacter = '\\';
/**
 * Handles the character(s) that follow a backslash inside a double-quoted
 * string.
 *
 * The next character is read directly from {@code source} (advancing
 * {@code currentPosition}) and {@code currentCharacter} is replaced by the
 * escaped value. The visible assignments produce tab, newline, carriage
 * return, double quote, single quote, backslash and dollar; the backspace and
 * form-feed assignments are commented out.
 *
 * Otherwise an octal escape is attempted: up to three digits are accumulated
 * base 8 into {@code number}, and {@code currentCharacter} becomes
 * {@code (char) number}. A first digit greater than 3 limits the escape to
 * two digits (see {@code zeroToThreeNot}).
 *
 * @throws InvalidInputException with INVALID_ESCAPE; NOTE(review): the exact
 *         condition guarding the throw should be confirmed against the
 *         enclosing branches
 */
2444 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2445 // the string with "\\u" is a legal string of two chars \ and u
2446 //thus we use a direct access to the source (for regular cases).
// (disabled) unicode-escape handling inherited from the Java scanner
2448 // if (unicodeAsBackSlash) {
2449 // // consume next character
2450 // unicodeAsBackSlash = false;
2451 // if (((currentCharacter = source[currentPosition++]) == '\\')
2452 // && (source[currentPosition] == 'u')) {
2453 // getNextUnicodeChar();
2455 // if (withoutUnicodePtr != 0) {
2456 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// read the escaped character straight from the source buffer
2460 currentCharacter = source[currentPosition++];
2461 switch (currentCharacter) {
2463 // currentCharacter = '\b';
2466 currentCharacter = '\t';
2469 currentCharacter = '\n';
2472 // currentCharacter = '\f';
2475 currentCharacter = '\r';
2478 currentCharacter = '\"';
2481 currentCharacter = '\'';
2484 currentCharacter = '\\';
2487 currentCharacter = '$';
2490 // -----------octal escape--------------
2492 // OctalDigit OctalDigit
2493 // ZeroToThree OctalDigit OctalDigit
2495 int number = Character.getNumericValue(currentCharacter);
2496 if (number >= 0 && number <= 7) {
// a leading digit above 3 cannot start a three-digit octal escape
2497 boolean zeroToThreeNot = number > 3;
2498 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2499 int digit = Character.getNumericValue(currentCharacter);
2500 if (digit >= 0 && digit <= 7) {
2501 number = (number * 8) + digit;
2502 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2503 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2506 digit = Character.getNumericValue(currentCharacter);
2507 if (digit >= 0 && digit <= 7) {
2508 // has read \ZeroToThree OctalDigit OctalDigit
2509 number = (number * 8) + digit;
2510 } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2514 } else { // has read \OctalDigit NonDigit--> ignore last character
2517 } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2520 } else { // has read \OctalDigit --> ignore last character
2524 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the escaped character
2525 currentCharacter = (char) number;
2528 // throw new InvalidInputException(INVALID_ESCAPE);
2532 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2533 // return scanIdentifierOrKeyword( false );
/**
 * Consumes the remaining identifier characters of the current token and
 * classifies the token.
 *
 * Variables: the token source is compared with "$this", giving
 * {@code TokenNamethis} on a match and {@code TokenNameVariable} otherwise.
 * NOTE(review): this branch is presumably taken when {@code isVariable} is
 * true -- confirm against the guarding condition.
 *
 * Other tokens: a one-character token is always {@code TokenNameIdentifier}.
 * Longer tokens are copied, lower-cased, into a scratch array (so keyword
 * matching is case-insensitive) and compared character by character against
 * the keyword table, dispatching on the first letter. Anything that does not
 * match a keyword is {@code TokenNameIdentifier}.
 *
 * Side effect: {@code useAssertAsAnIndentifier} is reset to false.
 *
 * @param isVariable see the note above
 * @return the token kind ({@code TokenName...} constant)
 */
2536 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2539 //first dispatch on the first char.
2540 //then the length. If there are several
2541 //keywords with the same length AND the same first char, then do another
2542 //dispatch on the second char :-)...cool....but fast !
2544 useAssertAsAnIndentifier = false;
// consume every following identifier character
2546 while (getNextCharAsJavaIdentifierPart()) {
2550 if (new String(getCurrentTokenSource()).equals("$this")) {
2551 return TokenNamethis;
2553 return TokenNameVariable;
2558 // if (withoutUnicodePtr == 0)
2560 //quick test on length == 1 but not on length > 12 while most identifier
2561 //have a length which is <= 12...but there are lots of identifier with
2565 if ((length = currentPosition - startPosition) == 1)
2566 return TokenNameIdentifier;
// lower-cased copy of the token text: keywords are matched case-insensitively
2568 data = new char[length];
2569 index = startPosition;
2570 for (int i = 0; i < length; i++) {
2571 data[i] = Character.toLowerCase(source[index + i]);
2575 // if ((length = withoutUnicodePtr) == 1)
2576 // return TokenNameIdentifier;
2577 // // data = withoutUnicodeBuffer;
2578 // data = new char[withoutUnicodeBuffer.length];
2579 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2580 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2585 firstLetter = data[index];
2586 switch (firstLetter) {
2588 case 'a' : // as and (the array check is commented out)
2591 if ((data[++index] == 's')) {
2594 return TokenNameIdentifier;
2597 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2598 return TokenNameAND;
2600 return TokenNameIdentifier;
2603 // if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2604 // return TokenNamearray;
2606 // return TokenNameIdentifier;
2608 return TokenNameIdentifier;
2613 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
2614 return TokenNamebreak;
2616 return TokenNameIdentifier;
2618 return TokenNameIdentifier;
2621 case 'c' : //case class continue
2624 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
2625 return TokenNamecase;
2627 return TokenNameIdentifier;
2629 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
2630 return TokenNameclass;
2632 return TokenNameIdentifier;
2634 if ((data[++index] == 'o')
2635 && (data[++index] == 'n')
2636 && (data[++index] == 't')
2637 && (data[++index] == 'i')
2638 && (data[++index] == 'n')
2639 && (data[++index] == 'u')
2640 && (data[++index] == 'e'))
2641 return TokenNamecontinue;
2643 return TokenNameIdentifier;
2645 return TokenNameIdentifier;
2648 case 'd' : //define default do
2651 if ((data[++index] == 'o'))
2654 return TokenNameIdentifier;
2656 if ((data[++index] == 'e')
2657 && (data[++index] == 'f')
2658 && (data[++index] == 'i')
2659 && (data[++index] == 'n')
2660 && (data[++index] == 'e'))
2661 return TokenNamedefine;
2663 return TokenNameIdentifier;
2665 if ((data[++index] == 'e')
2666 && (data[++index] == 'f')
2667 && (data[++index] == 'a')
2668 && (data[++index] == 'u')
2669 && (data[++index] == 'l')
2670 && (data[++index] == 't'))
2671 return TokenNamedefault;
2673 return TokenNameIdentifier;
2675 return TokenNameIdentifier;
2677 case 'e' : //echo else elseif endif endfor endwhile endswitch endforeach extends
2680 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
2681 return TokenNameecho;
// index was already advanced to the second character by the test above, hence data[index] here
2682 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2683 return TokenNameelse;
2685 return TokenNameIdentifier;
2687 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
2688 return TokenNameendif;
2690 return TokenNameIdentifier;
2692 if ((data[++index] == 'n')
2693 && (data[++index] == 'd')
2694 && (data[++index] == 'f')
2695 && (data[++index] == 'o')
2696 && (data[++index] == 'r'))
2697 return TokenNameendfor;
2699 (data[index] == 'l')
2700 && (data[++index] == 's')
2701 && (data[++index] == 'e')
2702 && (data[++index] == 'i')
2703 && (data[++index] == 'f'))
2704 return TokenNameelseif;
2706 return TokenNameIdentifier;
2708 if ((data[++index] == 'x')
2709 && (data[++index] == 't')
2710 && (data[++index] == 'e')
2711 && (data[++index] == 'n')
2712 && (data[++index] == 'd')
2713 && (data[++index] == 's'))
2714 return TokenNameextends;
2716 return TokenNameIdentifier;
2717 case 8 : // endwhile
2718 if ((data[++index] == 'n')
2719 && (data[++index] == 'd')
2720 && (data[++index] == 'w')
2721 && (data[++index] == 'h')
2722 && (data[++index] == 'i')
2723 && (data[++index] == 'l')
2724 && (data[++index] == 'e'))
2725 return TokenNameendwhile;
2727 return TokenNameIdentifier;
2728 case 9 : // endswitch
2729 if ((data[++index] == 'n')
2730 && (data[++index] == 'd')
2731 && (data[++index] == 's')
2732 && (data[++index] == 'w')
2733 && (data[++index] == 'i')
2734 && (data[++index] == 't')
2735 && (data[++index] == 'c')
2736 && (data[++index] == 'h'))
2737 return TokenNameendswitch;
2739 return TokenNameIdentifier;
2740 case 10 : // endforeach
2741 if ((data[++index] == 'n')
2742 && (data[++index] == 'd')
2743 && (data[++index] == 'f')
2744 && (data[++index] == 'o')
2745 && (data[++index] == 'r')
2746 && (data[++index] == 'e')
2747 && (data[++index] == 'a')
2748 && (data[++index] == 'c')
2749 && (data[++index] == 'h'))
2750 return TokenNameendforeach;
2752 return TokenNameIdentifier;
2755 return TokenNameIdentifier;
2758 case 'f' : //for false foreach function
2761 if ((data[++index] == 'o') && (data[++index] == 'r'))
2762 return TokenNamefor;
2764 return TokenNameIdentifier;
2766 if ((data[++index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2767 return TokenNamefalse;
2769 return TokenNameIdentifier;
2770 case 7 : // foreach
2771 if ((data[++index] == 'o')
2772 && (data[++index] == 'r')
2773 && (data[++index] == 'e')
2774 && (data[++index] == 'a')
2775 && (data[++index] == 'c')
2776 && (data[++index] == 'h'))
2777 return TokenNameforeach;
2779 return TokenNameIdentifier;
2780 case 8 : // function
2781 if ((data[++index] == 'u')
2782 && (data[++index] == 'n')
2783 && (data[++index] == 'c')
2784 && (data[++index] == 't')
2785 && (data[++index] == 'i')
2786 && (data[++index] == 'o')
2787 && (data[++index] == 'n'))
2788 return TokenNamefunction;
2790 return TokenNameIdentifier;
2792 return TokenNameIdentifier;
2796 if ((data[++index] == 'l')
2797 && (data[++index] == 'o')
2798 && (data[++index] == 'b')
2799 && (data[++index] == 'a')
2800 && (data[++index] == 'l')) {
2801 return TokenNameglobal;
2804 return TokenNameIdentifier;
2809 if (data[++index] == 'f')
2812 return TokenNameIdentifier;
2814 // if ((data[++index] == 'n') && (data[++index] == 't'))
2815 // return TokenNameint;
2817 // return TokenNameIdentifier;
2819 if ((data[++index] == 'n')
2820 && (data[++index] == 'c')
2821 && (data[++index] == 'l')
2822 && (data[++index] == 'u')
2823 && (data[++index] == 'd')
2824 && (data[++index] == 'e'))
2825 return TokenNameinclude;
2827 return TokenNameIdentifier;
2829 if ((data[++index] == 'n')
2830 && (data[++index] == 'c')
2831 && (data[++index] == 'l')
2832 && (data[++index] == 'u')
2833 && (data[++index] == 'd')
2834 && (data[++index] == 'e')
2835 && (data[++index] == '_')
2836 && (data[++index] == 'o')
2837 && (data[++index] == 'n')
2838 && (data[++index] == 'c')
2839 && (data[++index] == 'e'))
2840 return TokenNameinclude_once;
2842 return TokenNameIdentifier;
2844 return TokenNameIdentifier;
2849 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
2850 return TokenNamelist;
2853 return TokenNameIdentifier;
2855 case 'n' : // new null
2858 if ((data[++index] == 'e') && (data[++index] == 'w'))
2859 return TokenNamenew;
2861 return TokenNameIdentifier;
2863 if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l'))
2864 return TokenNamenull;
2866 return TokenNameIdentifier;
2869 return TokenNameIdentifier;
2871 case 'o' : // or (the old_function check is commented out)
2873 if (data[++index] == 'r') {
2877 // if (length == 12) {
2878 // if ((data[++index] == 'l')
2879 // && (data[++index] == 'd')
2880 // && (data[++index] == '_')
2881 // && (data[++index] == 'f')
2882 // && (data[++index] == 'u')
2883 // && (data[++index] == 'n')
2884 // && (data[++index] == 'c')
2885 // && (data[++index] == 't')
2886 // && (data[++index] == 'i')
2887 // && (data[++index] == 'o')
2888 // && (data[++index] == 'n')) {
2889 // return TokenNameold_function;
2892 return TokenNameIdentifier;
2896 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
2897 return TokenNameprint;
2900 return TokenNameIdentifier;
2901 case 'r' : //return require require_once
2903 if ((data[++index] == 'e')
2904 && (data[++index] == 't')
2905 && (data[++index] == 'u')
2906 && (data[++index] == 'r')
2907 && (data[++index] == 'n')) {
2908 return TokenNamereturn;
2910 } else if (length == 7) {
2911 if ((data[++index] == 'e')
2912 && (data[++index] == 'q')
2913 && (data[++index] == 'u')
2914 && (data[++index] == 'i')
2915 && (data[++index] == 'r')
2916 && (data[++index] == 'e')) {
2917 return TokenNamerequire;
2919 } else if (length == 12) {
2920 if ((data[++index] == 'e')
2921 && (data[++index] == 'q')
2922 && (data[++index] == 'u')
2923 && (data[++index] == 'i')
2924 && (data[++index] == 'r')
2925 && (data[++index] == 'e')
2926 && (data[++index] == '_')
2927 && (data[++index] == 'o')
2928 && (data[++index] == 'n')
2929 && (data[++index] == 'c')
2930 && (data[++index] == 'e')) {
2931 return TokenNamerequire_once;
2934 return TokenNameIdentifier;
2936 case 's' : //static switch
2939 if (data[++index] == 't')
2940 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
2941 return TokenNamestatic;
2943 return TokenNameIdentifier;
2945 (data[index] == 'w')
2946 && (data[++index] == 'i')
2947 && (data[++index] == 't')
2948 && (data[++index] == 'c')
2949 && (data[++index] == 'h'))
2950 return TokenNameswitch;
2952 return TokenNameIdentifier;
2954 return TokenNameIdentifier;
2961 if ((data[++index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e'))
2962 return TokenNametrue;
2964 return TokenNameIdentifier;
2965 // if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
2966 // return TokenNamethis;
2969 return TokenNameIdentifier;
2975 if ((data[++index] == 'a') && (data[++index] == 'r'))
2976 return TokenNamevar;
2978 return TokenNameIdentifier;
2981 return TokenNameIdentifier;
2987 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
2988 return TokenNamewhile;
2990 return TokenNameIdentifier;
2991 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
2992 //return TokenNamewidefp ;
2994 //return TokenNameIdentifier;
2996 return TokenNameIdentifier;
3002 if ((data[++index] == 'o') && (data[++index] == 'r'))
3003 return TokenNameXOR;
3005 return TokenNameIdentifier;
3008 return TokenNameIdentifier;
3011 return TokenNameIdentifier;
/**
 * Scans a numeric literal whose first digit is {@code currentCharacter}.
 *
 * A leading "0x" / "0X" starts a hexadecimal integer. A leading 0 followed by
 * digits is a potential octal that can still turn into a double when a 'd'/'D'
 * suffix, a '.' or an exponent follows. Any other number is a decimal with an
 * optional fraction, optional exponent (with optional sign) and optional
 * 'd'/'D' suffix. The long and float suffix checks are commented out.
 *
 * @param dotPrefix true when the number was preceded by a '.'; the literal
 *                  then starts out as floating and no further '.' is accepted
 * @return {@code TokenNameIntegerLiteral} or {@code TokenNameDoubleLiteral}
 * @throws InvalidInputException INVALID_HEXA when the character after the
 *         x/X is not a hexadecimal digit; INVALID_FLOAT when an exponent
 *         marker (and its optional sign) is not followed by a digit
 */
3014 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3016 //when entering this method the currentCharacter is the first
3017 //digit of the number , i.e. it may be preceded by a . when
3020 boolean floating = dotPrefix;
3021 if ((!dotPrefix) && (currentCharacter == '0')) {
3022 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3023 //force the first char of the hexa number to exist...
3024 // consume next character
3025 unicodeAsBackSlash = false;
3026 currentCharacter = source[currentPosition++];
3027 // if (((currentCharacter = source[currentPosition++]) == '\\')
3028 // && (source[currentPosition] == 'u')) {
3029 // getNextUnicodeChar();
3031 // if (withoutUnicodePtr != 0) {
3032 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3035 if (Character.digit(currentCharacter, 16) == -1)
3036 throw new InvalidInputException(INVALID_HEXA);
// consume the remaining hexadecimal digits
3038 while (getNextCharAsDigit(16)) {
3040 // if (getNextChar('l', 'L') >= 0)
3041 // return TokenNameLongLiteral;
3043 return TokenNameIntegerLiteral;
3046 //there is x or X in the number
3047 //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3048 if (getNextCharAsDigit()) {
3049 //-------------potential octal-----------------
3050 while (getNextCharAsDigit()) {
3053 // if (getNextChar('l', 'L') >= 0) {
3054 // return TokenNameLongLiteral;
3057 // if (getNextChar('f', 'F') >= 0) {
3058 // return TokenNameFloatingPointLiteral;
3061 if (getNextChar('d', 'D') >= 0) {
3062 return TokenNameDoubleLiteral;
3063 } else { //make the distinction between octal and float ....
3064 if (getNextChar('.')) { //bingo ! ....
// fraction digits
3065 while (getNextCharAsDigit()) {
3067 if (getNextChar('e', 'E') >= 0) {
3068 // consume next character
3069 unicodeAsBackSlash = false;
3070 currentCharacter = source[currentPosition++];
3071 // if (((currentCharacter = source[currentPosition++]) == '\\')
3072 // && (source[currentPosition] == 'u')) {
3073 // getNextUnicodeChar();
3075 // if (withoutUnicodePtr != 0) {
3076 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3080 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3081 // consume next character
3082 unicodeAsBackSlash = false;
3083 currentCharacter = source[currentPosition++];
3084 // if (((currentCharacter = source[currentPosition++]) == '\\')
3085 // && (source[currentPosition] == 'u')) {
3086 // getNextUnicodeChar();
3088 // if (withoutUnicodePtr != 0) {
3089 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3090 // currentCharacter;
// an exponent needs at least one digit
3094 if (!Character.isDigit(currentCharacter))
3095 throw new InvalidInputException(INVALID_FLOAT);
3096 while (getNextCharAsDigit()) {
3099 // if (getNextChar('f', 'F') >= 0)
3100 // return TokenNameFloatingPointLiteral;
3101 getNextChar('d', 'D'); //jump over potential d or D
3102 return TokenNameDoubleLiteral;
3104 return TokenNameIntegerLiteral;
// ---- decimal number: integer digits ----
3112 while (getNextCharAsDigit()) {
3115 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3116 // return TokenNameLongLiteral;
3118 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3119 while (getNextCharAsDigit()) {
3124 //if floating is true both exponent and suffix may be optional
3126 if (getNextChar('e', 'E') >= 0) {
3128 // consume next character
3129 unicodeAsBackSlash = false;
3130 currentCharacter = source[currentPosition++];
3131 // if (((currentCharacter = source[currentPosition++]) == '\\')
3132 // && (source[currentPosition] == 'u')) {
3133 // getNextUnicodeChar();
3135 // if (withoutUnicodePtr != 0) {
3136 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3140 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
3141 unicodeAsBackSlash = false;
3142 currentCharacter = source[currentPosition++];
3143 // if (((currentCharacter = source[currentPosition++]) == '\\')
3144 // && (source[currentPosition] == 'u')) {
3145 // getNextUnicodeChar();
3147 // if (withoutUnicodePtr != 0) {
3148 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// an exponent needs at least one digit
3152 if (!Character.isDigit(currentCharacter))
3153 throw new InvalidInputException(INVALID_FLOAT);
3154 while (getNextCharAsDigit()) {
3158 if (getNextChar('d', 'D') >= 0)
3159 return TokenNameDoubleLiteral;
3160 // if (getNextChar('f', 'F') >= 0)
3161 // return TokenNameFloatingPointLiteral;
3163 //the long flag has been tested before
3165 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3168 * Search the line number corresponding to a specific position
// Maps a source position to a line number using the recorded line ends.
// Only the first linePtr + 1 entries of lineEnds are considered; the search
// compares position with lineEnds[m] between the bounds g (low) and d (high).
// NOTE(review): this looks like a binary search over lineEnds -- confirm the
// loop control, the value returned when lineEnds is null, and whether the
// result is 1-based.
3171 public final int getLineNumber(int position) {
3173 if (lineEnds == null)
3175 int length = linePtr + 1;
3178 int g = 0, d = length - 1;
3182 if (position < lineEnds[m]) {
3184 } else if (position > lineEnds[m]) {
3190 if (position < lineEnds[m]) {
// Setter taking the PHP-mode flag.
// NOTE(review): presumably stores mode in the public phpMode field -- confirm against the body.
3196 public void setPHPMode(boolean mode) {
/**
 * Installs the character buffer to scan and resets the scan position.
 *
 * A null argument is replaced by an empty array, so {@code this.source} is
 * never null afterwards. {@code initialPosition} and {@code currentPosition}
 * are reset to 0, {@code containsAssertKeyword} is cleared, and
 * {@code withoutUnicodeBuffer} is re-allocated with the same length as the
 * installed source.
 *
 * @param source the characters to scan; may be null
 */
3200 public final void setSource(char[] source) {
3201 //the source-buffer is set to sourceString
3203 if (source == null) {
3204 this.source = new char[0];
3206 this.source = source;
3209 initialPosition = currentPosition = 0;
3210 containsAssertKeyword = false;
3211 withoutUnicodeBuffer = new char[this.source.length];
/**
 * Debug rendering of the scanner state.
 *
 * Returns an "EOF" banner followed by the whole source when
 * {@code startPosition} equals the source length, and a "behind the EOF"
 * banner when {@code currentPosition} is past the source length. Otherwise
 * the source is split into three parts -- the text before
 * {@code startPosition}, the current token
 * ({@code startPosition} to {@code currentPosition - 1}) and the remainder --
 * and the token is framed by "Starts here" / "Ends here" markers.
 */
3215 public String toString() {
3216 if (startPosition == source.length)
3217 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3218 if (currentPosition > source.length)
3219 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
// everything before the current token
3221 char front[] = new char[startPosition];
3222 System.arraycopy(source, 0, front, 0, startPosition);
// the current token; a negative length yields an empty middle part
3224 int middleLength = (currentPosition - 1) - startPosition + 1;
3226 if (middleLength > -1) {
3227 middle = new char[middleLength];
3228 System.arraycopy(source, startPosition, middle, 0, middleLength);
3230 middle = new char[0];
// everything from currentPosition on; the array is one longer than the copied range
3233 char end[] = new char[source.length - (currentPosition - 1)];
3234 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3236 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3237 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable form of a token kind, for debugging.
 *
 * Keywords and operators are rendered as their literal text. Token kinds
 * whose text varies (identifiers, variables, literals, comments, whitespace,
 * HTML, ...) are rendered as {@code Kind(source)} using the current token
 * source. Any other value is rendered as {@code not-a-token(n)} followed by
 * the current token source.
 *
 * @param act the token kind ({@code TokenName...} constant)
 * @return the printable form described above
 */
3240 public final String toStringAction(int act) {
3243 case TokenNameERROR :
3244 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3245 case TokenNameStopPHP :
3246 return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3247 case TokenNameIdentifier :
3248 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3249 case TokenNameVariable :
3250 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// ---- keywords ----
3252 return "as"; //$NON-NLS-1$
3253 case TokenNamebreak :
3254 return "break"; //$NON-NLS-1$
3255 case TokenNamecase :
3256 return "case"; //$NON-NLS-1$
3257 case TokenNameclass :
3258 return "class"; //$NON-NLS-1$
3259 case TokenNamecontinue :
3260 return "continue"; //$NON-NLS-1$
3261 case TokenNamedefault :
3262 return "default"; //$NON-NLS-1$
3263 case TokenNamedefine :
3264 return "define"; //$NON-NLS-1$
3266 return "do"; //$NON-NLS-1$
3267 case TokenNameecho :
3268 return "echo"; //$NON-NLS-1$
3269 case TokenNameelse :
3270 return "else"; //$NON-NLS-1$
3271 case TokenNameelseif :
3272 return "elseif"; //$NON-NLS-1$
3273 case TokenNameendfor :
3274 return "endfor"; //$NON-NLS-1$
3275 case TokenNameendforeach :
3276 return "endforeach"; //$NON-NLS-1$
3277 case TokenNameendif :
3278 return "endif"; //$NON-NLS-1$
3279 case TokenNameendswitch :
3280 return "endswitch"; //$NON-NLS-1$
3281 case TokenNameendwhile :
3282 return "endwhile"; //$NON-NLS-1$
3283 case TokenNameextends :
3284 return "extends"; //$NON-NLS-1$
3285 case TokenNamefalse :
3286 return "false"; //$NON-NLS-1$
3288 return "for"; //$NON-NLS-1$
3289 case TokenNameforeach :
3290 return "foreach"; //$NON-NLS-1$
3291 case TokenNamefunction :
3292 return "function"; //$NON-NLS-1$
3293 case TokenNameglobal :
3294 return "global"; //$NON-NLS-1$
3296 return "if"; //$NON-NLS-1$
3297 case TokenNameinclude :
3298 return "include"; //$NON-NLS-1$
3299 case TokenNameinclude_once :
3300 return "include_once"; //$NON-NLS-1$
3301 case TokenNamelist :
3302 return "list"; //$NON-NLS-1$
3304 return "new"; //$NON-NLS-1$
3305 case TokenNamenull :
3306 return "null"; //$NON-NLS-1$
3307 case TokenNameprint :
3308 return "print"; //$NON-NLS-1$
3309 case TokenNamerequire :
3310 return "require"; //$NON-NLS-1$
3311 case TokenNamerequire_once :
3312 return "require_once"; //$NON-NLS-1$
3313 case TokenNamereturn :
3314 return "return"; //$NON-NLS-1$
3315 case TokenNamestatic :
3316 return "static"; //$NON-NLS-1$
3317 case TokenNameswitch :
3318 return "switch"; //$NON-NLS-1$
3319 case TokenNametrue :
3320 return "true"; //$NON-NLS-1$
3322 return "var"; //$NON-NLS-1$
3323 case TokenNamewhile :
3324 return "while"; //$NON-NLS-1$
3325 case TokenNamethis :
3326 return "$this"; //$NON-NLS-1$
// ---- literals ----
3327 case TokenNameIntegerLiteral :
3328 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3329 case TokenNameDoubleLiteral :
3330 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3331 case TokenNameStringLiteral :
3332 return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3333 case TokenNameStringConstant :
3334 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3335 case TokenNameStringInterpolated :
3336 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3337 case TokenNameHEREDOC :
3338 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// ---- operators and punctuation ----
3340 case TokenNamePLUS_PLUS :
3341 return "++"; //$NON-NLS-1$
3342 case TokenNameMINUS_MINUS :
3343 return "--"; //$NON-NLS-1$
3344 case TokenNameEQUAL_EQUAL :
3345 return "=="; //$NON-NLS-1$
3346 case TokenNameEQUAL_EQUAL_EQUAL :
3347 return "==="; //$NON-NLS-1$
3348 case TokenNameEQUAL_GREATER :
3349 return "=>"; //$NON-NLS-1$
3350 case TokenNameLESS_EQUAL :
3351 return "<="; //$NON-NLS-1$
3352 case TokenNameGREATER_EQUAL :
3353 return ">="; //$NON-NLS-1$
3354 case TokenNameNOT_EQUAL :
3355 return "!="; //$NON-NLS-1$
3356 case TokenNameNOT_EQUAL_EQUAL :
3357 return "!=="; //$NON-NLS-1$
3358 case TokenNameLEFT_SHIFT :
3359 return "<<"; //$NON-NLS-1$
3360 case TokenNameRIGHT_SHIFT :
3361 return ">>"; //$NON-NLS-1$
3362 case TokenNamePLUS_EQUAL :
3363 return "+="; //$NON-NLS-1$
3364 case TokenNameMINUS_EQUAL :
3365 return "-="; //$NON-NLS-1$
3366 case TokenNameMULTIPLY_EQUAL :
3367 return "*="; //$NON-NLS-1$
3368 case TokenNameDIVIDE_EQUAL :
3369 return "/="; //$NON-NLS-1$
3370 case TokenNameAND_EQUAL :
3371 return "&="; //$NON-NLS-1$
3372 case TokenNameOR_EQUAL :
3373 return "|="; //$NON-NLS-1$
3374 case TokenNameXOR_EQUAL :
3375 return "^="; //$NON-NLS-1$
3376 case TokenNameREMAINDER_EQUAL :
3377 return "%="; //$NON-NLS-1$
3378 case TokenNameLEFT_SHIFT_EQUAL :
3379 return "<<="; //$NON-NLS-1$
3380 case TokenNameRIGHT_SHIFT_EQUAL :
3381 return ">>="; //$NON-NLS-1$
3382 case TokenNameOR_OR :
3383 return "||"; //$NON-NLS-1$
3384 case TokenNameAND_AND :
3385 return "&&"; //$NON-NLS-1$
3386 case TokenNamePLUS :
3387 return "+"; //$NON-NLS-1$
3388 case TokenNameMINUS :
3389 return "-"; //$NON-NLS-1$
3390 case TokenNameMINUS_GREATER :
3393 return "!"; //$NON-NLS-1$
3394 case TokenNameREMAINDER :
3395 return "%"; //$NON-NLS-1$
3397 return "^"; //$NON-NLS-1$
3399 return "&"; //$NON-NLS-1$
3400 case TokenNameMULTIPLY :
3401 return "*"; //$NON-NLS-1$
3403 return "|"; //$NON-NLS-1$
3404 case TokenNameTWIDDLE :
3405 return "~"; //$NON-NLS-1$
3406 case TokenNameTWIDDLE_EQUAL :
3407 return "~="; //$NON-NLS-1$
3408 case TokenNameDIVIDE :
3409 return "/"; //$NON-NLS-1$
3410 case TokenNameGREATER :
3411 return ">"; //$NON-NLS-1$
3412 case TokenNameLESS :
3413 return "<"; //$NON-NLS-1$
3414 case TokenNameLPAREN :
3415 return "("; //$NON-NLS-1$
3416 case TokenNameRPAREN :
3417 return ")"; //$NON-NLS-1$
3418 case TokenNameLBRACE :
3419 return "{"; //$NON-NLS-1$
3420 case TokenNameRBRACE :
3421 return "}"; //$NON-NLS-1$
3422 case TokenNameLBRACKET :
3423 return "["; //$NON-NLS-1$
3424 case TokenNameRBRACKET :
3425 return "]"; //$NON-NLS-1$
3426 case TokenNameSEMICOLON :
3427 return ";"; //$NON-NLS-1$
3428 case TokenNameQUESTION :
3429 return "?"; //$NON-NLS-1$
3430 case TokenNameCOLON :
3431 return ":"; //$NON-NLS-1$
3432 case TokenNameCOMMA :
3433 return ","; //$NON-NLS-1$
3435 return "."; //$NON-NLS-1$
3436 case TokenNameEQUAL :
3437 return "="; //$NON-NLS-1$
3440 case TokenNameDOLLAR_LBRACE :
3443 return "EOF"; //$NON-NLS-1$
// ---- whitespace, comments and inline HTML ----
3444 case TokenNameWHITESPACE :
3445 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3446 case TokenNameCOMMENT_LINE :
3447 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3448 case TokenNameCOMMENT_BLOCK :
3449 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3450 case TokenNameCOMMENT_PHPDOC :
3451 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3452 case TokenNameHTML :
3453 return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// fallback for values that are not handled above
3455 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner with assert mode switched off; delegates to the
 * four-argument constructor, passing {@code false} for {@code assertMode}.
 *
 * @param tokenizeComments                   stored in {@code this.tokenizeComments}
 * @param tokenizeWhiteSpace                 stored in {@code this.tokenizeWhiteSpace}
 * @param checkNonExternalizedStringLiterals stored in
 *                                           {@code this.checkNonExternalizedStringLiterals}
 */
3459 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
3460 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
3464 boolean tokenizeComments,
3465 boolean tokenizeWhiteSpace,
3466 boolean checkNonExternalizedStringLiterals,
3467 boolean assertMode) {
// four-argument constructor body: eofPosition starts at Integer.MAX_VALUE,
// then each argument is copied into the field of the same name
3468 this.eofPosition = Integer.MAX_VALUE;
3469 this.tokenizeComments = tokenizeComments;
3470 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3471 this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3472 this.assertMode = assertMode;
/**
 * Runs {@code parseTags} on {@code currentLine}.
 * NOTE(review): the null test on {@code currentLine} presumably returns early
 * so that {@code parseTags} is only called with a non-null line -- confirm.
 *
 * @throws InvalidInputException declared by this method and by {@code parseTags}
 */
3475 private void checkNonExternalizeString() throws InvalidInputException {
3476 if (currentLine == null)
3478 parseTags(currentLine);
3481 private void parseTags(NLSLine line) throws InvalidInputException {
3482 String s = new String(getCurrentTokenSource());
3483 int pos = s.indexOf(TAG_PREFIX);
3484 int lineLength = line.size();
3486 int start = pos + TAG_PREFIX_LENGTH;
3487 int end = s.indexOf(TAG_POSTFIX, start);
3488 String index = s.substring(start, end);
3491 i = Integer.parseInt(index) - 1;
3492 // Tags are one based not zero based.
3493 } catch (NumberFormatException e) {
3494 i = -1; // we don't want to consider this as a valid NLS tag
3496 if (line.exists(i)) {
3499 pos = s.indexOf(TAG_PREFIX, start);
3502 this.nonNLSStrings = new StringLiteral[lineLength];
3503 int nonNLSCounter = 0;
3504 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3505 StringLiteral literal = (StringLiteral) iterator.next();
3506 if (literal != null) {
3507 this.nonNLSStrings[nonNLSCounter++] = literal;
3510 if (nonNLSCounter == 0) {
3511 this.nonNLSStrings = null;
3515 this.wasNonExternalizedStringLiteral = true;
3516 if (nonNLSCounter != lineLength) {
3517 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);