1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
17 import net.sourceforge.phpdt.core.compiler.*;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
20 public class Scanner implements IScanner, ITerminalSymbols {
23 - getNextToken() which returns the current type of the token
24 (this value is not memorized by the scanner)
25 - getCurrentTokenSource() which provides with the token "REAL" source
26 (aka all unicode have been transformed into a correct char)
27 - sourceStart gives the position into the stream
28 - currentPosition-1 gives the sourceEnd position into the stream
// Mutable scanner state. Most of these fields are public and are read and
// written directly by the scanning methods of this class.
32 private boolean assertMode;
33 public boolean useAssertAsAnIndentifier = false;
34 //flag indicating if processed source contains occurrences of keyword assert
35 public boolean containsAssertKeyword = false;
// consulted by getNextToken() whenever a '\r' or '\n' character is read
37 public boolean recordLineSeparator;
// presumably true while the scanner is inside a PHP section rather than the
// surrounding HTML - TODO confirm against getNextToken()
38 public boolean phpMode = false;
// most recently read character (assigned from source[currentPosition++])
40 public char currentCharacter;
// index in source where the current token starts
41 public int startPosition;
// index of the next character to read; currentPosition - 1 is the end of the current token
42 public int currentPosition;
43 public int initialPosition, eofPosition;
44 // after this position eof are generated instead of real token from the source
// tokenizeWhiteSpace: when set, getNextToken() returns white space (and HTML text) as tokens
46 public boolean tokenizeComments;
47 public boolean tokenizeWhiteSpace;
49 //source should be viewed as a window (aka a part)
50 //of a entire very large stream
// buffer for token text whose escapes have been processed; slot 0 is unused,
// the first real character lives at index 1
54 public char[] withoutUnicodeBuffer;
55 public int withoutUnicodePtr;
56 //when == 0 ==> no unicode in the current token
57 public boolean unicodeAsBackSlash = false;
59 public boolean scanningFloatLiteral = false;
61 //support for /** comments
62 //public char[][] comments = new char[10][];
63 public int[] commentStops = new int[10];
64 public int[] commentStarts = new int[10];
65 public int commentPtr = -1; // no comment test with commentPtr value -1
67 //diet parsing support - jump over some method body when requested
68 public boolean diet = false;
70 //support for the poor-line-debuggers ....
71 //remember the position of the cr/lf
// lineEnds is read by getLineEnd() and getLineStart(); it starts with room for 250 entries
72 public int[] lineEnds = new int[250];
// presumably the index of the last entry recorded in lineEnds (-1 when none) - TODO confirm
73 public int linePtr = -1;
// presumably true when the previous line terminator character was a '\r' - TODO confirm
74 public boolean wasAcr = false;
// Message keys used as the message of InvalidInputException. The scanner both
// throws them (e.g. UNTERMINATED_STRING) and compares them against
// e.getMessage() (e.g. INVALID_ESCAPE), so the literal values must stay stable.
76 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
78 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
79 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
80 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
81 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
82 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
83 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
84 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
86 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
87 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
88 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
89 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
91 //----------------optimized identifier management------------------
// One shared single-character array per lower-case ASCII letter.
// Presumably handed out by optimizedCurrentTokenSource1() so that one-letter
// identifiers do not allocate - TODO confirm against that method.
92 static final char[] charArray_a = new char[] { 'a' },
93 charArray_b = new char[] { 'b' },
94 charArray_c = new char[] { 'c' },
95 charArray_d = new char[] { 'd' },
96 charArray_e = new char[] { 'e' },
97 charArray_f = new char[] { 'f' },
98 charArray_g = new char[] { 'g' },
99 charArray_h = new char[] { 'h' },
100 charArray_i = new char[] { 'i' },
101 charArray_j = new char[] { 'j' },
102 charArray_k = new char[] { 'k' },
103 charArray_l = new char[] { 'l' },
104 charArray_m = new char[] { 'm' },
105 charArray_n = new char[] { 'n' },
106 charArray_o = new char[] { 'o' },
107 charArray_p = new char[] { 'p' },
108 charArray_q = new char[] { 'q' },
109 charArray_r = new char[] { 'r' },
110 charArray_s = new char[] { 's' },
111 charArray_t = new char[] { 't' },
112 charArray_u = new char[] { 'u' },
113 charArray_v = new char[] { 'v' },
114 charArray_w = new char[] { 'w' },
115 charArray_x = new char[] { 'x' },
116 charArray_y = new char[] { 'y' },
117 charArray_z = new char[] { 'z' };
// placeholder of six NUL characters stored in every slot of charArray_length at construction
119 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
// dimensions of the per-instance table below
120 static final int TableSize = 30, InternalTableSize = 6;
// getCurrentIdentifierSource() switches on the token length with a reference to this constant
122 public static final int OptimizedLength = 6;
// per-instance table of char[] entries, indexed [OptimizedLength][TableSize][InternalTableSize]
124 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
125 // support for detecting non-externalized string literals
126 int currentLineNr = -1;
127 int previousLineNr = -1;
// line currently collecting string literals; created on demand and appended to
// 'lines' when a string literal is scanned and no line is open
128 NLSLine currentLine = null;
// raw List; it receives NLSLine instances
129 List lines = new ArrayList();
// textual form of an NLS marker comment: TAG_PREFIX + <int> + TAG_POSTFIX
130 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
131 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
132 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
133 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
134 public StringLiteral[] nonNLSStrings = null;
// when true, getNextToken() records every scanned string literal in currentLine
135 public boolean checkNonExternalizedStringLiterals = true;
136 public boolean wasNonExternalizedStringLiteral = false;
// Pre-fills every slot of charArray_length with the shared initCharArray placeholder.
// NOTE(review): the outer bound is the literal 6, which equals OptimizedLength
// (the first dimension of charArray_length); the two must be kept in sync.
139 for (int i = 0; i < 6; i++) {
140 for (int j = 0; j < TableSize; j++) {
141 for (int k = 0; k < InternalTableSize; k++) {
142 charArray_length[i][j][k] = initCharArray;
// Static (shared by all Scanner instances) counters, one per token length 2..6.
// Presumably the next insertion index used by the optimizedCurrentTokenSourceN()
// caches - TODO confirm against those methods.
147 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
// bracket kind identifiers; BracketKinds is the number of kinds defined above
149 public static final int RoundBracket = 0;
150 public static final int SquareBracket = 1;
151 public static final int CurlyBracket = 2;
152 public static final int BracketKinds = 3;
// compile-time debug switch, disabled
154 public static final boolean DEBUG = false;
// Convenience constructor: delegates to the three-argument constructor,
// passing false as the third argument.
// NOTE(review): the third parameter is presumably the assert-mode flag - confirm
// against the three-argument constructor.
159 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
160 this(tokenizeComments, tokenizeWhiteSpace, false);
164 * Determines if the specified character is
165 * permissible as the first character in a PHP identifier
// Returns true when ch may start a PHP identifier: any character accepted by
// Character.isLetter, the underscore, or a char value in the range 0x7F..0xFF.
167 public static boolean isPHPIdentifierStart(char ch) {
168 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
172 * Determines if the specified character may be part of a PHP identifier as
173 * other than the first character
// Returns true when ch may appear after the first character of a PHP identifier:
// any character accepted by Character.isLetterOrDigit, the underscore, or a
// char value in the range 0x7F..0xFF.
175 public static boolean isPHPIdentifierPart(char ch) {
176 return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
// Returns true when currentPosition equals the length of the source array,
// i.e. every character of source has been consumed.
179 public final boolean atEnd() {
180 // This code is not relevant if source is
181 // Only a part of the real stream input
183 return source.length == currentPosition;
// Returns the characters of the current identifier token.
// The token length is currentPosition - startPosition. Short tokens are handed to
// the optimizedCurrentTokenSource1..6() helpers; any other length is copied out
// of source into a newly allocated array.
185 public char[] getCurrentIdentifierSource() {
186 //return the token REAL source (aka unicodes are precomputed)
// the withoutUnicodeBuffer path below is disabled (commented out)
189 // if (withoutUnicodePtr != 0)
190 // //0 is used as a fast test flag so the real first char is in position 1
192 // withoutUnicodeBuffer,
194 // result = new char[withoutUnicodePtr],
196 // withoutUnicodePtr);
198 int length = currentPosition - startPosition;
199 switch (length) { // see OptimizedLength
201 return optimizedCurrentTokenSource1();
203 return optimizedCurrentTokenSource2();
205 return optimizedCurrentTokenSource3();
207 return optimizedCurrentTokenSource4();
209 return optimizedCurrentTokenSource5();
211 return optimizedCurrentTokenSource6();
// fallback: plain copy of source[startPosition .. currentPosition - 1]
214 System.arraycopy(source, startPosition, result = new char[length], 0, length);
// Returns the index in source of the last character of the current token
// (currentPosition already points one past it).
218 public int getCurrentTokenEndPosition() {
219 return this.currentPosition - 1;
// Copies the characters of the current token, source[startPosition .. currentPosition - 1],
// into a newly allocated array. The withoutUnicodeBuffer path is disabled (commented out).
223 public final char[] getCurrentTokenSource() {
224 // Return the token REAL source (aka unicodes are precomputed)
227 // if (withoutUnicodePtr != 0)
228 // // 0 is used as a fast test flag so the real first char is in position 1
230 // withoutUnicodeBuffer,
232 // result = new char[withoutUnicodePtr],
234 // withoutUnicodePtr);
// 'length' is assigned inside the argument list, before it is used as the last argument
237 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
// Variant of getCurrentTokenSource() with an explicit start: copies
// source[startPos .. currentPosition - 1] into a newly allocated array.
// startPos is not validated here; a value greater than currentPosition yields a
// negative array size.
242 public final char[] getCurrentTokenSource(int startPos) {
243 // Return the token REAL source (aka unicodes are precomputed)
246 // if (withoutUnicodePtr != 0)
247 // // 0 is used as a fast test flag so the real first char is in position 1
249 // withoutUnicodeBuffer,
251 // result = new char[withoutUnicodePtr],
253 // withoutUnicodePtr);
256 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
// Returns the text of the current string token without its opening and closing
// quote characters. Two sources are possible:
//  - withoutUnicodePtr != 0: the processed text held in withoutUnicodeBuffer
//    (slot 0 unused, slot 1 is the opening quote, so copying starts at index 2);
//  - otherwise: the raw characters of source between the two quotes.
261 public final char[] getCurrentTokenSourceString() {
262 //return the token REAL source (aka unicodes are precomputed).
263 //REMOVE the two " that are at the beginning and the end.
266 if (withoutUnicodePtr != 0)
267 //0 is used as a fast test flag so the real first char is in position 1
268 System.arraycopy(withoutUnicodeBuffer, 2,
269 //2 is 1 (real start) + 1 (to jump over the ")
270 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
// raw path: skip the first character and drop the last one
273 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
// Returns the index in source of the first character of the current token.
277 public int getCurrentTokenStartPosition() {
278 return this.startPosition;
// Copies the raw characters of the current string literal from source, leaving
// out the first and the last character of the token (the delimiters):
// source[startPosition + 1 .. currentPosition - 2].
281 public final char[] getCurrentStringLiteralSource() {
282 // Return the token REAL source (aka unicodes are precomputed)
287 System.arraycopy(source, startPosition+1, result = new char[length = currentPosition - startPosition - 2], 0, length);
293 * Search the source position corresponding to the end of a given line number
295 * Line numbers are 1-based, and relative to the scanner initialPosition.
296 * Character positions are 0-based.
298 * In case the given line number is inconsistent, answers -1.
// Looks up the end position of a 1-based line number; line n is stored in
// lineEnds[n - 1]. Guards run first for: lineEnds not allocated, lineNumber at or
// past lineEnds.length, and the special case lineNumber == lineEnds.length - 1.
// NOTE(review): the bounds are checked against lineEnds.length (the array
// capacity) rather than linePtr, so slots that were never filled can be
// returned - confirm this is intended.
300 public final int getLineEnd(int lineNumber) {
302 if (lineEnds == null)
304 if (lineNumber >= lineEnds.length)
309 if (lineNumber == lineEnds.length - 1)
311 return lineEnds[lineNumber - 1];
312 // NOTE(review): the remark "next line starts one character behind the lineEnd of the previous line" describes getLineStart, not this method
315 * Search the source position corresponding to the beginning of a given line number
317 * Line numbers are 1-based, and relative to the scanner initialPosition.
318 * Character positions are 0-based.
320 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
322 * In case the given line number is inconsistent, answers -1.
// Looks up the start position of a 1-based line number. One branch answers
// initialPosition; otherwise the start is one character past the recorded end of
// the previous line, lineEnds[lineNumber - 2] + 1.
// NOTE(review): the bounds are checked against lineEnds.length (the array
// capacity) rather than linePtr - confirm this is intended.
324 public final int getLineStart(int lineNumber) {
326 if (lineEnds == null)
328 if (lineNumber >= lineEnds.length)
334 return initialPosition;
335 return lineEnds[lineNumber - 2] + 1;
336 // the next line starts one character after the lineEnd of the previous line
// Tries to consume testedChar at currentPosition.
// The character at currentPosition is read into currentCharacter and the position
// is advanced; when the character differs from testedChar, or the read runs past
// the end of source (IndexOutOfBoundsException), currentPosition is rolled back to
// its value on entry. On a match, and on the exception path, unicodeAsBackSlash is
// reset to false.
// NOTE(review): currentCharacter is assigned before the comparison and is not
// restored on mismatch, so "no side effect" below covers currentPosition only.
// The unicode-escape handling is disabled (commented out).
338 public final boolean getNextChar(char testedChar) {
340 //handle the case of unicode.
341 //when a unicode appears then we must use a buffer that holds char internal values
342 //At the end of this method currentCharacter holds the new visited char
343 //and currentPosition points right next after it
344 //Both previous lines are true if the currentCharacter is == to the testedChar
345 //On false, no side effect has occurred.
347 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the entry position so it can be restored on failure
349 int temp = currentPosition;
351 currentCharacter = source[currentPosition++];
352 // if (((currentCharacter = source[currentPosition++]) == '\\')
353 // && (source[currentPosition] == 'u')) {
354 // //-------------unicode traitement ------------
355 // int c1, c2, c3, c4;
356 // int unicodeSize = 6;
357 // currentPosition++;
358 // while (source[currentPosition] == 'u') {
359 // currentPosition++;
363 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
365 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
367 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
369 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
371 // currentPosition = temp;
375 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
376 // if (currentCharacter != testedChar) {
377 // currentPosition = temp;
380 // unicodeAsBackSlash = currentCharacter == '\\';
382 // //need the unicode buffer
383 // if (withoutUnicodePtr == 0) {
384 // //buffer all the entries that have been left aside....
385 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
389 // withoutUnicodeBuffer,
391 // withoutUnicodePtr);
393 // //fill the buffer with the char
394 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
397 // } //-------------end unicode traitement--------------
399 if (currentCharacter != testedChar) {
400 currentPosition = temp;
403 unicodeAsBackSlash = false;
404 // if (withoutUnicodePtr != 0)
405 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source is treated as "no match"
408 } catch (IndexOutOfBoundsException e) {
409 unicodeAsBackSlash = false;
410 currentPosition = temp;
// Tries to consume either testedChar1 or testedChar2 at currentPosition.
// The character at currentPosition is read into currentCharacter and the position
// is advanced; when it matches neither candidate, or the read runs past the end of
// source (IndexOutOfBoundsException), currentPosition is rolled back to its value
// on entry. The result encoding is described by the first comment in the body.
// NOTE(review): currentCharacter is assigned before the comparison and is not
// restored on mismatch, so "no side effect" below covers currentPosition only.
// The unicode-escape handling is disabled (commented out).
414 public final int getNextChar(char testedChar1, char testedChar2) {
415 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
416 //test can be done with (x==0) for the first and (x>0) for the second
417 //handle the case of unicode.
418 //when a unicode appears then we must use a buffer that holds char internal values
419 //At the end of this method currentCharacter holds the new visited char
420 //and currentPosition points right next after it
421 //Both previous lines are true if the currentCharacter is == to the testedChar1/2
422 //On false, no side effect has occurred.
424 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the entry position so it can be restored on failure
426 int temp = currentPosition;
429 currentCharacter = source[currentPosition++];
430 // if (((currentCharacter = source[currentPosition++]) == '\\')
431 // && (source[currentPosition] == 'u')) {
432 // //-------------unicode traitement ------------
433 // int c1, c2, c3, c4;
434 // int unicodeSize = 6;
435 // currentPosition++;
436 // while (source[currentPosition] == 'u') {
437 // currentPosition++;
441 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
443 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
445 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
447 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
449 // currentPosition = temp;
453 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
454 // if (currentCharacter == testedChar1)
456 // else if (currentCharacter == testedChar2)
459 // currentPosition = temp;
463 // //need the unicode buffer
464 // if (withoutUnicodePtr == 0) {
465 // //buffer all the entries that have been left aside....
466 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
470 // withoutUnicodeBuffer,
472 // withoutUnicodePtr);
474 // //fill the buffer with the char
475 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
477 // } //-------------end unicode traitement--------------
479 if (currentCharacter == testedChar1)
481 else if (currentCharacter == testedChar2)
484 currentPosition = temp;
488 // if (withoutUnicodePtr != 0)
489 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source is treated as "no match"
492 } catch (IndexOutOfBoundsException e) {
493 currentPosition = temp;
// Tries to consume one digit at currentPosition.
// The character at currentPosition is read into currentCharacter and the position
// is advanced; when Character.isDigit rejects it, or the read runs past the end of
// source (IndexOutOfBoundsException), currentPosition is rolled back to its value
// on entry.
// NOTE(review): Character.isDigit accepts every Unicode decimal digit, not only
// '0'..'9' - confirm this is intended for PHP numbers.
// NOTE(review): currentCharacter is not restored on failure, so "no side effect"
// below covers currentPosition only.
// The unicode-escape handling is disabled (commented out).
497 public final boolean getNextCharAsDigit() {
499 //handle the case of unicode.
500 //when a unicode appears then we must use a buffer that holds char internal values
501 //At the end of this method currentCharacter holds the new visited char
502 //and currentPosition points right next after it
503 //Both previous lines are true if the currentCharacter is a digit
504 //On false, no side effect has occurred.
506 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the entry position so it can be restored on failure
508 int temp = currentPosition;
510 currentCharacter = source[currentPosition++];
511 // if (((currentCharacter = source[currentPosition++]) == '\\')
512 // && (source[currentPosition] == 'u')) {
513 // //-------------unicode traitement ------------
514 // int c1, c2, c3, c4;
515 // int unicodeSize = 6;
516 // currentPosition++;
517 // while (source[currentPosition] == 'u') {
518 // currentPosition++;
522 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
524 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
526 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
528 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
530 // currentPosition = temp;
534 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
535 // if (!Character.isDigit(currentCharacter)) {
536 // currentPosition = temp;
540 // //need the unicode buffer
541 // if (withoutUnicodePtr == 0) {
542 // //buffer all the entries that have been left aside....
543 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
547 // withoutUnicodeBuffer,
549 // withoutUnicodePtr);
551 // //fill the buffer with the char
552 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
554 // } //-------------end unicode traitement--------------
556 if (!Character.isDigit(currentCharacter)) {
557 currentPosition = temp;
560 // if (withoutUnicodePtr != 0)
561 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source is treated as "not a digit"
564 } catch (IndexOutOfBoundsException e) {
565 currentPosition = temp;
// Tries to consume one digit of the given radix at currentPosition.
// The character at currentPosition is read into currentCharacter and the position
// is advanced; when Character.digit(currentCharacter, radix) answers -1, or the
// read runs past the end of source (IndexOutOfBoundsException), currentPosition is
// rolled back to its value on entry.
// NOTE(review): currentCharacter is not restored on failure, so "no side effect"
// below covers currentPosition only.
// The unicode-escape handling is disabled (commented out).
569 public final boolean getNextCharAsDigit(int radix) {
571 //handle the case of unicode.
572 //when a unicode appears then we must use a buffer that holds char internal values
573 //At the end of this method currentCharacter holds the new visited char
574 //and currentPosition points right next after it
575 //Both previous lines are true if the currentCharacter is a digit base on radix
576 //On false, no side effect has occurred.
578 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the entry position so it can be restored on failure
580 int temp = currentPosition;
582 currentCharacter = source[currentPosition++];
583 // if (((currentCharacter = source[currentPosition++]) == '\\')
584 // && (source[currentPosition] == 'u')) {
585 // //-------------unicode traitement ------------
586 // int c1, c2, c3, c4;
587 // int unicodeSize = 6;
588 // currentPosition++;
589 // while (source[currentPosition] == 'u') {
590 // currentPosition++;
594 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
596 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
598 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
600 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
602 // currentPosition = temp;
606 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
607 // if (Character.digit(currentCharacter, radix) == -1) {
608 // currentPosition = temp;
612 // //need the unicode buffer
613 // if (withoutUnicodePtr == 0) {
614 // //buffer all the entries that have been left aside....
615 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
619 // withoutUnicodeBuffer,
621 // withoutUnicodePtr);
623 // //fill the buffer with the char
624 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
626 // } //-------------end unicode traitement--------------
628 if (Character.digit(currentCharacter, radix) == -1) {
629 currentPosition = temp;
632 // if (withoutUnicodePtr != 0)
633 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source is treated as "not a digit"
636 } catch (IndexOutOfBoundsException e) {
637 currentPosition = temp;
// Tries to consume one identifier-part character at currentPosition.
// Despite the method name, the test applied is isPHPIdentifierPart, not the Java
// identifier rule.
// The character at currentPosition is read into currentCharacter and the position
// is advanced; when the test fails, or the read runs past the end of source
// (IndexOutOfBoundsException), currentPosition is rolled back to its value on entry.
// NOTE(review): currentCharacter is not restored on failure, so "no side effect"
// below covers currentPosition only.
// The unicode-escape handling is disabled (commented out).
641 public boolean getNextCharAsJavaIdentifierPart() {
643 //handle the case of unicode.
644 //when a unicode appears then we must use a buffer that holds char internal values
645 //At the end of this method currentCharacter holds the new visited char
646 //and currentPosition points right next after it
647 //Both previous lines are true if the currentCharacter is a PHP identifier part
648 //On false, no side effect has occurred.
650 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the entry position so it can be restored on failure
652 int temp = currentPosition;
654 currentCharacter = source[currentPosition++];
655 // if (((currentCharacter = source[currentPosition++]) == '\\')
656 // && (source[currentPosition] == 'u')) {
657 // //-------------unicode traitement ------------
658 // int c1, c2, c3, c4;
659 // int unicodeSize = 6;
660 // currentPosition++;
661 // while (source[currentPosition] == 'u') {
662 // currentPosition++;
666 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
668 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
670 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
672 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
674 // currentPosition = temp;
678 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
679 // if (!isPHPIdentifierPart(currentCharacter)) {
680 // currentPosition = temp;
684 // //need the unicode buffer
685 // if (withoutUnicodePtr == 0) {
686 // //buffer all the entries that have been left aside....
687 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
691 // withoutUnicodeBuffer,
693 // withoutUnicodePtr);
695 // //fill the buffer with the char
696 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
698 // } //-------------end unicode traitement--------------
700 if (!isPHPIdentifierPart(currentCharacter)) {
701 currentPosition = temp;
705 // if (withoutUnicodePtr != 0)
706 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source is treated as "not an identifier part"
709 } catch (IndexOutOfBoundsException e) {
710 currentPosition = temp;
715 public int getNextToken() throws InvalidInputException {
716 int htmlPosition = currentPosition;
719 currentCharacter = source[currentPosition++];
720 if (currentCharacter == '<') {
721 if (getNextChar('?')) {
722 currentCharacter = source[currentPosition++];
723 if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
725 startPosition = currentPosition;
727 if (tokenizeWhiteSpace) {
728 // && (whiteStart != currentPosition - 1)) {
729 // reposition scanner in case we are interested by spaces as tokens
730 startPosition = htmlPosition;
731 return TokenNameHTML;
734 boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
736 int test = getNextChar('H', 'h');
738 test = getNextChar('P', 'p');
741 startPosition = currentPosition;
744 if (tokenizeWhiteSpace) {
745 // && (whiteStart != currentPosition - 1)) {
746 // reposition scanner in case we are interested by spaces as tokens
747 startPosition = htmlPosition;
748 return TokenNameHTML;
757 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
758 if (recordLineSeparator) {
765 } //-----------------end switch while try--------------------
766 catch (IndexOutOfBoundsException e) {
767 if (tokenizeWhiteSpace) {
768 // && (whiteStart != currentPosition - 1)) {
769 // reposition scanner in case we are interested by spaces as tokens
770 startPosition = htmlPosition;
778 jumpOverMethodBody();
780 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
783 while (true) { //loop for jumping over comments
784 withoutUnicodePtr = 0;
785 //start with a new token (even comment written with unicode )
787 // ---------Consume white space and handles startPosition---------
788 int whiteStart = currentPosition;
789 boolean isWhiteSpace;
791 startPosition = currentPosition;
792 currentCharacter = source[currentPosition++];
793 // if (((currentCharacter = source[currentPosition++]) == '\\')
794 // && (source[currentPosition] == 'u')) {
795 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
797 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
798 checkNonExternalizeString();
799 if (recordLineSeparator) {
805 isWhiteSpace = (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
807 } while (isWhiteSpace);
808 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
809 // reposition scanner in case we are interested by spaces as tokens
811 startPosition = whiteStart;
812 return TokenNameWHITESPACE;
814 //little trick to get out in the middle of a source computation
815 if (currentPosition > eofPosition)
818 // ---------Identify the next token-------------
820 switch (currentCharacter) {
822 return TokenNameLPAREN;
824 return TokenNameRPAREN;
826 return TokenNameLBRACE;
828 return TokenNameRBRACE;
830 return TokenNameLBRACKET;
832 return TokenNameRBRACKET;
834 return TokenNameSEMICOLON;
836 return TokenNameCOMMA;
839 if (getNextCharAsDigit())
840 return scanNumber(true);
845 if ((test = getNextChar('+', '=')) == 0)
846 return TokenNamePLUS_PLUS;
848 return TokenNamePLUS_EQUAL;
849 return TokenNamePLUS;
854 if ((test = getNextChar('-', '=')) == 0)
855 return TokenNameMINUS_MINUS;
857 return TokenNameMINUS_EQUAL;
858 if (getNextChar('>'))
859 return TokenNameMINUS_GREATER;
861 return TokenNameMINUS;
864 if (getNextChar('='))
865 return TokenNameTWIDDLE_EQUAL;
866 return TokenNameTWIDDLE;
868 if (getNextChar('='))
869 return TokenNameNOT_EQUAL;
872 if (getNextChar('='))
873 return TokenNameMULTIPLY_EQUAL;
874 return TokenNameMULTIPLY;
876 if (getNextChar('='))
877 return TokenNameREMAINDER_EQUAL;
878 return TokenNameREMAINDER;
882 if ((test = getNextChar('=', '<')) == 0)
883 return TokenNameLESS_EQUAL;
885 if (getNextChar('='))
886 return TokenNameLEFT_SHIFT_EQUAL;
887 if (getNextChar('<')) {
888 int heredocStart = currentPosition;
889 int heredocLength = 0;
890 currentCharacter = source[currentPosition++];
891 if (isPHPIdentifierStart(currentCharacter)) {
892 currentCharacter = source[currentPosition++];
894 return TokenNameERROR;
896 while (isPHPIdentifierPart(currentCharacter)) {
897 currentCharacter = source[currentPosition++];
900 heredocLength = currentPosition - heredocStart - 1;
902 // heredoc end-tag determination
903 boolean endTag = true;
906 ch = source[currentPosition++];
907 if (ch == '\r' || ch == '\n') {
908 if (recordLineSeparator) {
913 for (int i = 0; i < heredocLength; i++) {
914 if (source[currentPosition + i] != source[heredocStart + i]) {
920 currentPosition += heredocLength - 1;
921 currentCharacter = source[currentPosition++];
922 break; // do...while loop
930 return TokenNameHEREDOC;
932 return TokenNameLEFT_SHIFT;
934 return TokenNameLESS;
939 if ((test = getNextChar('=', '>')) == 0)
940 return TokenNameGREATER_EQUAL;
942 if ((test = getNextChar('=', '>')) == 0)
943 return TokenNameRIGHT_SHIFT_EQUAL;
944 return TokenNameRIGHT_SHIFT;
946 return TokenNameGREATER;
949 if (getNextChar('='))
950 return TokenNameEQUAL_EQUAL;
951 if (getNextChar('>'))
952 return TokenNameEQUAL_GREATER;
953 return TokenNameEQUAL;
957 if ((test = getNextChar('&', '=')) == 0)
958 return TokenNameAND_AND;
960 return TokenNameAND_EQUAL;
966 if ((test = getNextChar('|', '=')) == 0)
967 return TokenNameOR_OR;
969 return TokenNameOR_EQUAL;
973 if (getNextChar('='))
974 return TokenNameXOR_EQUAL;
977 if (getNextChar('>')) {
979 return TokenNameStopPHP;
981 return TokenNameQUESTION;
983 if (getNextChar(':'))
984 return TokenNameCOLON_COLON;
985 return TokenNameCOLON;
991 // if ((test = getNextChar('\n', '\r')) == 0) {
992 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
995 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
996 // for (int lookAhead = 0;
999 // if (currentPosition + lookAhead
1000 // == source.length)
1002 // if (source[currentPosition + lookAhead]
1005 // if (source[currentPosition + lookAhead]
1007 // currentPosition += lookAhead + 1;
1011 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1014 // if (getNextChar('\'')) {
1015 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1016 // for (int lookAhead = 0;
1019 // if (currentPosition + lookAhead
1020 // == source.length)
1022 // if (source[currentPosition + lookAhead]
1025 // if (source[currentPosition + lookAhead]
1027 // currentPosition += lookAhead + 1;
1031 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1033 // if (getNextChar('\\'))
1034 // scanEscapeCharacter();
1035 // else { // consume next character
1036 // unicodeAsBackSlash = false;
1037 // if (((currentCharacter = source[currentPosition++])
1039 // && (source[currentPosition] == 'u')) {
1040 // getNextUnicodeChar();
1042 // if (withoutUnicodePtr != 0) {
1043 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1044 // currentCharacter;
1048 // // if (getNextChar('\''))
1049 // // return TokenNameCharacterLiteral;
1050 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1051 // for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1052 // if (currentPosition + lookAhead == source.length)
1054 // if (source[currentPosition + lookAhead] == '\n')
1056 // if (source[currentPosition + lookAhead] == '\'') {
1057 // currentPosition += lookAhead + 1;
1061 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1064 // consume next character
1065 unicodeAsBackSlash = false;
1066 currentCharacter = source[currentPosition++];
1067 // if (((currentCharacter = source[currentPosition++]) == '\\')
1068 // && (source[currentPosition] == 'u')) {
1069 // getNextUnicodeChar();
1071 // if (withoutUnicodePtr != 0) {
1072 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1073 // currentCharacter;
1077 while (currentCharacter != '\'') {
1079 /**** in PHP \r and \n are valid in string literals ****/
1080 // if ((currentCharacter == '\n')
1081 // || (currentCharacter == '\r')) {
1082 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1083 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1084 // if (currentPosition + lookAhead == source.length)
1086 // if (source[currentPosition + lookAhead] == '\n')
1088 // if (source[currentPosition + lookAhead] == '\"') {
1089 // currentPosition += lookAhead + 1;
1093 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1095 if (currentCharacter == '\\') {
1096 int escapeSize = currentPosition;
1097 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1098 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1099 scanSingleQuotedEscapeCharacter();
1100 escapeSize = currentPosition - escapeSize;
1101 if (withoutUnicodePtr == 0) {
1102 //buffer all the entries that have been left aside....
1103 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1104 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1105 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1106 } else { //overwrite the / in the buffer
1107 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1108 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1109 withoutUnicodePtr--;
1113 // consume next character
1114 unicodeAsBackSlash = false;
1115 currentCharacter = source[currentPosition++];
1116 // if (((currentCharacter = source[currentPosition++]) == '\\')
1117 // && (source[currentPosition] == 'u')) {
1118 // getNextUnicodeChar();
1120 if (withoutUnicodePtr != 0) {
1121 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1126 } catch (IndexOutOfBoundsException e) {
1127 throw new InvalidInputException(UNTERMINATED_STRING);
1128 } catch (InvalidInputException e) {
1129 if (e.getMessage().equals(INVALID_ESCAPE)) {
1130 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1131 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1132 if (currentPosition + lookAhead == source.length)
1134 if (source[currentPosition + lookAhead] == '\n')
1136 if (source[currentPosition + lookAhead] == '\'') {
1137 currentPosition += lookAhead + 1;
1145 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1146 if (currentLine == null) {
1147 currentLine = new NLSLine();
1148 lines.add(currentLine);
1150 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1152 return TokenNameStringConstant;
1155 // consume next character
1156 unicodeAsBackSlash = false;
1157 currentCharacter = source[currentPosition++];
1158 // if (((currentCharacter = source[currentPosition++]) == '\\')
1159 // && (source[currentPosition] == 'u')) {
1160 // getNextUnicodeChar();
1162 // if (withoutUnicodePtr != 0) {
1163 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1164 // currentCharacter;
1168 while (currentCharacter != '"') {
1170 /**** in PHP \r and \n are valid in string literals ****/
1171 // if ((currentCharacter == '\n')
1172 // || (currentCharacter == '\r')) {
1173 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1174 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1175 // if (currentPosition + lookAhead == source.length)
1177 // if (source[currentPosition + lookAhead] == '\n')
1179 // if (source[currentPosition + lookAhead] == '\"') {
1180 // currentPosition += lookAhead + 1;
1184 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1186 if (currentCharacter == '\\') {
1187 int escapeSize = currentPosition;
1188 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1189 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1190 scanDoubleQuotedEscapeCharacter();
1191 escapeSize = currentPosition - escapeSize;
1192 if (withoutUnicodePtr == 0) {
1193 //buffer all the entries that have been left aside....
1194 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1195 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1196 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1197 } else { //overwrite the / in the buffer
1198 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1199 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1200 withoutUnicodePtr--;
1204 // consume next character
1205 unicodeAsBackSlash = false;
1206 currentCharacter = source[currentPosition++];
1207 // if (((currentCharacter = source[currentPosition++]) == '\\')
1208 // && (source[currentPosition] == 'u')) {
1209 // getNextUnicodeChar();
1211 if (withoutUnicodePtr != 0) {
1212 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1217 } catch (IndexOutOfBoundsException e) {
1218 throw new InvalidInputException(UNTERMINATED_STRING);
1219 } catch (InvalidInputException e) {
1220 if (e.getMessage().equals(INVALID_ESCAPE)) {
1221 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1222 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1223 if (currentPosition + lookAhead == source.length)
1225 if (source[currentPosition + lookAhead] == '\n')
1227 if (source[currentPosition + lookAhead] == '\"') {
1228 currentPosition += lookAhead + 1;
1236 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1237 if (currentLine == null) {
1238 currentLine = new NLSLine();
1239 lines.add(currentLine);
1241 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1243 return TokenNameStringLiteral;
1246 // consume next character
1247 unicodeAsBackSlash = false;
1248 currentCharacter = source[currentPosition++];
1249 // if (((currentCharacter = source[currentPosition++]) == '\\')
1250 // && (source[currentPosition] == 'u')) {
1251 // getNextUnicodeChar();
1253 // if (withoutUnicodePtr != 0) {
1254 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1255 // currentCharacter;
1259 while (currentCharacter != '`') {
1261 /**** in PHP \r and \n are valid in string literals ****/
1262 // if ((currentCharacter == '\n')
1263 // || (currentCharacter == '\r')) {
1264 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1265 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1266 // if (currentPosition + lookAhead == source.length)
1268 // if (source[currentPosition + lookAhead] == '\n')
1270 // if (source[currentPosition + lookAhead] == '\"') {
1271 // currentPosition += lookAhead + 1;
1275 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1277 if (currentCharacter == '\\') {
1278 int escapeSize = currentPosition;
1279 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1280 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1281 scanDoubleQuotedEscapeCharacter();
1282 escapeSize = currentPosition - escapeSize;
1283 if (withoutUnicodePtr == 0) {
1284 //buffer all the entries that have been left aside....
1285 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1286 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1287 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1288 } else { //overwrite the / in the buffer
1289 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1290 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1291 withoutUnicodePtr--;
1295 // consume next character
1296 unicodeAsBackSlash = false;
1297 currentCharacter = source[currentPosition++];
1298 // if (((currentCharacter = source[currentPosition++]) == '\\')
1299 // && (source[currentPosition] == 'u')) {
1300 // getNextUnicodeChar();
1302 if (withoutUnicodePtr != 0) {
1303 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1308 } catch (IndexOutOfBoundsException e) {
1309 throw new InvalidInputException(UNTERMINATED_STRING);
1310 } catch (InvalidInputException e) {
1311 if (e.getMessage().equals(INVALID_ESCAPE)) {
1312 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1313 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1314 if (currentPosition + lookAhead == source.length)
1316 if (source[currentPosition + lookAhead] == '\n')
1318 if (source[currentPosition + lookAhead] == '`') {
1319 currentPosition += lookAhead + 1;
1327 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1328 if (currentLine == null) {
1329 currentLine = new NLSLine();
1330 lines.add(currentLine);
1332 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1334 return TokenNameStringInterpolated;
1339 if ((currentCharacter == '#') || (test = getNextChar('/', '*')) == 0) {
1341 int endPositionForLineComment = 0;
1342 try { //get the next char
1343 currentCharacter = source[currentPosition++];
1344 // if (((currentCharacter = source[currentPosition++])
1346 // && (source[currentPosition] == 'u')) {
1347 // //-------------unicode traitement ------------
1348 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1349 // currentPosition++;
1350 // while (source[currentPosition] == 'u') {
1351 // currentPosition++;
1354 // Character.getNumericValue(source[currentPosition++]))
1358 // Character.getNumericValue(source[currentPosition++]))
1362 // Character.getNumericValue(source[currentPosition++]))
1366 // Character.getNumericValue(source[currentPosition++]))
1369 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1371 // currentCharacter =
1372 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1376 //handle the \\u case manually into comment
1377 // if (currentCharacter == '\\') {
1378 // if (source[currentPosition] == '\\')
1379 // currentPosition++;
1380 // } //jump over the \\
1381 boolean isUnicode = false;
1382 while (currentCharacter != '\r' && currentCharacter != '\n') {
1383 if (currentCharacter == '?') {
1384 if (getNextChar('>')) {
1385 startPosition = currentPosition - 2;
1387 return TokenNameStopPHP;
1393 currentCharacter = source[currentPosition++];
1394 // if (((currentCharacter = source[currentPosition++])
1396 // && (source[currentPosition] == 'u')) {
1397 // isUnicode = true;
1398 // //-------------unicode traitement ------------
1399 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1400 // currentPosition++;
1401 // while (source[currentPosition] == 'u') {
1402 // currentPosition++;
1405 // Character.getNumericValue(source[currentPosition++]))
1409 // Character.getNumericValue(
1410 // source[currentPosition++]))
1414 // Character.getNumericValue(
1415 // source[currentPosition++]))
1419 // Character.getNumericValue(
1420 // source[currentPosition++]))
1423 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1425 // currentCharacter =
1426 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1429 //handle the \\u case manually into comment
1430 // if (currentCharacter == '\\') {
1431 // if (source[currentPosition] == '\\')
1432 // currentPosition++;
1433 // } //jump over the \\
1436 endPositionForLineComment = currentPosition - 6;
1438 endPositionForLineComment = currentPosition - 1;
1440 recordComment(false);
1441 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1442 checkNonExternalizeString();
1443 if (recordLineSeparator) {
1445 pushUnicodeLineSeparator();
1447 pushLineSeparator();
1453 if (tokenizeComments) {
1455 currentPosition = endPositionForLineComment;
1456 // reset one character behind
1458 return TokenNameCOMMENT_LINE;
1460 } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1461 if (tokenizeComments) {
1463 // reset one character behind
1464 return TokenNameCOMMENT_LINE;
1470 //traditional and annotation comment
1471 boolean isJavadoc = false, star = false;
1472 // consume next character
1473 unicodeAsBackSlash = false;
1474 currentCharacter = source[currentPosition++];
1475 // if (((currentCharacter = source[currentPosition++]) == '\\')
1476 // && (source[currentPosition] == 'u')) {
1477 // getNextUnicodeChar();
1479 // if (withoutUnicodePtr != 0) {
1480 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1481 // currentCharacter;
1485 if (currentCharacter == '*') {
1489 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1490 checkNonExternalizeString();
1491 if (recordLineSeparator) {
1492 pushLineSeparator();
1497 try { //get the next char
1498 currentCharacter = source[currentPosition++];
1499 // if (((currentCharacter = source[currentPosition++])
1501 // && (source[currentPosition] == 'u')) {
1502 // //-------------unicode traitement ------------
1503 // getNextUnicodeChar();
1505 //handle the \\u case manually into comment
1506 // if (currentCharacter == '\\') {
1507 // if (source[currentPosition] == '\\')
1508 // currentPosition++;
1509 // //jump over the \\
1511 // empty comment is not a javadoc /**/
1512 if (currentCharacter == '/') {
1515 //loop until end of comment */
1516 while ((currentCharacter != '/') || (!star)) {
1517 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1518 checkNonExternalizeString();
1519 if (recordLineSeparator) {
1520 pushLineSeparator();
1525 star = currentCharacter == '*';
1527 currentCharacter = source[currentPosition++];
1528 // if (((currentCharacter = source[currentPosition++])
1530 // && (source[currentPosition] == 'u')) {
1531 // //-------------unicode traitement ------------
1532 // getNextUnicodeChar();
1534 //handle the \\u case manually into comment
1535 // if (currentCharacter == '\\') {
1536 // if (source[currentPosition] == '\\')
1537 // currentPosition++;
1538 // } //jump over the \\
1540 recordComment(isJavadoc);
1541 if (tokenizeComments) {
1543 return TokenNameCOMMENT_PHPDOC;
1544 return TokenNameCOMMENT_BLOCK;
1546 } catch (IndexOutOfBoundsException e) {
1547 throw new InvalidInputException(UNTERMINATED_COMMENT);
1551 if (getNextChar('='))
1552 return TokenNameDIVIDE_EQUAL;
1553 return TokenNameDIVIDE;
1557 return TokenNameEOF;
1558 //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1559 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1562 if (currentCharacter == '$') {
1563 while ((currentCharacter = source[currentPosition++]) == '$') {
1565 if (currentCharacter == '{')
1566 return TokenNameDOLLAR_LBRACE;
1567 if (isPHPIdentifierStart(currentCharacter))
1568 return scanIdentifierOrKeyword(true);
1569 return TokenNameERROR;
1571 if (isPHPIdentifierStart(currentCharacter))
1572 return scanIdentifierOrKeyword(false);
1573 if (Character.isDigit(currentCharacter))
1574 return scanNumber(false);
1575 return TokenNameERROR;
1578 } //-----------------end switch while try--------------------
1579 catch (IndexOutOfBoundsException e) {
1582 return TokenNameEOF;
1585 // public final void getNextUnicodeChar()
1586 // throws IndexOutOfBoundsException, InvalidInputException {
1588 // //handle the case of unicode.
1589 // //when a unicode appears then we must use a buffer that holds char internal values
1590 // //At the end of this method currentCharacter holds the new visited char
1591 // //and currentPosition points right next after it
1593 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1595 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1596 // currentPosition++;
1597 // while (source[currentPosition] == 'u') {
1598 // currentPosition++;
1602 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1604 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1606 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1608 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1610 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1612 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1613 // //need the unicode buffer
1614 // if (withoutUnicodePtr == 0) {
1615 // //buffer all the entries that have been left aside....
1616 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1617 // System.arraycopy(
1620 // withoutUnicodeBuffer,
1622 // withoutUnicodePtr);
1624 // //fill the buffer with the char
1625 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1627 // unicodeAsBackSlash = currentCharacter == '\\';
1629 /* Tokenize a method body, assuming that curly brackets are properly balanced.
// Skips ahead through a method body without returning tokens (the method is void).
// Visible behavior: clears wasAcr, then loops. Each pass first consumes
// whitespace (moving startPosition along and recording line separators when
// recordLineSeparator is set) and then switches on the first non-whitespace
// character to step over string literals, comments, identifiers and numbers.
// NOTE(review): the embedded listing numbers skip values throughout this
// method, so case labels, braces and short statements are not visible here.
// The notes added below describe only the statements that are visible.
1631 public final void jumpOverMethodBody() {
1633 this.wasAcr = false;
1636 while (true) { //loop for jumping over comments
1637 // ---------Consume white space and handles startPosition---------
1638 boolean isWhiteSpace;
// startPosition is moved before each read, so after the loop it indexes the first non-whitespace character
1640 startPosition = currentPosition;
1641 currentCharacter = source[currentPosition++];
1642 // if (((currentCharacter = source[currentPosition++]) == '\\')
1643 // && (source[currentPosition] == 'u')) {
1644 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1646 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1647 pushLineSeparator();
1648 isWhiteSpace = Character.isWhitespace(currentCharacter);
1650 } while (isWhiteSpace);
1652 // -------consume token until } is found---------
1653 switch (currentCharacter) {
1665 test = getNextChar('\\');
1668 scanDoubleQuotedEscapeCharacter();
1669 } catch (InvalidInputException ex) {
1672 // try { // consume next character
1673 unicodeAsBackSlash = false;
1674 currentCharacter = source[currentPosition++];
1675 // if (((currentCharacter = source[currentPosition++]) == '\\')
1676 // && (source[currentPosition] == 'u')) {
1677 // getNextUnicodeChar();
// a non-zero withoutUnicodePtr means a side buffer is in use, so the character is mirrored into it
1679 if (withoutUnicodePtr != 0) {
1680 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1683 // } catch (InvalidInputException ex) {
1691 // try { // consume next character
1692 unicodeAsBackSlash = false;
1693 currentCharacter = source[currentPosition++];
1694 // if (((currentCharacter = source[currentPosition++]) == '\\')
1695 // && (source[currentPosition] == 'u')) {
1696 // getNextUnicodeChar();
1698 if (withoutUnicodePtr != 0) {
1699 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1702 // } catch (InvalidInputException ex) {
// walk to the closing double quote, stepping over escape sequences on the way
1704 while (currentCharacter != '"') {
1705 if (currentCharacter == '\r') {
1706 if (source[currentPosition] == '\n')
1709 // the string cannot go further than the line
1711 if (currentCharacter == '\n') {
1713 // the string cannot go further than the line
1715 if (currentCharacter == '\\') {
1717 scanDoubleQuotedEscapeCharacter();
1718 } catch (InvalidInputException ex) {
1721 // try { // consume next character
1722 unicodeAsBackSlash = false;
1723 currentCharacter = source[currentPosition++];
1724 // if (((currentCharacter = source[currentPosition++]) == '\\')
1725 // && (source[currentPosition] == 'u')) {
1726 // getNextUnicodeChar();
1728 if (withoutUnicodePtr != 0) {
1729 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1732 // } catch (InvalidInputException ex) {
1735 } catch (IndexOutOfBoundsException e) {
// a result of 0 from getNextChar leads into the end-of-line scan below, i.e. the line comment path
// NOTE(review): presumably 0 means the first candidate (a second slash) matched - confirm against getNextChar
1742 if ((test = getNextChar('/', '*')) == 0) {
1746 currentCharacter = source[currentPosition++];
1747 // if (((currentCharacter = source[currentPosition++]) == '\\')
1748 // && (source[currentPosition] == 'u')) {
1749 // //-------------unicode traitement ------------
1750 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1751 // currentPosition++;
1752 // while (source[currentPosition] == 'u') {
1753 // currentPosition++;
1756 // Character.getNumericValue(source[currentPosition++]))
1760 // Character.getNumericValue(source[currentPosition++]))
1764 // Character.getNumericValue(source[currentPosition++]))
1768 // Character.getNumericValue(source[currentPosition++]))
1771 // //error don't care of the value
1772 // currentCharacter = 'A';
1773 // } //something different from \n and \r
1775 // currentCharacter =
1776 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// consume characters up to the carriage return or line feed that ends the line
1780 while (currentCharacter != '\r' && currentCharacter != '\n') {
1782 currentCharacter = source[currentPosition++];
1783 // if (((currentCharacter = source[currentPosition++])
1785 // && (source[currentPosition] == 'u')) {
1786 // //-------------unicode traitement ------------
1787 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1788 // currentPosition++;
1789 // while (source[currentPosition] == 'u') {
1790 // currentPosition++;
1793 // Character.getNumericValue(source[currentPosition++]))
1797 // Character.getNumericValue(source[currentPosition++]))
1801 // Character.getNumericValue(source[currentPosition++]))
1805 // Character.getNumericValue(source[currentPosition++]))
1808 // //error don't care of the value
1809 // currentCharacter = 'A';
1810 // } //something different from \n and \r
1812 // currentCharacter =
1813 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1817 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1818 pushLineSeparator();
1819 } catch (IndexOutOfBoundsException e) {
1820 } //an eof will then be generated
1824 //traditional and annotation comment
1825 boolean star = false;
1826 // try { // consume next character
1827 unicodeAsBackSlash = false;
1828 currentCharacter = source[currentPosition++];
1829 // if (((currentCharacter = source[currentPosition++]) == '\\')
1830 // && (source[currentPosition] == 'u')) {
1831 // getNextUnicodeChar();
1833 if (withoutUnicodePtr != 0) {
1834 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1837 // } catch (InvalidInputException ex) {
1839 if (currentCharacter == '*') {
1842 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1843 pushLineSeparator();
1844 try { //get the next char
1845 currentCharacter = source[currentPosition++];
1846 // if (((currentCharacter = source[currentPosition++]) == '\\')
1847 // && (source[currentPosition] == 'u')) {
1848 // //-------------unicode traitement ------------
1849 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1850 // currentPosition++;
1851 // while (source[currentPosition] == 'u') {
1852 // currentPosition++;
1855 // Character.getNumericValue(source[currentPosition++]))
1859 // Character.getNumericValue(source[currentPosition++]))
1863 // Character.getNumericValue(source[currentPosition++]))
1867 // Character.getNumericValue(source[currentPosition++]))
1870 // //error don't care of the value
1871 // currentCharacter = 'A';
1872 // } //something different from * and /
1874 // currentCharacter =
1875 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1878 //loop until end of comment */
// star records whether the previous character was an asterisk, so the loop
// only stops on a slash that directly follows an asterisk
1879 while ((currentCharacter != '/') || (!star)) {
1880 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1881 pushLineSeparator();
1882 star = currentCharacter == '*';
1884 currentCharacter = source[currentPosition++];
1885 // if (((currentCharacter = source[currentPosition++])
1887 // && (source[currentPosition] == 'u')) {
1888 // //-------------unicode traitement ------------
1889 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1890 // currentPosition++;
1891 // while (source[currentPosition] == 'u') {
1892 // currentPosition++;
1895 // Character.getNumericValue(source[currentPosition++]))
1899 // Character.getNumericValue(source[currentPosition++]))
1903 // Character.getNumericValue(source[currentPosition++]))
1907 // Character.getNumericValue(source[currentPosition++]))
1910 // //error don't care of the value
1911 // currentCharacter = 'A';
1912 // } //something different from * and /
1914 // currentCharacter =
1915 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1919 } catch (IndexOutOfBoundsException e) {
// identifiers and dollar-prefixed names: the flag argument tells the callee whether the name began with a dollar sign
1928 if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
1930 scanIdentifierOrKeyword((currentCharacter == '$'));
1931 } catch (InvalidInputException ex) {
1935 if (Character.isDigit(currentCharacter)) {
1938 } catch (InvalidInputException ex) {
1944 //-----------------end switch while try--------------------
// NOTE(review): the bodies of these two handlers are not visible in this listing
1945 } catch (IndexOutOfBoundsException e) {
1946 } catch (InvalidInputException e) {
1950 // public final boolean jumpOverUnicodeWhiteSpace()
1951 // throws InvalidInputException {
1953 // //handle the case of unicode. Jump over the next whiteSpace
1954 // //making startPosition pointing on the next available char
1955 // //On false, the currentCharacter is filled up with a potential
1959 // this.wasAcr = false;
1960 // int c1, c2, c3, c4;
1961 // int unicodeSize = 6;
1962 // currentPosition++;
1963 // while (source[currentPosition] == 'u') {
1964 // currentPosition++;
1968 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1970 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1972 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1974 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1976 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1979 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1980 // if (recordLineSeparator
1981 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1982 // pushLineSeparator();
1983 // if (Character.isWhitespace(currentCharacter))
1986 // //buffer the new char which is not a white space
1987 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1988 // //withoutUnicodePtr == 1 is true here
1990 // } catch (IndexOutOfBoundsException e) {
1991 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// Produces a copy of the recorded line-end positions.
// Only the first linePtr + 1 entries of lineEnds are copied, so the copy is
// trimmed to the entries in use and is independent of the backing array.
1994 public final int[] getLineEnds() {
1995 //return a bounded copy of this.lineEnds
// the destination array is allocated inline, sized linePtr + 1, and assigned to copy in the same expression
1998 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
// Accessor declared to return a char array.
// NOTE(review): the body is not visible in this listing; presumably it hands
// back the source field that the scanning methods index into - confirm.
2002 public char[] getSource() {
// Returns the text of a one-character token as a char array.
// The character is read directly from source at startPosition.
2005 final char[] optimizedCurrentTokenSource1() {
2006 //return always the same char[] build only once
2008 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2009 char charOne = source[startPosition];
// NOTE(review): the listing numbers jump from 2009 to 2064, so whatever picks a
// pre-built array (as the comments above describe) is not visible here.
// Visible fallback: allocate a new one-element array holding charOne.
2064 return new char[] { charOne };
// Returns the text of a two-character token, reusing a cached char array when
// an equal one is already stored in the charArray_length[0] table.
// NOTE(review): declarations of c0, c1, i and r, the statements run on a match,
// and the statement guarded by the final if are not visible in this listing.
2068 final char[] optimizedCurrentTokenSource2() {
2069 //try to return the same char[] build only once
// hash mixes both characters (first one shifted left by 6 bits) and is reduced modulo TableSize
2072 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
// bucket of cached arrays for this hash value
2073 char[][] table = charArray_length[0][hash];
// first scan: advance i up to InternalTableSize - 1, comparing every cached entry character by character
2075 while (++i < InternalTableSize) {
2076 char[] charArray = table[i];
2077 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2080 //---------other side---------
// second scan: covers indices up to and including newEntry2
2082 int max = newEntry2;
2083 while (++i <= max) {
2084 char[] charArray = table[i];
2085 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2088 //--------add the entry-------
// no cached match: move to the next slot; the guarded statement is not visible (presumably wraps max around - confirm)
2089 if (++max >= InternalTableSize)
// store a freshly allocated array in the chosen slot and keep it in r
2092 table[max] = (r = new char[] { c0, c1 });
// Returns the text of a three-character token, reusing a cached char array when
// an equal one is already stored in the charArray_length[1] table.
// NOTE(review): the declarations, the left-hand side and any modulo of the hash
// expression, the statements run on a match, and the statement guarded by the
// final if are not visible in this listing.
2097 final char[] optimizedCurrentTokenSource3() {
2098 //try to return the same char[] build only once
// hash input: the three characters combined with shifts of 12, 6 and 0 bits
2102 (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
// bucket of cached arrays for this hash value
2104 char[][] table = charArray_length[1][hash];
// first scan: advance i up to InternalTableSize - 1, comparing every cached entry character by character
2106 while (++i < InternalTableSize) {
2107 char[] charArray = table[i];
2108 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2111 //---------other side---------
// second scan: covers indices up to and including newEntry3
2113 int max = newEntry3;
2114 while (++i <= max) {
2115 char[] charArray = table[i];
2116 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2119 //--------add the entry-------
// no cached match: move to the next slot; the guarded statement is not visible (presumably wraps max around - confirm)
2120 if (++max >= InternalTableSize)
// store a freshly allocated array in the chosen slot and keep it in r
2123 table[max] = (r = new char[] { c0, c1, c2 });
// Returns the text of a four-character token, reusing a cached char array when
// an equal one is already stored in the charArray_length[2] table.
// NOTE(review): the declaration of hash, i and r, any modulo applied to the hash,
// the statements run on a match, and the statement guarded by the final if are
// not visible in this listing.
2128 final char[] optimizedCurrentTokenSource4() {
2129 //try to return the same char[] build only once
2131 char c0, c1, c2, c3;
// hash input: the four characters combined with shifts of 18, 12, 6 and 0 bits;
// the first term is widened to long before shifting
2133 ((((long) (c0 = source[startPosition])) << 18)
2134 + ((c1 = source[startPosition + 1]) << 12)
2135 + ((c2 = source[startPosition + 2]) << 6)
2136 + (c3 = source[startPosition + 3]))
// hash is narrowed to int to index the bucket
2138 char[][] table = charArray_length[2][(int) hash];
// first scan: advance i up to InternalTableSize - 1, comparing every cached entry character by character
2140 while (++i < InternalTableSize) {
2141 char[] charArray = table[i];
2142 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2145 //---------other side---------
// second scan: covers indices up to and including newEntry4
2147 int max = newEntry4;
2148 while (++i <= max) {
2149 char[] charArray = table[i];
2150 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2153 //--------add the entry-------
// no cached match: move to the next slot; the guarded statement is not visible (presumably wraps max around - confirm)
2154 if (++max >= InternalTableSize)
// store a freshly allocated array in the chosen slot and keep it in r
2157 table[max] = (r = new char[] { c0, c1, c2, c3 });
// Returns the text of a five-character token, reusing a cached char array when
// an equal one is already stored in the charArray_length[3] table.
// NOTE(review): the declaration of hash, i and r, any modulo applied to the hash,
// the statements run on a match, and the statement guarded by the final if are
// not visible in this listing.
2163 final char[] optimizedCurrentTokenSource5() {
2164 //try to return the same char[] build only once
2166 char c0, c1, c2, c3, c4;
// hash input: the five characters combined with shifts of 24, 18, 12, 6 and 0 bits;
// the first two terms are widened to long before shifting
2168 ((((long) (c0 = source[startPosition])) << 24)
2169 + (((long) (c1 = source[startPosition + 1])) << 18)
2170 + ((c2 = source[startPosition + 2]) << 12)
2171 + ((c3 = source[startPosition + 3]) << 6)
2172 + (c4 = source[startPosition + 4]))
// hash is narrowed to int to index the bucket
2174 char[][] table = charArray_length[3][(int) hash];
// first scan: advance i up to InternalTableSize - 1, comparing every cached entry character by character
2176 while (++i < InternalTableSize) {
2177 char[] charArray = table[i];
2178 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2181 //---------other side---------
// second scan: covers indices up to and including newEntry5
2183 int max = newEntry5;
2184 while (++i <= max) {
2185 char[] charArray = table[i];
2186 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2189 //--------add the entry-------
// no cached match: move to the next slot; the guarded statement is not visible (presumably wraps max around - confirm)
2190 if (++max >= InternalTableSize)
// store a freshly allocated array in the chosen slot and keep it in r
2193 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
// Returns the text of a six-character token, reusing a cached char array when
// an equal one is already stored in the charArray_length[4] table.
// NOTE(review): the declaration of hash, i and r, any modulo applied to the hash,
// the statements run on a match, and the statement guarded by the final if are
// not visible in this listing.
2199 final char[] optimizedCurrentTokenSource6() {
2200 //try to return the same char[] build only once
2202 char c0, c1, c2, c3, c4, c5;
// hash input: the six characters combined with shifts of 32, 24, 18, 12, 6 and 0 bits;
// the first three terms are widened to long before shifting
// NOTE(review): every other step is 6 bits, which would put the leading shift at 30
// rather than 32 - this only affects hash distribution, but confirm it is intended
2204 ((((long) (c0 = source[startPosition])) << 32)
2205 + (((long) (c1 = source[startPosition + 1])) << 24)
2206 + (((long) (c2 = source[startPosition + 2])) << 18)
2207 + ((c3 = source[startPosition + 3]) << 12)
2208 + ((c4 = source[startPosition + 4]) << 6)
2209 + (c5 = source[startPosition + 5]))
// hash is narrowed to int to index the bucket
2211 char[][] table = charArray_length[4][(int) hash];
// first scan: advance i up to InternalTableSize - 1, comparing every cached entry character by character
2213 while (++i < InternalTableSize) {
2214 char[] charArray = table[i];
2215 if ((c0 == charArray[0])
2216 && (c1 == charArray[1])
2217 && (c2 == charArray[2])
2218 && (c3 == charArray[3])
2219 && (c4 == charArray[4])
2220 && (c5 == charArray[5]))
2223 //---------other side---------
// second scan: covers indices up to and including newEntry6
2225 int max = newEntry6;
2226 while (++i <= max) {
2227 char[] charArray = table[i];
2228 if ((c0 == charArray[0])
2229 && (c1 == charArray[1])
2230 && (c2 == charArray[2])
2231 && (c3 == charArray[3])
2232 && (c4 == charArray[4])
2233 && (c5 == charArray[5]))
2236 //--------add the entry-------
// no cached match: move to the next slot; the guarded statement is not visible (presumably wraps max around - confirm)
2237 if (++max >= InternalTableSize)
// store a freshly allocated array in the chosen slot and keep it in r
2240 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
// Records the position of the line separator held in currentCharacter into lineEnds.
// Carriage return and line feed are handled in separate branches; a carriage
// return directly followed by a line feed ends up as a single entry.
// lineEnds grows by INCREMENT entries whenever a store runs past its end.
// Declared to throw InvalidInputException.
// NOTE(review): the embedded listing numbers skip values, so the statements
// guarded by several of the if lines below, and the try openers, are not visible.
2245 public final void pushLineSeparator() throws InvalidInputException {
2246 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2247 final int INCREMENT = 250;
2249 if (this.checkNonExternalizedStringLiterals) {
2250 // reinitialize the current line for non externalize strings purpose
2253 //currentCharacter is at position currentPosition-1
2256 if (currentCharacter == '\r') {
2257 int separatorPos = currentPosition - 1;
// guard against a position at or before the last recorded one (the guarded statement is not visible; presumably an early exit - confirm)
2258 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2260 //System.out.println("CR-" + separatorPos);
2262 lineEnds[++linePtr] = separatorPos;
2263 } catch (IndexOutOfBoundsException e) {
// the increment of linePtr has already happened when the store fails its bounds
// check, so linePtr indexes the new slot once the array has been enlarged
2264 //linePtr value is correct
2265 int oldLength = lineEnds.length;
2266 int[] old = lineEnds;
2267 lineEnds = new int[oldLength + INCREMENT];
2268 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2269 lineEnds[linePtr] = separatorPos;
2271 // look-ahead for merged cr+lf
// when a line feed follows, the entry just written is moved onto the line feed position
2273 if (source[currentPosition] == '\n') {
2274 //System.out.println("look-ahead LF-" + currentPosition);
2275 lineEnds[linePtr] = currentPosition;
// reading source[currentPosition] can run past the end of the source; that case is caught here
2281 } catch (IndexOutOfBoundsException e) {
2286 if (currentCharacter == '\n') {
2287 //must merge eventual cr followed by lf
// wasAcr set and the last entry sitting exactly one character earlier: overwrite that entry with the line feed position
2288 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2289 //System.out.println("merge LF-" + (currentPosition - 1));
2290 lineEnds[linePtr] = currentPosition - 1;
2292 int separatorPos = currentPosition - 1;
// same guard as in the carriage return branch (guarded statement not visible)
2293 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2295 // System.out.println("LF-" + separatorPos);
2297 lineEnds[++linePtr] = separatorPos;
2298 } catch (IndexOutOfBoundsException e) {
2299 //linePtr value is correct
2300 int oldLength = lineEnds.length;
2301 int[] old = lineEnds;
2302 lineEnds = new int[oldLength + INCREMENT];
2303 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2304 lineEnds[linePtr] = separatorPos;
// Variant of line separator recording that uses an offset of 6 (currentPosition - 6)
// where the plain variant uses 1, and currentPosition - 7 in the merge test.
// NOTE(review): the offset of 6 presumably matches the six source characters of a
// backslash-u escape with four hex digits - confirm.
// lineEnds grows by INCREMENT entries whenever a store runs past its end.
// NOTE(review): the embedded listing numbers skip values, so the statements
// guarded by several of the if lines below, and the try openers, are not visible.
2311 public final void pushUnicodeLineSeparator() {
2312 // isUnicode means that the \r or \n has been read as a unicode character
2314 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2316 final int INCREMENT = 250;
// NOTE(review): the next comment is worded as in the non-unicode variant, while the code below uses currentPosition - 6
2317 //currentCharacter is at position currentPosition-1
2319 if (this.checkNonExternalizedStringLiterals) {
2320 // reinitialize the current line for non externalize strings purpose
2325 if (currentCharacter == '\r') {
2326 int separatorPos = currentPosition - 6;
// guard against a position at or before the last recorded one (the guarded statement is not visible; presumably an early exit - confirm)
2327 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2329 //System.out.println("CR-" + separatorPos);
2331 lineEnds[++linePtr] = separatorPos;
2332 } catch (IndexOutOfBoundsException e) {
// the increment of linePtr has already happened when the store fails its bounds
// check, so linePtr indexes the new slot once the array has been enlarged
2333 //linePtr value is correct
2334 int oldLength = lineEnds.length;
2335 int[] old = lineEnds;
2336 lineEnds = new int[oldLength + INCREMENT];
2337 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2338 lineEnds[linePtr] = separatorPos;
2340 // look-ahead for merged cr+lf
// when a line feed follows, the entry just written is moved onto the line feed position
2341 if (source[currentPosition] == '\n') {
2342 //System.out.println("look-ahead LF-" + currentPosition);
2343 lineEnds[linePtr] = currentPosition;
2351 if (currentCharacter == '\n') {
2352 //must merge eventual cr followed by lf
// wasAcr set and the last entry sitting at currentPosition - 7: overwrite that entry with currentPosition - 6
2353 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2354 //System.out.println("merge LF-" + (currentPosition - 1));
2355 lineEnds[linePtr] = currentPosition - 6;
2357 int separatorPos = currentPosition - 6;
// same guard as in the carriage return branch (guarded statement not visible)
2358 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2360 // System.out.println("LF-" + separatorPos);
2362 lineEnds[++linePtr] = separatorPos;
2363 } catch (IndexOutOfBoundsException e) {
2364 //linePtr value is correct
2365 int oldLength = lineEnds.length;
2366 int[] old = lineEnds;
2367 lineEnds = new int[oldLength + INCREMENT];
2368 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2369 lineEnds[linePtr] = separatorPos;
// Pushes the comment that was just scanned onto the parallel commentStops and
// commentStarts arrays, using commentPtr as the shared index.
// isJavadoc: selects the sign of the stored stop position - currentPosition is
// stored as is when true and negated when false.
// Both arrays are enlarged by 30 entries when the push runs past the end of commentStops.
2376 public final void recordComment(boolean isJavadoc) {
2378 // a new annotation comment is recorded
2380 commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2381 } catch (IndexOutOfBoundsException e) {
// commentPtr was already advanced by the failed store, so it indexes the new slot after growing
2382 int oldStackLength = commentStops.length;
2383 int[] oldStack = commentStops;
2384 commentStops = new int[oldStackLength + 30];
2385 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2386 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2387 //grows the positions buffers too
// commentStarts is resized from the old commentStops length, which keeps the two arrays the same size
2388 int[] old = commentStarts;
2389 commentStarts = new int[oldStackLength + 30];
2390 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2393 //the buffer is of a correct size here
// the start of the comment is taken from startPosition
2394 commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that it can rescan the window [begin, end].
 * begin becomes the initial, start and current position. eofPosition is set to end + 1,
 * except when end is Integer.MAX_VALUE, where adding 1 would overflow. The comment stack
 * is emptied by resetting commentPtr to -1.
 *
 * NOTE(review): the embedded line numbers skip 2398-2399; confirm against the complete
 * file that no statement is missing between the comment and the first assignment.
 */
2396 public void resetTo(int begin, int end) {
2397 //reset the scanner to a given position where it may rescan again
2400 initialPosition = startPosition = currentPosition = begin;
2401 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2402 commentPtr = -1; // reset comment stack
/**
 * Translates the escape sequence that follows a backslash inside a single-quoted string.
 * The next char is read straight from source (advancing currentPosition) and
 * currentCharacter is then rewritten by the switch; the visible arms store either a
 * single quote or a backslash.
 *
 * NOTE(review): the case labels and the statements between the arms are absent from this
 * listing (embedded line numbers 2423, 2425-2426, 2428-2429 and 2431-2434 are skipped),
 * so which input char selects which arm must be confirmed against the complete file.
 */
2405 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2406 // the string with "\\u" is a legal string of two chars \ and u
2407 //thus we use a direct access to the source (for regular cases).
2409 // if (unicodeAsBackSlash) {
2410 // // consume next character
2411 // unicodeAsBackSlash = false;
2412 // if (((currentCharacter = source[currentPosition++]) == '\\')
2413 // && (source[currentPosition] == 'u')) {
2414 // getNextUnicodeChar();
2416 // if (withoutUnicodePtr != 0) {
2417 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2421 currentCharacter = source[currentPosition++];
2422 switch (currentCharacter) {
2424 currentCharacter = '\'';
2427 currentCharacter = '\\';
2430 currentCharacter = '\\';
/**
 * Translates the escape sequence that follows a backslash inside a double-quoted string.
 * The next char is read straight from source (advancing currentPosition). The visible
 * switch arms replace currentCharacter with tab, line feed, carriage return, double
 * quote, single quote, backslash or '$'; the backspace and form-feed assignments are
 * commented out. The remaining code decodes an octal escape into number and stores it in
 * currentCharacter; per the inline comments a third digit is only accepted when the first
 * digit is 0-3 (zeroToThreeNot is false).
 *
 * NOTE(review): case labels, else-branches, closing braces and the statements that
 * "ignore last character" are absent from this listing, as is the condition guarding the
 * INVALID_ESCAPE throw; confirm them against the complete file.
 */
2435 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2436 // the string with "\\u" is a legal string of two chars \ and u
2437 //thus we use a direct access to the source (for regular cases).
2439 // if (unicodeAsBackSlash) {
2440 // // consume next character
2441 // unicodeAsBackSlash = false;
2442 // if (((currentCharacter = source[currentPosition++]) == '\\')
2443 // && (source[currentPosition] == 'u')) {
2444 // getNextUnicodeChar();
2446 // if (withoutUnicodePtr != 0) {
2447 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2451 currentCharacter = source[currentPosition++];
2452 switch (currentCharacter) {
2454 // currentCharacter = '\b';
2457 currentCharacter = '\t';
2460 currentCharacter = '\n';
2463 // currentCharacter = '\f';
2466 currentCharacter = '\r';
2469 currentCharacter = '\"';
2472 currentCharacter = '\'';
2475 currentCharacter = '\\';
2478 currentCharacter = '$';
2481 // -----------octal escape--------------
2483 // OctalDigit OctalDigit
2484 // ZeroToThree OctalDigit OctalDigit
2486 int number = Character.getNumericValue(currentCharacter);
2487 if (number >= 0 && number <= 7) {
2488 boolean zeroToThreeNot = number > 3;
2489 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2490 int digit = Character.getNumericValue(currentCharacter);
2491 if (digit >= 0 && digit <= 7) {
2492 number = (number * 8) + digit;
2493 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2494 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2497 digit = Character.getNumericValue(currentCharacter);
2498 if (digit >= 0 && digit <= 7) {
2499 // has read \ZeroToThree OctalDigit OctalDigit
2500 number = (number * 8) + digit;
2501 } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2505 } else { // has read \OctalDigit NonDigit--> ignore last character
2508 } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2511 } else { // has read \OctalDigit --> ignore last character
2515 throw new InvalidInputException(INVALID_ESCAPE);
2516 currentCharacter = (char) number;
2519 // throw new InvalidInputException(INVALID_ESCAPE);
2523 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2524 // return scanIdentifierOrKeyword( false );
/**
 * Scans the rest of an identifier and classifies it as a variable, a PHP keyword or a
 * plain identifier.
 * Chars are consumed while getNextCharAsJavaIdentifierPart() succeeds. A one-char token
 * is always TokenNameIdentifier. Otherwise the token text is copied from source into data
 * with every char lower-cased, so keyword matching is case-insensitive, and the code
 * dispatches on the first letter and compares the remaining letters one by one. Any token
 * that matches no keyword is returned as TokenNameIdentifier.
 *
 * NOTE(review): the condition guarding "return TokenNameVariable" (presumably
 * isVariable), the inner "switch (length)" headers, most "case N :" labels and several
 * short return statements are absent from this listing; confirm them against the
 * complete file before changing any branch.
 *
 * @param isVariable presumably true when the token was introduced by '$' - TODO confirm
 * @return TokenNameVariable, a keyword token kind, or TokenNameIdentifier
 */
2527 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2530 //first dispatch on the first char.
2531 //then the length. If there are several
2532 //keywords with the same length AND the same first char, then do another
2533 //dispatch on the second char :-)...cool....but fast !
2535 useAssertAsAnIndentifier = false;
2537 while (getNextCharAsJavaIdentifierPart()) {
2541 return TokenNameVariable;
2546 // if (withoutUnicodePtr == 0)
2548 //quick test on length == 1 but not on length > 12 while most identifier
2549 //have a length which is <= 12...but there are lots of identifier with
2553 if ((length = currentPosition - startPosition) == 1)
2554 return TokenNameIdentifier;
2556 data = new char[length];
2557 index = startPosition;
2558 for (int i = 0; i < length; i++) {
2559 data[i] = Character.toLowerCase(source[index + i]);
2563 // if ((length = withoutUnicodePtr) == 1)
2564 // return TokenNameIdentifier;
2565 // // data = withoutUnicodeBuffer;
2566 // data = new char[withoutUnicodeBuffer.length];
2567 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2568 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2573 firstLetter = data[index];
2574 switch (firstLetter) {
2576 case 'a' : // as and array
2579 if ((data[++index] == 's')) {
2582 return TokenNameIdentifier;
2585 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2586 return TokenNameAND;
2588 return TokenNameIdentifier;
2591 // if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2592 // return TokenNamearray;
2594 // return TokenNameIdentifier;
2596 return TokenNameIdentifier;
2601 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
2602 return TokenNamebreak;
2604 return TokenNameIdentifier;
2606 return TokenNameIdentifier;
2609 case 'c' : //case class continue
2612 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
2613 return TokenNamecase;
2615 return TokenNameIdentifier;
2617 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
2618 return TokenNameclass;
2620 return TokenNameIdentifier;
2622 if ((data[++index] == 'o')
2623 && (data[++index] == 'n')
2624 && (data[++index] == 't')
2625 && (data[++index] == 'i')
2626 && (data[++index] == 'n')
2627 && (data[++index] == 'u')
2628 && (data[++index] == 'e'))
2629 return TokenNamecontinue;
2631 return TokenNameIdentifier;
2633 return TokenNameIdentifier;
2636 case 'd' : //define default do
2639 if ((data[++index] == 'o'))
2642 return TokenNameIdentifier;
2644 if ((data[++index] == 'e')
2645 && (data[++index] == 'f')
2646 && (data[++index] == 'i')
2647 && (data[++index] == 'n')
2648 && (data[++index] == 'e'))
2649 return TokenNamedefine;
2651 return TokenNameIdentifier;
2653 if ((data[++index] == 'e')
2654 && (data[++index] == 'f')
2655 && (data[++index] == 'a')
2656 && (data[++index] == 'u')
2657 && (data[++index] == 'l')
2658 && (data[++index] == 't'))
2659 return TokenNamedefault;
2661 return TokenNameIdentifier;
2663 return TokenNameIdentifier;
2665 case 'e' : //echo else elseif endif endfor endwhile endswitch endforeach extends
2668 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
2669 return TokenNameecho;
2670 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2671 return TokenNameelse;
2673 return TokenNameIdentifier;
2675 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
2676 return TokenNameendif;
2678 return TokenNameIdentifier;
2680 if ((data[++index] == 'n')
2681 && (data[++index] == 'd')
2682 && (data[++index] == 'f')
2683 && (data[++index] == 'o')
2684 && (data[++index] == 'r'))
2685 return TokenNameendfor;
2687 (data[index] == 'l')
2688 && (data[++index] == 's')
2689 && (data[++index] == 'e')
2690 && (data[++index] == 'i')
2691 && (data[++index] == 'f'))
2692 return TokenNameelseif;
2694 return TokenNameIdentifier;
2696 if ((data[++index] == 'x')
2697 && (data[++index] == 't')
2698 && (data[++index] == 'e')
2699 && (data[++index] == 'n')
2700 && (data[++index] == 'd')
2701 && (data[++index] == 's'))
2702 return TokenNameextends;
2704 return TokenNameIdentifier;
2705 case 8 : // endwhile
2706 if ((data[++index] == 'n')
2707 && (data[++index] == 'd')
2708 && (data[++index] == 'w')
2709 && (data[++index] == 'h')
2710 && (data[++index] == 'i')
2711 && (data[++index] == 'l')
2712 && (data[++index] == 'e'))
2713 return TokenNameendwhile;
2715 return TokenNameIdentifier;
2716 case 9 : // endswitch
2717 if ((data[++index] == 'n')
2718 && (data[++index] == 'd')
2719 && (data[++index] == 's')
2720 && (data[++index] == 'w')
2721 && (data[++index] == 'i')
2722 && (data[++index] == 't')
2723 && (data[++index] == 'c')
2724 && (data[++index] == 'h'))
2725 return TokenNameendswitch;
2727 return TokenNameIdentifier;
2728 case 10 : // endforeach
2729 if ((data[++index] == 'n')
2730 && (data[++index] == 'd')
2731 && (data[++index] == 'f')
2732 && (data[++index] == 'o')
2733 && (data[++index] == 'r')
2734 && (data[++index] == 'e')
2735 && (data[++index] == 'a')
2736 && (data[++index] == 'c')
2737 && (data[++index] == 'h'))
2738 return TokenNameendforeach;
2740 return TokenNameIdentifier;
2743 return TokenNameIdentifier;
2746 case 'f' : //for false foreach function
2749 if ((data[++index] == 'o') && (data[++index] == 'r'))
2750 return TokenNamefor;
2752 return TokenNameIdentifier;
2754 if ((data[++index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2755 return TokenNamefalse;
2757 return TokenNameIdentifier;
2758 case 7 : // foreach
2759 if ((data[++index] == 'o')
2760 && (data[++index] == 'r')
2761 && (data[++index] == 'e')
2762 && (data[++index] == 'a')
2763 && (data[++index] == 'c')
2764 && (data[++index] == 'h'))
2765 return TokenNameforeach;
2767 return TokenNameIdentifier;
2768 case 8 : // function
2769 if ((data[++index] == 'u')
2770 && (data[++index] == 'n')
2771 && (data[++index] == 'c')
2772 && (data[++index] == 't')
2773 && (data[++index] == 'i')
2774 && (data[++index] == 'o')
2775 && (data[++index] == 'n'))
2776 return TokenNamefunction;
2778 return TokenNameIdentifier;
2780 return TokenNameIdentifier;
2784 if ((data[++index] == 'l')
2785 && (data[++index] == 'o')
2786 && (data[++index] == 'b')
2787 && (data[++index] == 'a')
2788 && (data[++index] == 'l')) {
2789 return TokenNameglobal;
2792 return TokenNameIdentifier;
2797 if (data[++index] == 'f')
2800 return TokenNameIdentifier;
2802 // if ((data[++index] == 'n') && (data[++index] == 't'))
2803 // return TokenNameint;
2805 // return TokenNameIdentifier;
2807 if ((data[++index] == 'n')
2808 && (data[++index] == 'c')
2809 && (data[++index] == 'l')
2810 && (data[++index] == 'u')
2811 && (data[++index] == 'd')
2812 && (data[++index] == 'e'))
2813 return TokenNameinclude;
2815 return TokenNameIdentifier;
2817 if ((data[++index] == 'n')
2818 && (data[++index] == 'c')
2819 && (data[++index] == 'l')
2820 && (data[++index] == 'u')
2821 && (data[++index] == 'd')
2822 && (data[++index] == 'e')
2823 && (data[++index] == '_')
2824 && (data[++index] == 'o')
2825 && (data[++index] == 'n')
2826 && (data[++index] == 'c')
2827 && (data[++index] == 'e'))
2828 return TokenNameinclude_once;
2830 return TokenNameIdentifier;
2832 return TokenNameIdentifier;
2837 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
2838 return TokenNamelist;
2841 return TokenNameIdentifier;
2843 case 'n' : // new null
2846 if ((data[++index] == 'e') && (data[++index] == 'w'))
2847 return TokenNamenew;
2849 return TokenNameIdentifier;
2851 if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l'))
2852 return TokenNamenull;
2854 return TokenNameIdentifier;
2857 return TokenNameIdentifier;
2859 case 'o' : // or old_function
2861 if (data[++index] == 'r') {
2865 // if (length == 12) {
2866 // if ((data[++index] == 'l')
2867 // && (data[++index] == 'd')
2868 // && (data[++index] == '_')
2869 // && (data[++index] == 'f')
2870 // && (data[++index] == 'u')
2871 // && (data[++index] == 'n')
2872 // && (data[++index] == 'c')
2873 // && (data[++index] == 't')
2874 // && (data[++index] == 'i')
2875 // && (data[++index] == 'o')
2876 // && (data[++index] == 'n')) {
2877 // return TokenNameold_function;
2880 return TokenNameIdentifier;
2884 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
2885 return TokenNameprint;
2888 return TokenNameIdentifier;
2889 case 'r' : //return require require_once
2891 if ((data[++index] == 'e')
2892 && (data[++index] == 't')
2893 && (data[++index] == 'u')
2894 && (data[++index] == 'r')
2895 && (data[++index] == 'n')) {
2896 return TokenNamereturn;
2898 } else if (length == 7) {
2899 if ((data[++index] == 'e')
2900 && (data[++index] == 'q')
2901 && (data[++index] == 'u')
2902 && (data[++index] == 'i')
2903 && (data[++index] == 'r')
2904 && (data[++index] == 'e')) {
2905 return TokenNamerequire;
2907 } else if (length == 12) {
2908 if ((data[++index] == 'e')
2909 && (data[++index] == 'q')
2910 && (data[++index] == 'u')
2911 && (data[++index] == 'i')
2912 && (data[++index] == 'r')
2913 && (data[++index] == 'e')
2914 && (data[++index] == '_')
2915 && (data[++index] == 'o')
2916 && (data[++index] == 'n')
2917 && (data[++index] == 'c')
2918 && (data[++index] == 'e')) {
2919 return TokenNamerequire_once;
2922 return TokenNameIdentifier;
2924 case 's' : //static switch
2927 if (data[++index] == 't')
2928 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
2929 return TokenNamestatic;
2931 return TokenNameIdentifier;
2933 (data[index] == 'w')
2934 && (data[++index] == 'i')
2935 && (data[++index] == 't')
2936 && (data[++index] == 'c')
2937 && (data[++index] == 'h'))
2938 return TokenNameswitch;
2940 return TokenNameIdentifier;
2942 return TokenNameIdentifier;
2949 if ((data[++index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e'))
2950 return TokenNametrue;
2952 return TokenNameIdentifier;
2953 // if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
2954 // return TokenNamethis;
2957 return TokenNameIdentifier;
2963 if ((data[++index] == 'a') && (data[++index] == 'r'))
2964 return TokenNamevar;
2966 return TokenNameIdentifier;
2969 return TokenNameIdentifier;
2975 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
2976 return TokenNamewhile;
2978 return TokenNameIdentifier;
2979 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
2980 //return TokenNamewidefp ;
2982 //return TokenNameIdentifier;
2984 return TokenNameIdentifier;
2990 if ((data[++index] == 'o') && (data[++index] == 'r'))
2991 return TokenNameXOR;
2993 return TokenNameIdentifier;
2996 return TokenNameIdentifier;
2999 return TokenNameIdentifier;
/**
 * Scans a numeric literal whose first digit is already in currentCharacter and returns
 * its token kind, either TokenNameIntegerLiteral or TokenNameDoubleLiteral.
 * When the number is not dot-prefixed and starts with '0', an 'x' or 'X' introduces a
 * hexadecimal literal that must have at least one hex digit (INVALID_HEXA otherwise).
 * An exponent marker ('e' or 'E', optionally followed by '-' or '+') must be followed by
 * a digit (INVALID_FLOAT otherwise). A trailing 'd' or 'D' yields a double literal; the
 * long and float suffixes are commented out.
 *
 * NOTE(review): closing braces, else-branches and the statements that update "floating"
 * are absent from this listing; confirm the nesting against the complete file.
 *
 * @param dotPrefix true when the number was introduced by a '.'; the literal then starts
 *                  out as floating
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException for a malformed hexadecimal or floating-point literal
 */
3002 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3004 //when entering this method the currentCharacter is the first
3005 //digit of the number , i.e. it may be preceded by a . when
3008 boolean floating = dotPrefix;
3009 if ((!dotPrefix) && (currentCharacter == '0')) {
3010 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3011 //force the first char of the hexa number to exist...
3012 // consume next character
3013 unicodeAsBackSlash = false;
3014 currentCharacter = source[currentPosition++];
3015 // if (((currentCharacter = source[currentPosition++]) == '\\')
3016 // && (source[currentPosition] == 'u')) {
3017 // getNextUnicodeChar();
3019 // if (withoutUnicodePtr != 0) {
3020 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3023 if (Character.digit(currentCharacter, 16) == -1)
3024 throw new InvalidInputException(INVALID_HEXA);
3026 while (getNextCharAsDigit(16)) {
3028 // if (getNextChar('l', 'L') >= 0)
3029 // return TokenNameLongLiteral;
3031 return TokenNameIntegerLiteral;
3034 //there is x or X in the number
3035 //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3036 if (getNextCharAsDigit()) {
3037 //-------------potential octal-----------------
3038 while (getNextCharAsDigit()) {
3041 // if (getNextChar('l', 'L') >= 0) {
3042 // return TokenNameLongLiteral;
3045 // if (getNextChar('f', 'F') >= 0) {
3046 // return TokenNameFloatingPointLiteral;
3049 if (getNextChar('d', 'D') >= 0) {
3050 return TokenNameDoubleLiteral;
3051 } else { //make the distinction between octal and float ....
3052 if (getNextChar('.')) { //bingo ! ....
3053 while (getNextCharAsDigit()) {
3055 if (getNextChar('e', 'E') >= 0) {
3056 // consume next character
3057 unicodeAsBackSlash = false;
3058 currentCharacter = source[currentPosition++];
3059 // if (((currentCharacter = source[currentPosition++]) == '\\')
3060 // && (source[currentPosition] == 'u')) {
3061 // getNextUnicodeChar();
3063 // if (withoutUnicodePtr != 0) {
3064 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3068 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3069 // consume next character
3070 unicodeAsBackSlash = false;
3071 currentCharacter = source[currentPosition++];
3072 // if (((currentCharacter = source[currentPosition++]) == '\\')
3073 // && (source[currentPosition] == 'u')) {
3074 // getNextUnicodeChar();
3076 // if (withoutUnicodePtr != 0) {
3077 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3078 // currentCharacter;
3082 if (!Character.isDigit(currentCharacter))
3083 throw new InvalidInputException(INVALID_FLOAT);
3084 while (getNextCharAsDigit()) {
3087 // if (getNextChar('f', 'F') >= 0)
3088 // return TokenNameFloatingPointLiteral;
3089 getNextChar('d', 'D'); //jump over potential d or D
3090 return TokenNameDoubleLiteral;
3092 return TokenNameIntegerLiteral;
3100 while (getNextCharAsDigit()) {
3103 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3104 // return TokenNameLongLiteral;
3106 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3107 while (getNextCharAsDigit()) {
3112 //if floating is true both exponent and suffix may be optional
3114 if (getNextChar('e', 'E') >= 0) {
3116 // consume next character
3117 unicodeAsBackSlash = false;
3118 currentCharacter = source[currentPosition++];
3119 // if (((currentCharacter = source[currentPosition++]) == '\\')
3120 // && (source[currentPosition] == 'u')) {
3121 // getNextUnicodeChar();
3123 // if (withoutUnicodePtr != 0) {
3124 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3128 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
3129 unicodeAsBackSlash = false;
3130 currentCharacter = source[currentPosition++];
3131 // if (((currentCharacter = source[currentPosition++]) == '\\')
3132 // && (source[currentPosition] == 'u')) {
3133 // getNextUnicodeChar();
3135 // if (withoutUnicodePtr != 0) {
3136 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3140 if (!Character.isDigit(currentCharacter))
3141 throw new InvalidInputException(INVALID_FLOAT);
3142 while (getNextCharAsDigit()) {
3146 if (getNextChar('d', 'D') >= 0)
3147 return TokenNameDoubleLiteral;
3148 // if (getNextChar('f', 'F') >= 0)
3149 // return TokenNameFloatingPointLiteral;
3151 //the long flag has been tested before
3153 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3156 * Search the line number corresponding to a specific position
/**
 * Looks up the line that contains the given source position, using the recorded line
 * ends. The visible code considers the first linePtr + 1 entries of lineEnds, keeps two
 * bounds g and d over them and compares position with lineEnds[m].
 *
 * NOTE(review): every return statement, the declaration of m and the bound updates are
 * absent from this listing (embedded line numbers 3162, 3164-3165, 3167-3169, 3171,
 * 3173-3177 and 3179-3182 are skipped). The result of each branch - presumably a 1-based
 * line number - must be confirmed against the complete file.
 */
3159 public final int getLineNumber(int position) {
3161 if (lineEnds == null)
3163 int length = linePtr + 1;
3166 int g = 0, d = length - 1;
3170 if (position < lineEnds[m]) {
3172 } else if (position > lineEnds[m]) {
3178 if (position < lineEnds[m]) {
/**
 * Sets the scanner's PHP mode.
 * NOTE(review): the method body is absent from this listing (embedded line number 3185
 * is skipped); presumably it assigns mode to the phpMode field - confirm.
 */
3184 public void setPHPMode(boolean mode) {
/**
 * Installs the char buffer to scan.
 * A null argument is replaced by an empty array, so this.source is never null afterwards.
 * The initial and current positions are reset to 0, containsAssertKeyword is cleared and
 * withoutUnicodeBuffer is reallocated with the length of the new source.
 *
 * NOTE(review): embedded line number 3196 is skipped in this listing; confirm against the
 * complete file whether a further statement sits between the if/else and the position
 * reset.
 */
3188 public final void setSource(char[] source) {
3189 //the source-buffer is set to sourceString
3191 if (source == null) {
3192 this.source = new char[0];
3194 this.source = source;
3197 initialPosition = currentPosition = 0;
3198 containsAssertKeyword = false;
3199 withoutUnicodeBuffer = new char[this.source.length];
3203 public String toString() {
3204 if (startPosition == source.length)
3205 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3206 if (currentPosition > source.length)
3207 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3209 char front[] = new char[startPosition];
3210 System.arraycopy(source, 0, front, 0, startPosition);
3212 int middleLength = (currentPosition - 1) - startPosition + 1;
3214 if (middleLength > -1) {
3215 middle = new char[middleLength];
3216 System.arraycopy(source, startPosition, middle, 0, middleLength);
3218 middle = new char[0];
3221 char end[] = new char[source.length - (currentPosition - 1)];
3222 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3224 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3225 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a human-readable form of a token kind.
 * Token kinds that carry text (error, StopPHP, identifier, variable, the literal kinds,
 * HEREDOC, whitespace, the comment kinds and HTML) are rendered as Name(text). The text
 * comes from getCurrentTokenSource(), i.e. from the scanner's current token and not from
 * act. Keywords and operators are rendered as their spelling. The last return renders
 * "not-a-token(act) " followed by the current token text.
 *
 * NOTE(review): the "switch (act)" header, the "default" label and several short
 * case/return lines are absent from this listing (for example the labels in front of
 * "as", "do", "for", "if", "new", "var", "!", "^", "&", "|", "." and "EOF", and the
 * returns after TokenNameMINUS_GREATER and TokenNameDOLLAR_LBRACE); confirm them against
 * the complete file.
 *
 * @param act the token kind, one of the TokenName constants
 * @return the printable description of act
 */
3228 public final String toStringAction(int act) {
3231 case TokenNameERROR :
3232 return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3233 case TokenNameStopPHP :
3234 return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3235 case TokenNameIdentifier :
3236 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3237 case TokenNameVariable :
3238 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3240 return "as"; //$NON-NLS-1$
3241 case TokenNamebreak :
3242 return "break"; //$NON-NLS-1$
3243 case TokenNamecase :
3244 return "case"; //$NON-NLS-1$
3245 case TokenNameclass :
3246 return "class"; //$NON-NLS-1$
3247 case TokenNamecontinue :
3248 return "continue"; //$NON-NLS-1$
3249 case TokenNamedefault :
3250 return "default"; //$NON-NLS-1$
3251 case TokenNamedefine :
3252 return "define"; //$NON-NLS-1$
3254 return "do"; //$NON-NLS-1$
3255 case TokenNameecho :
3256 return "echo"; //$NON-NLS-1$
3257 case TokenNameelse :
3258 return "else"; //$NON-NLS-1$
3259 case TokenNameelseif :
3260 return "elseif"; //$NON-NLS-1$
3261 case TokenNameendfor :
3262 return "endfor"; //$NON-NLS-1$
3263 case TokenNameendforeach :
3264 return "endforeach"; //$NON-NLS-1$
3265 case TokenNameendif :
3266 return "endif"; //$NON-NLS-1$
3267 case TokenNameendswitch :
3268 return "endswitch"; //$NON-NLS-1$
3269 case TokenNameendwhile :
3270 return "endwhile"; //$NON-NLS-1$
3271 case TokenNameextends :
3272 return "extends"; //$NON-NLS-1$
3273 case TokenNamefalse :
3274 return "false"; //$NON-NLS-1$
3276 return "for"; //$NON-NLS-1$
3277 case TokenNameforeach :
3278 return "foreach"; //$NON-NLS-1$
3279 case TokenNamefunction :
3280 return "function"; //$NON-NLS-1$
3281 case TokenNameglobal :
3282 return "global"; //$NON-NLS-1$
3284 return "if"; //$NON-NLS-1$
3285 case TokenNameinclude :
3286 return "include"; //$NON-NLS-1$
3287 case TokenNameinclude_once :
3288 return "include_once"; //$NON-NLS-1$
3289 case TokenNamelist :
3290 return "list"; //$NON-NLS-1$
3292 return "new"; //$NON-NLS-1$
3293 case TokenNamenull :
3294 return "null"; //$NON-NLS-1$
3295 case TokenNameprint :
3296 return "print"; //$NON-NLS-1$
3297 case TokenNamerequire :
3298 return "require"; //$NON-NLS-1$
3299 case TokenNamerequire_once :
3300 return "require_once"; //$NON-NLS-1$
3301 case TokenNamereturn :
3302 return "return"; //$NON-NLS-1$
3303 case TokenNamestatic :
3304 return "static"; //$NON-NLS-1$
3305 case TokenNameswitch :
3306 return "switch"; //$NON-NLS-1$
3307 case TokenNametrue :
3308 return "true"; //$NON-NLS-1$
3310 return "var"; //$NON-NLS-1$
3311 case TokenNamewhile :
3312 return "while"; //$NON-NLS-1$
3313 case TokenNameIntegerLiteral :
3314 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3315 case TokenNameDoubleLiteral :
3316 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3317 case TokenNameStringLiteral :
3318 return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3319 case TokenNameStringConstant :
3320 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3321 case TokenNameStringInterpolated :
3322 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3323 case TokenNameHEREDOC :
3324 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3326 case TokenNamePLUS_PLUS :
3327 return "++"; //$NON-NLS-1$
3328 case TokenNameMINUS_MINUS :
3329 return "--"; //$NON-NLS-1$
3330 case TokenNameEQUAL_EQUAL :
3331 return "=="; //$NON-NLS-1$
3332 case TokenNameEQUAL_GREATER :
3333 return "=>"; //$NON-NLS-1$
3334 case TokenNameLESS_EQUAL :
3335 return "<="; //$NON-NLS-1$
3336 case TokenNameGREATER_EQUAL :
3337 return ">="; //$NON-NLS-1$
3338 case TokenNameNOT_EQUAL :
3339 return "!="; //$NON-NLS-1$
3340 case TokenNameLEFT_SHIFT :
3341 return "<<"; //$NON-NLS-1$
3342 case TokenNameRIGHT_SHIFT :
3343 return ">>"; //$NON-NLS-1$
3344 case TokenNamePLUS_EQUAL :
3345 return "+="; //$NON-NLS-1$
3346 case TokenNameMINUS_EQUAL :
3347 return "-="; //$NON-NLS-1$
3348 case TokenNameMULTIPLY_EQUAL :
3349 return "*="; //$NON-NLS-1$
3350 case TokenNameDIVIDE_EQUAL :
3351 return "/="; //$NON-NLS-1$
3352 case TokenNameAND_EQUAL :
3353 return "&="; //$NON-NLS-1$
3354 case TokenNameOR_EQUAL :
3355 return "|="; //$NON-NLS-1$
3356 case TokenNameXOR_EQUAL :
3357 return "^="; //$NON-NLS-1$
3358 case TokenNameREMAINDER_EQUAL :
3359 return "%="; //$NON-NLS-1$
3360 case TokenNameLEFT_SHIFT_EQUAL :
3361 return "<<="; //$NON-NLS-1$
3362 case TokenNameRIGHT_SHIFT_EQUAL :
3363 return ">>="; //$NON-NLS-1$
3364 case TokenNameOR_OR :
3365 return "||"; //$NON-NLS-1$
3366 case TokenNameAND_AND :
3367 return "&&"; //$NON-NLS-1$
3368 case TokenNamePLUS :
3369 return "+"; //$NON-NLS-1$
3370 case TokenNameMINUS :
3371 return "-"; //$NON-NLS-1$
3372 case TokenNameMINUS_GREATER :
3375 return "!"; //$NON-NLS-1$
3376 case TokenNameREMAINDER :
3377 return "%"; //$NON-NLS-1$
3379 return "^"; //$NON-NLS-1$
3381 return "&"; //$NON-NLS-1$
3382 case TokenNameMULTIPLY :
3383 return "*"; //$NON-NLS-1$
3385 return "|"; //$NON-NLS-1$
3386 case TokenNameTWIDDLE :
3387 return "~"; //$NON-NLS-1$
3388 case TokenNameTWIDDLE_EQUAL :
3389 return "~="; //$NON-NLS-1$
3390 case TokenNameDIVIDE :
3391 return "/"; //$NON-NLS-1$
3392 case TokenNameGREATER :
3393 return ">"; //$NON-NLS-1$
3394 case TokenNameLESS :
3395 return "<"; //$NON-NLS-1$
3396 case TokenNameLPAREN :
3397 return "("; //$NON-NLS-1$
3398 case TokenNameRPAREN :
3399 return ")"; //$NON-NLS-1$
3400 case TokenNameLBRACE :
3401 return "{"; //$NON-NLS-1$
3402 case TokenNameRBRACE :
3403 return "}"; //$NON-NLS-1$
3404 case TokenNameLBRACKET :
3405 return "["; //$NON-NLS-1$
3406 case TokenNameRBRACKET :
3407 return "]"; //$NON-NLS-1$
3408 case TokenNameSEMICOLON :
3409 return ";"; //$NON-NLS-1$
3410 case TokenNameQUESTION :
3411 return "?"; //$NON-NLS-1$
3412 case TokenNameCOLON :
3413 return ":"; //$NON-NLS-1$
3414 case TokenNameCOMMA :
3415 return ","; //$NON-NLS-1$
3417 return "."; //$NON-NLS-1$
3418 case TokenNameEQUAL :
3419 return "="; //$NON-NLS-1$
3422 case TokenNameDOLLAR_LBRACE :
3425 return "EOF"; //$NON-NLS-1$
3426 case TokenNameWHITESPACE :
3427 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3428 case TokenNameCOMMENT_LINE :
3429 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3430 case TokenNameCOMMENT_BLOCK :
3431 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3432 case TokenNameCOMMENT_PHPDOC :
3433 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3434 case TokenNameHTML :
3435 return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3437 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner with assert mode switched off; delegates to the four-argument
 * constructor with assertMode = false.
 *
 * @param tokenizeComments stored in the tokenizeComments field by the delegate
 * @param tokenizeWhiteSpace stored in the tokenizeWhiteSpace field by the delegate
 * @param checkNonExternalizedStringLiterals stored in the field of the same name
 */
3441 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
3442 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
// Parameter list and body of the four-argument Scanner constructor: eofPosition starts at
// Integer.MAX_VALUE and each argument is stored in the field of the same name.
// NOTE(review): the first line of the signature (embedded line number 3445, presumably
// "public Scanner(") and the closing brace are absent from this listing - confirm.
3446 boolean tokenizeComments,
3447 boolean tokenizeWhiteSpace,
3448 boolean checkNonExternalizedStringLiterals,
3449 boolean assertMode) {
3450 this.eofPosition = Integer.MAX_VALUE;
3451 this.tokenizeComments = tokenizeComments;
3452 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3453 this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3454 this.assertMode = assertMode;
/**
 * Hands the current NLS line to parseTags(NLSLine) after testing it for null.
 * NOTE(review): the statement controlled by the null test is absent from this listing
 * (embedded line number 3459 is skipped); presumably it is an early "return;", which
 * would make this a no-op when currentLine is null - confirm.
 *
 * @throws InvalidInputException declared for, and presumably propagated from, parseTags
 */
3457 private void checkNonExternalizeString() throws InvalidInputException {
3458 if (currentLine == null)
3460 parseTags(currentLine);
3463 private void parseTags(NLSLine line) throws InvalidInputException {
3464 String s = new String(getCurrentTokenSource());
3465 int pos = s.indexOf(TAG_PREFIX);
3466 int lineLength = line.size();
3468 int start = pos + TAG_PREFIX_LENGTH;
3469 int end = s.indexOf(TAG_POSTFIX, start);
3470 String index = s.substring(start, end);
3473 i = Integer.parseInt(index) - 1;
3474 // Tags are one based not zero based.
3475 } catch (NumberFormatException e) {
3476 i = -1; // we don't want to consider this as a valid NLS tag
3478 if (line.exists(i)) {
3481 pos = s.indexOf(TAG_PREFIX, start);
3484 this.nonNLSStrings = new StringLiteral[lineLength];
3485 int nonNLSCounter = 0;
3486 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3487 StringLiteral literal = (StringLiteral) iterator.next();
3488 if (literal != null) {
3489 this.nonNLSStrings[nonNLSCounter++] = literal;
3492 if (nonNLSCounter == 0) {
3493 this.nonNLSStrings = null;
3497 this.wasNonExternalizedStringLiteral = true;
3498 if (nonNLSCounter != lineLength) {
3499 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);