1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
17 import net.sourceforge.phpdt.core.compiler.*;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
20 public class Scanner implements IScanner, ITerminalSymbols {
23 - getNextToken() which returns the current type of the token
24 (this value is not memorized by the scanner)
25 - getCurrentTokenSource() which provides the token's "REAL" source
26 (i.e. all unicode escapes have been transformed into the corresponding chars)
27 - sourceStart gives the position into the stream
28 - currentPosition-1 gives the sourceEnd position into the stream
32 private boolean assertMode;
33 public boolean useAssertAsAnIndentifier = false;
34 //flag indicating if processed source contains occurrences of keyword assert
35 public boolean containsAssertKeyword = false;
37 public boolean recordLineSeparator;
38 public boolean phpMode = false;
40 public char currentCharacter;
41 public int startPosition;
42 public int currentPosition;
43 public int initialPosition, eofPosition;
44 // after this position EOF tokens are generated instead of real tokens from the source
46 public boolean tokenizeComments;
47 public boolean tokenizeWhiteSpace;
49 //source should be viewed as a window (aka a part)
50 //of an entire very large stream
54 public char[] withoutUnicodeBuffer;
55 public int withoutUnicodePtr;
56 //when == 0 ==> no unicode in the current token
57 public boolean unicodeAsBackSlash = false;
59 public boolean scanningFloatLiteral = false;
61 //support for /** comments
62 //public char[][] comments = new char[10][];
63 public int[] commentStops = new int[10];
64 public int[] commentStarts = new int[10];
65 public int commentPtr = -1; // no comment test with commentPtr value -1
67 //diet parsing support - jump over some method body when requested
68 public boolean diet = false;
70 //support for the poor-line-debuggers ....
71 //remember the position of the cr/lf
72 public int[] lineEnds = new int[250];
73 public int linePtr = -1;
74 public boolean wasAcr = false;
76 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
78 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
79 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
80 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
81 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
82 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
83 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
84 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
86 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
87 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
88 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
89 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
91 //----------------optimized identifier management------------------
92 static final char[] charArray_a = new char[] { 'a' },
93 charArray_b = new char[] { 'b' },
94 charArray_c = new char[] { 'c' },
95 charArray_d = new char[] { 'd' },
96 charArray_e = new char[] { 'e' },
97 charArray_f = new char[] { 'f' },
98 charArray_g = new char[] { 'g' },
99 charArray_h = new char[] { 'h' },
100 charArray_i = new char[] { 'i' },
101 charArray_j = new char[] { 'j' },
102 charArray_k = new char[] { 'k' },
103 charArray_l = new char[] { 'l' },
104 charArray_m = new char[] { 'm' },
105 charArray_n = new char[] { 'n' },
106 charArray_o = new char[] { 'o' },
107 charArray_p = new char[] { 'p' },
108 charArray_q = new char[] { 'q' },
109 charArray_r = new char[] { 'r' },
110 charArray_s = new char[] { 's' },
111 charArray_t = new char[] { 't' },
112 charArray_u = new char[] { 'u' },
113 charArray_v = new char[] { 'v' },
114 charArray_w = new char[] { 'w' },
115 charArray_x = new char[] { 'x' },
116 charArray_y = new char[] { 'y' },
117 charArray_z = new char[] { 'z' };
119 static final char[] initCharArray =
120 new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
121 static final int TableSize = 30, InternalTableSize = 6;
123 public static final int OptimizedLength = 6;
125 final char[][][][] charArray_length =
126 new char[OptimizedLength][TableSize][InternalTableSize][];
127 // support for detecting non-externalized string literals
128 int currentLineNr = -1;
129 int previousLineNr = -1;
130 NLSLine currentLine = null;
131 List lines = new ArrayList();
132 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
133 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
134 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
135 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
136 public StringLiteral[] nonNLSStrings = null;
137 public boolean checkNonExternalizedStringLiterals = true;
138 public boolean wasNonExternalizedStringLiteral = false;
141 for (int i = 0; i < 6; i++) {
// Pre-fills every slot of the charArray_length table with the shared
// initCharArray placeholder. The outer bound 6 equals OptimizedLength,
// which is the size of the table's first dimension.
142 for (int j = 0; j < TableSize; j++) {
143 for (int k = 0; k < InternalTableSize; k++) {
144 charArray_length[i][j][k] = initCharArray;
149 static int newEntry2 = 0,
155 public static final int RoundBracket = 0;
156 public static final int SquareBracket = 1;
157 public static final int CurlyBracket = 2;
158 public static final int BracketKinds = 3;
160 public static final boolean DEBUG = false;
164 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
// Convenience constructor: forwards both flags to the three-argument
// constructor and passes false as its third argument.
// NOTE(review): the third argument is presumably the assert-mode flag --
// confirm against the three-argument constructor, which is not visible here.
165 this(tokenizeComments, tokenizeWhiteSpace, false);
169 * Determines if the specified character is
170 * permissible as the first character in a PHP identifier
172 public static boolean isPHPIdentifierStart(char ch) {
// Accepts every character for which Character.isLetter is true, plus the
// underscore. Digits are therefore rejected as a first character.
173 return Character.isLetter(ch) || (ch == '_');
177 * Determines if the specified character may be part of a PHP identifier as
178 * other than the first character
180 public static boolean isPHPIdentifierPart(char ch) {
// Accepts every character for which Character.isLetterOrDigit is true, plus
// the underscore; unlike isPHPIdentifierStart, digits are allowed here.
181 return Character.isLetterOrDigit(ch) || (ch == '_');
184 public final boolean atEnd() {
185 // This check is only meaningful when source holds the complete input;
186 // it is not relevant when source is only a part of the real input stream.
// Returns true when currentPosition equals source.length.
// eofPosition is not consulted by this test.
188 return source.length == currentPosition;
190 public char[] getCurrentIdentifierSource() {
191 //returns the REAL source of the token (i.e. unicode escapes are already decoded)
194 if (withoutUnicodePtr != 0)
195 //0 is used as a fast test flag, so the real first char is in position 1
// Unicode path: the decoded characters live in withoutUnicodeBuffer and the
// result array is sized to withoutUnicodePtr.
197 withoutUnicodeBuffer,
199 result = new char[withoutUnicodePtr],
// No-unicode path: the token is the span between startPosition and
// currentPosition.
203 int length = currentPosition - startPosition;
204 switch (length) { // see OptimizedLength
// Short tokens are handed to the optimizedCurrentTokenSource1..6 helpers.
// NOTE(review): presumably one helper per token length 1..6 -- the case
// labels are not visible here, confirm.
206 return optimizedCurrentTokenSource1();
208 return optimizedCurrentTokenSource2();
210 return optimizedCurrentTokenSource3();
212 return optimizedCurrentTokenSource4();
214 return optimizedCurrentTokenSource5();
216 return optimizedCurrentTokenSource6();
// Remaining path: a freshly allocated array of exactly `length` chars.
222 result = new char[length],
228 public int getCurrentTokenEndPosition() {
// Returns currentPosition - 1. The class-level notes describe
// currentPosition-1 as the sourceEnd position in the stream.
229 return this.currentPosition - 1;
231 public final char[] getCurrentTokenSource() {
232 // Returns the REAL source of the token (i.e. unicode escapes are already decoded)
235 if (withoutUnicodePtr != 0)
236 // 0 is used as a fast test flag, so the real first char is in position 1
// Unicode path: the result is built from withoutUnicodeBuffer and holds
// withoutUnicodePtr chars.
238 withoutUnicodeBuffer,
240 result = new char[withoutUnicodePtr],
// No-unicode path: the result holds currentPosition - startPosition chars.
248 result = new char[length = currentPosition - startPosition],
255 public final char[] getCurrentTokenSource(int startPos) {
256 // Returns the REAL source of the token (i.e. unicode escapes are already decoded)
// Same as the no-argument variant, except that the no-unicode path measures
// the token from the caller-supplied startPos instead of startPosition.
259 if (withoutUnicodePtr != 0)
260 // 0 is used as a fast test flag, so the real first char is in position 1
262 withoutUnicodeBuffer,
264 result = new char[withoutUnicodePtr],
// No-unicode path: the result holds currentPosition - startPos chars.
272 result = new char[length = currentPosition - startPos],
279 public final char[] getCurrentTokenSourceString() {
280 //returns the REAL source of the token (i.e. unicode escapes are already decoded).
281 //REMOVES the two " that are at the beginning and the end.
284 if (withoutUnicodePtr != 0)
285 //0 is used as a fast test flag, so the real first char is in position 1
286 System.arraycopy(withoutUnicodeBuffer, 2,
287 //2 is 1 (real start) + 1 (to jump over the ")
// The destination is two chars shorter than the buffered token because the
// opening and closing delimiters are both left out.
288 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
// No-unicode path: token span minus the two delimiter chars.
294 result = new char[length = currentPosition - startPosition - 2],
300 public int getCurrentTokenStartPosition() {
// Returns the startPosition field unchanged.
301 return this.startPosition;
304 * Search the source position corresponding to the end of a given line number
306 * Line numbers are 1-based, and relative to the scanner initialPosition.
307 * Character positions are 0-based.
309 * In case the given line number is inconsistent, answers -1.
311 public final int getLineEnd(int lineNumber) {
// Guard clauses: a missing lineEnds table and a line number at or beyond
// lineEnds.length are handled before the lookup.
// NOTE(review): the upper bound is checked against lineEnds.length (the
// array capacity), not against linePtr -- confirm that slots which were
// never filled cannot be returned as a line end.
313 if (lineEnds == null)
315 if (lineNumber >= lineEnds.length)
320 if (lineNumber == lineEnds.length - 1)
// Line numbers are 1-based, so the end of line N is stored at index N - 1.
322 return lineEnds[lineNumber - 1];
323 // the next line starts one character after the lineEnd of the previous line
326 * Search the source position corresponding to the beginning of a given line number
328 * Line numbers are 1-based, and relative to the scanner initialPosition.
329 * Character positions are 0-based.
331 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
333 * In case the given line number is inconsistent, answers -1.
335 public final int getLineStart(int lineNumber) {
// Guard clauses: a missing lineEnds table and a line number at or beyond
// lineEnds.length are handled before the lookup.
337 if (lineEnds == null)
339 if (lineNumber >= lineEnds.length)
// NOTE(review): presumably the first-line case (the guarding condition is not
// visible here) -- it answers the scanner's initialPosition.
345 return initialPosition;
// Line N starts right after the end of line N - 1, which is stored at
// index N - 2 because line numbers are 1-based.
346 return lineEnds[lineNumber - 2] + 1;
347 // the next line starts one character after the lineEnd of the previous line
349 public final boolean getNextChar(char testedChar) {
351 //handles the case of unicode escapes.
352 //when a unicode escape appears we must use a buffer that holds the decoded char values
353 //At the end of this method currentCharacter holds the newly visited char
354 //and currentPosition points right after it
355 //Both previous lines are true if the currentCharacter is == to the testedChar
356 //On a mismatch currentPosition is rewound to its entry value.
// NOTE(review): in the lines visible here currentCharacter is overwritten
// before the comparison and is not restored on a mismatch.
358 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the entry position so that a mismatch or a bad escape can rewind
360 int temp = currentPosition;
362 if (((currentCharacter = source[currentPosition++]) == '\\')
363 && (source[currentPosition] == 'u')) {
364 //-------------unicode handling ------------
// any number of consecutive 'u' characters is consumed after the backslash
368 while (source[currentPosition] == 'u') {
// four hex digits are read; a digit value above 15 rejects the escape and
// rewinds currentPosition
373 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
375 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
377 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
379 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
381 currentPosition = temp;
// combine the four hex digits into a single char
385 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
386 if (currentCharacter != testedChar) {
387 currentPosition = temp;
// records whether the decoded escape is itself a backslash
390 unicodeAsBackSlash = currentCharacter == '\\';
392 //need the unicode buffer
393 if (withoutUnicodePtr == 0) {
394 //buffer all the entries that have been left aside....
395 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
399 withoutUnicodeBuffer,
403 //fill the buffer with the char
404 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
407 } //-------------end unicode handling--------------
// plain (non-escaped) character
409 if (currentCharacter != testedChar) {
410 currentPosition = temp;
413 unicodeAsBackSlash = false;
// once the unicode buffer is in use, every accepted char must be appended to it
414 if (withoutUnicodePtr != 0)
415 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source: clear the flag and rewind
418 } catch (IndexOutOfBoundsException e) {
419 unicodeAsBackSlash = false;
420 currentPosition = temp;
424 public final int getNextChar(char testedChar1, char testedChar2) {
425 //result: 0 when the char is testedChar1, 1 when it is testedChar2, -1 otherwise
426 //the test can be done with (x==0) for the first and (x>0) for the second
427 //handles the case of unicode escapes.
428 //when a unicode escape appears we must use a buffer that holds the decoded char values
429 //At the end of this method currentCharacter holds the newly visited char
430 //and currentPosition points right after it
431 //Both previous lines are true if the currentCharacter is == to testedChar1 or testedChar2
432 //When neither matches, currentPosition is rewound to its entry value.
// NOTE(review): in the lines visible here currentCharacter is overwritten
// before the comparison and is not restored on a mismatch.
434 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the entry position so that a mismatch or a bad escape can rewind
436 int temp = currentPosition;
439 if (((currentCharacter = source[currentPosition++]) == '\\')
440 && (source[currentPosition] == 'u')) {
441 //-------------unicode handling ------------
// any number of consecutive 'u' characters is consumed after the backslash
445 while (source[currentPosition] == 'u') {
// four hex digits are read; a digit value above 15 rejects the escape and
// rewinds currentPosition
450 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
452 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
454 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
456 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
458 currentPosition = temp;
// combine the four hex digits into a single char
462 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
463 if (currentCharacter == testedChar1)
465 else if (currentCharacter == testedChar2)
468 currentPosition = temp;
472 //need the unicode buffer
473 if (withoutUnicodePtr == 0) {
474 //buffer all the entries that have been left aside....
475 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
479 withoutUnicodeBuffer,
483 //fill the buffer with the char
484 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
486 } //-------------end unicode handling--------------
// plain (non-escaped) character
488 if (currentCharacter == testedChar1)
490 else if (currentCharacter == testedChar2)
493 currentPosition = temp;
// once the unicode buffer is in use, every accepted char must be appended to it
497 if (withoutUnicodePtr != 0)
498 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source: rewind
501 } catch (IndexOutOfBoundsException e) {
502 currentPosition = temp;
506 public final boolean getNextCharAsDigit() {
508 //handles the case of unicode escapes.
509 //when a unicode escape appears we must use a buffer that holds the decoded char values
510 //At the end of this method currentCharacter holds the newly visited char
511 //and currentPosition points right after it
512 //Both previous lines are true if the currentCharacter is a digit
513 //When it is not a digit, currentPosition is rewound to its entry value.
// NOTE(review): in the lines visible here currentCharacter is overwritten
// before the test and is not restored when the test fails.
515 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the entry position so that a failed test or a bad escape can rewind
517 int temp = currentPosition;
519 if (((currentCharacter = source[currentPosition++]) == '\\')
520 && (source[currentPosition] == 'u')) {
521 //-------------unicode handling ------------
// any number of consecutive 'u' characters is consumed after the backslash
525 while (source[currentPosition] == 'u') {
// four hex digits are read; a digit value above 15 rejects the escape and
// rewinds currentPosition
530 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
532 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
534 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
536 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
538 currentPosition = temp;
// combine the four hex digits into a single char
542 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// the digit test is Character.isDigit, applied to the decoded char
543 if (!Character.isDigit(currentCharacter)) {
544 currentPosition = temp;
548 //need the unicode buffer
549 if (withoutUnicodePtr == 0) {
550 //buffer all the entries that have been left aside....
551 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
555 withoutUnicodeBuffer,
559 //fill the buffer with the char
560 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
562 } //-------------end unicode handling--------------
// plain (non-escaped) character
564 if (!Character.isDigit(currentCharacter)) {
565 currentPosition = temp;
// once the unicode buffer is in use, every accepted char must be appended to it
568 if (withoutUnicodePtr != 0)
569 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source: rewind
572 } catch (IndexOutOfBoundsException e) {
573 currentPosition = temp;
577 public final boolean getNextCharAsDigit(int radix) {
579 //handles the case of unicode escapes.
580 //when a unicode escape appears we must use a buffer that holds the decoded char values
581 //At the end of this method currentCharacter holds the newly visited char
582 //and currentPosition points right after it
583 //Both previous lines are true if the currentCharacter is a digit in the given radix
584 //When it is not such a digit, currentPosition is rewound to its entry value.
// NOTE(review): in the lines visible here currentCharacter is overwritten
// before the test and is not restored when the test fails.
586 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the entry position so that a failed test or a bad escape can rewind
588 int temp = currentPosition;
590 if (((currentCharacter = source[currentPosition++]) == '\\')
591 && (source[currentPosition] == 'u')) {
592 //-------------unicode handling ------------
// any number of consecutive 'u' characters is consumed after the backslash
596 while (source[currentPosition] == 'u') {
// four hex digits are read; a digit value above 15 rejects the escape and
// rewinds currentPosition
601 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
603 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
605 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
607 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
609 currentPosition = temp;
// combine the four hex digits into a single char
613 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// Character.digit answers -1 when the char is not a valid digit in this radix
614 if (Character.digit(currentCharacter, radix) == -1) {
615 currentPosition = temp;
619 //need the unicode buffer
620 if (withoutUnicodePtr == 0) {
621 //buffer all the entries that have been left aside....
622 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
626 withoutUnicodeBuffer,
630 //fill the buffer with the char
631 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
633 } //-------------end unicode handling--------------
// plain (non-escaped) character
635 if (Character.digit(currentCharacter, radix) == -1) {
636 currentPosition = temp;
// once the unicode buffer is in use, every accepted char must be appended to it
639 if (withoutUnicodePtr != 0)
640 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source: rewind
643 } catch (IndexOutOfBoundsException e) {
644 currentPosition = temp;
648 public boolean getNextCharAsJavaIdentifierPart() {
650 //handles the case of unicode escapes.
651 //when a unicode escape appears we must use a buffer that holds the decoded char values
652 //At the end of this method currentCharacter holds the newly visited char
653 //and currentPosition points right after it
654 //Both previous lines are true if the currentCharacter is accepted by isPHPIdentifierPart
655 //When it is not accepted, currentPosition is rewound to its entry value.
// NOTE(review): despite the method name, the test applied below is
// isPHPIdentifierPart, not Character.isJavaIdentifierPart.
// NOTE(review): in the lines visible here currentCharacter is overwritten
// before the test and is not restored when the test fails.
657 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the entry position so that a failed test or a bad escape can rewind
659 int temp = currentPosition;
661 if (((currentCharacter = source[currentPosition++]) == '\\')
662 && (source[currentPosition] == 'u')) {
663 //-------------unicode handling ------------
// any number of consecutive 'u' characters is consumed after the backslash
667 while (source[currentPosition] == 'u') {
// four hex digits are read; a digit value above 15 rejects the escape and
// rewinds currentPosition
672 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
674 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
676 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
678 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
680 currentPosition = temp;
// combine the four hex digits into a single char
684 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
685 if (!isPHPIdentifierPart(currentCharacter)) {
686 currentPosition = temp;
690 //need the unicode buffer
691 if (withoutUnicodePtr == 0) {
692 //buffer all the entries that have been left aside....
693 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
697 withoutUnicodeBuffer,
701 //fill the buffer with the char
702 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
704 } //-------------end unicode handling--------------
// plain (non-escaped) character
706 if (!isPHPIdentifierPart(currentCharacter)) {
707 currentPosition = temp;
// once the unicode buffer is in use, every accepted char must be appended to it
711 if (withoutUnicodePtr != 0)
712 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source: rewind
715 } catch (IndexOutOfBoundsException e) {
716 currentPosition = temp;
721 public int getNextToken() throws InvalidInputException {
724 int htmlPosition = currentPosition;
726 currentCharacter = source[currentPosition++];
727 if (currentCharacter == '<') {
728 if (getNextChar('?')) {
729 currentCharacter = source[currentPosition++];
730 if ((currentCharacter == ' ')
731 || Character.isWhitespace(currentCharacter)) {
733 startPosition = currentPosition;
735 if (tokenizeWhiteSpace) {
736 // && (whiteStart != currentPosition - 1)) {
737 // reposition scanner in case we are interested by spaces as tokens
738 startPosition = htmlPosition;
739 return TokenNameHTML;
743 (currentCharacter == 'P') || (currentCharacter == 'p');
745 int test = getNextChar('H', 'h');
747 test = getNextChar('P', 'p');
750 startPosition = currentPosition;
753 if (tokenizeWhiteSpace) {
754 // && (whiteStart != currentPosition - 1)) {
755 // reposition scanner in case we are interested by spaces as tokens
756 startPosition = htmlPosition;
757 return TokenNameHTML;
766 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
767 if (recordLineSeparator) {
774 } //-----------------end switch while try--------------------
775 catch (IndexOutOfBoundsException e) {
782 jumpOverMethodBody();
784 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
787 while (true) { //loop for jumping over comments
788 withoutUnicodePtr = 0;
789 //start with a new token (even comment written with unicode )
791 // ---------Consume white space and handles startPosition---------
792 int whiteStart = currentPosition;
793 boolean isWhiteSpace;
795 startPosition = currentPosition;
796 if (((currentCharacter = source[currentPosition++]) == '\\')
797 && (source[currentPosition] == 'u')) {
798 isWhiteSpace = jumpOverUnicodeWhiteSpace();
800 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
801 checkNonExternalizeString();
802 if (recordLineSeparator) {
809 (currentCharacter == ' ')
810 || Character.isWhitespace(currentCharacter);
812 } while (isWhiteSpace);
813 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
814 // reposition scanner in case we are interested by spaces as tokens
816 startPosition = whiteStart;
817 return TokenNameWHITESPACE;
819 //little trick to get out in the middle of a source computation
820 if (currentPosition > eofPosition)
823 // ---------Identify the next token-------------
825 switch (currentCharacter) {
827 return TokenNameLPAREN;
829 return TokenNameRPAREN;
831 return TokenNameLBRACE;
833 return TokenNameRBRACE;
835 return TokenNameLBRACKET;
837 return TokenNameRBRACKET;
839 return TokenNameSEMICOLON;
841 return TokenNameCOMMA;
844 if (getNextCharAsDigit())
845 return scanNumber(true);
850 if ((test = getNextChar('+', '=')) == 0)
851 return TokenNamePLUS_PLUS;
853 return TokenNamePLUS_EQUAL;
854 return TokenNamePLUS;
859 if ((test = getNextChar('-', '=')) == 0)
860 return TokenNameMINUS_MINUS;
862 return TokenNameMINUS_EQUAL;
863 if (getNextChar('>'))
864 return TokenNameMINUS_GREATER;
866 return TokenNameMINUS;
869 if (getNextChar('='))
870 return TokenNameTWIDDLE_EQUAL;
871 return TokenNameTWIDDLE;
873 if (getNextChar('='))
874 return TokenNameNOT_EQUAL;
877 if (getNextChar('='))
878 return TokenNameMULTIPLY_EQUAL;
879 return TokenNameMULTIPLY;
881 if (getNextChar('='))
882 return TokenNameREMAINDER_EQUAL;
883 return TokenNameREMAINDER;
887 if ((test = getNextChar('=', '<')) == 0)
888 return TokenNameLESS_EQUAL;
890 if (getNextChar('='))
891 return TokenNameLEFT_SHIFT_EQUAL;
892 if (getNextChar('<')) {
893 int heredocStart = currentPosition;
894 int heredocLength = 0;
895 currentCharacter = source[currentPosition++];
896 if (isPHPIdentifierStart(currentCharacter)) {
897 currentCharacter = source[currentPosition++];
899 return TokenNameERROR;
901 while (isPHPIdentifierPart(currentCharacter)) {
902 currentCharacter = source[currentPosition++];
905 heredocLength = currentPosition - heredocStart - 1;
907 // heredoc end-tag determination
908 boolean endTag = true;
911 ch = source[currentPosition++];
912 if (ch == '\r' || ch == '\n') {
913 if (recordLineSeparator) {
918 for (int i = 0; i < heredocLength; i++) {
919 if (source[currentPosition + i]
920 != source[heredocStart + i]) {
926 currentPosition += heredocLength - 1;
927 currentCharacter = source[currentPosition++];
928 break; // do...while loop
936 return TokenNameHEREDOC;
938 return TokenNameLEFT_SHIFT;
940 return TokenNameLESS;
945 if ((test = getNextChar('=', '>')) == 0)
946 return TokenNameGREATER_EQUAL;
948 if ((test = getNextChar('=', '>')) == 0)
949 return TokenNameRIGHT_SHIFT_EQUAL;
950 return TokenNameRIGHT_SHIFT;
952 return TokenNameGREATER;
955 if (getNextChar('='))
956 return TokenNameEQUAL_EQUAL;
957 if (getNextChar('>'))
958 return TokenNameEQUAL_GREATER;
959 return TokenNameEQUAL;
963 if ((test = getNextChar('&', '=')) == 0)
964 return TokenNameAND_AND;
966 return TokenNameAND_EQUAL;
972 if ((test = getNextChar('|', '=')) == 0)
973 return TokenNameOR_OR;
975 return TokenNameOR_EQUAL;
979 if (getNextChar('='))
980 return TokenNameXOR_EQUAL;
983 if (getNextChar('>')) {
985 return TokenNameStopPHP;
987 return TokenNameQUESTION;
989 if (getNextChar(':'))
990 return TokenNameCOLON_COLON;
991 return TokenNameCOLON;
997 // if ((test = getNextChar('\n', '\r')) == 0) {
998 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1001 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1002 // for (int lookAhead = 0;
1005 // if (currentPosition + lookAhead
1006 // == source.length)
1008 // if (source[currentPosition + lookAhead]
1011 // if (source[currentPosition + lookAhead]
1013 // currentPosition += lookAhead + 1;
1017 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1020 // if (getNextChar('\'')) {
1021 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1022 // for (int lookAhead = 0;
1025 // if (currentPosition + lookAhead
1026 // == source.length)
1028 // if (source[currentPosition + lookAhead]
1031 // if (source[currentPosition + lookAhead]
1033 // currentPosition += lookAhead + 1;
1037 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1039 // if (getNextChar('\\'))
1040 // scanEscapeCharacter();
1041 // else { // consume next character
1042 // unicodeAsBackSlash = false;
1043 // if (((currentCharacter = source[currentPosition++])
1045 // && (source[currentPosition] == 'u')) {
1046 // getNextUnicodeChar();
1048 // if (withoutUnicodePtr != 0) {
1049 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1050 // currentCharacter;
1054 // // if (getNextChar('\''))
1055 // // return TokenNameCharacterLiteral;
1056 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1057 // for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1058 // if (currentPosition + lookAhead == source.length)
1060 // if (source[currentPosition + lookAhead] == '\n')
1062 // if (source[currentPosition + lookAhead] == '\'') {
1063 // currentPosition += lookAhead + 1;
1067 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1070 // consume next character
1071 unicodeAsBackSlash = false;
1072 if (((currentCharacter = source[currentPosition++]) == '\\')
1073 && (source[currentPosition] == 'u')) {
1074 getNextUnicodeChar();
1076 if (withoutUnicodePtr != 0) {
1077 withoutUnicodeBuffer[++withoutUnicodePtr] =
1082 while (currentCharacter != '\'') {
1084 /**** in PHP \r and \n are valid in string literals ****/
1085 // if ((currentCharacter == '\n')
1086 // || (currentCharacter == '\r')) {
1087 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1088 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1089 // if (currentPosition + lookAhead == source.length)
1091 // if (source[currentPosition + lookAhead] == '\n')
1093 // if (source[currentPosition + lookAhead] == '\"') {
1094 // currentPosition += lookAhead + 1;
1098 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1100 if (currentCharacter == '\\') {
1101 int escapeSize = currentPosition;
1102 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1103 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1104 scanSingleQuotedEscapeCharacter();
1105 escapeSize = currentPosition - escapeSize;
1106 if (withoutUnicodePtr == 0) {
1107 //buffer all the entries that have been left aside....
1109 currentPosition - escapeSize - 1 - startPosition;
1113 withoutUnicodeBuffer,
1116 withoutUnicodeBuffer[++withoutUnicodePtr] =
1118 } else { //overwrite the / in the buffer
1119 withoutUnicodeBuffer[withoutUnicodePtr] =
1121 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1122 withoutUnicodePtr--;
1126 // consume next character
1127 unicodeAsBackSlash = false;
1128 if (((currentCharacter = source[currentPosition++]) == '\\')
1129 && (source[currentPosition] == 'u')) {
1130 getNextUnicodeChar();
1132 if (withoutUnicodePtr != 0) {
1133 withoutUnicodeBuffer[++withoutUnicodePtr] =
1139 } catch (IndexOutOfBoundsException e) {
1140 throw new InvalidInputException(UNTERMINATED_STRING);
1141 } catch (InvalidInputException e) {
1142 if (e.getMessage().equals(INVALID_ESCAPE)) {
1143 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1144 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1145 if (currentPosition + lookAhead == source.length)
1147 if (source[currentPosition + lookAhead] == '\n')
1149 if (source[currentPosition + lookAhead] == '\'') {
1150 currentPosition += lookAhead + 1;
1158 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1159 if (currentLine == null) {
1160 currentLine = new NLSLine();
1161 lines.add(currentLine);
1165 getCurrentTokenSourceString(),
1167 currentPosition - 1));
1169 return TokenNameStringConstant;
1172 // consume next character
1173 unicodeAsBackSlash = false;
1174 if (((currentCharacter = source[currentPosition++]) == '\\')
1175 && (source[currentPosition] == 'u')) {
1176 getNextUnicodeChar();
1178 if (withoutUnicodePtr != 0) {
1179 withoutUnicodeBuffer[++withoutUnicodePtr] =
1184 while (currentCharacter != '"') {
1186 /**** in PHP \r and \n are valid in string literals ****/
1187 // if ((currentCharacter == '\n')
1188 // || (currentCharacter == '\r')) {
1189 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1190 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1191 // if (currentPosition + lookAhead == source.length)
1193 // if (source[currentPosition + lookAhead] == '\n')
1195 // if (source[currentPosition + lookAhead] == '\"') {
1196 // currentPosition += lookAhead + 1;
1200 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1202 if (currentCharacter == '\\') {
1203 int escapeSize = currentPosition;
1204 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1205 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1206 scanDoubleQuotedEscapeCharacter();
1207 escapeSize = currentPosition - escapeSize;
1208 if (withoutUnicodePtr == 0) {
1209 //buffer all the entries that have been left aside....
1211 currentPosition - escapeSize - 1 - startPosition;
1215 withoutUnicodeBuffer,
1218 withoutUnicodeBuffer[++withoutUnicodePtr] =
1220 } else { //overwrite the / in the buffer
1221 withoutUnicodeBuffer[withoutUnicodePtr] =
1223 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1224 withoutUnicodePtr--;
1228 // consume next character
1229 unicodeAsBackSlash = false;
1230 if (((currentCharacter = source[currentPosition++]) == '\\')
1231 && (source[currentPosition] == 'u')) {
1232 getNextUnicodeChar();
1234 if (withoutUnicodePtr != 0) {
1235 withoutUnicodeBuffer[++withoutUnicodePtr] =
1241 } catch (IndexOutOfBoundsException e) {
1242 throw new InvalidInputException(UNTERMINATED_STRING);
1243 } catch (InvalidInputException e) {
1244 if (e.getMessage().equals(INVALID_ESCAPE)) {
1245 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1246 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1247 if (currentPosition + lookAhead == source.length)
1249 if (source[currentPosition + lookAhead] == '\n')
1251 if (source[currentPosition + lookAhead] == '\"') {
1252 currentPosition += lookAhead + 1;
1260 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1261 if (currentLine == null) {
1262 currentLine = new NLSLine();
1263 lines.add(currentLine);
1267 getCurrentTokenSourceString(),
1269 currentPosition - 1));
1271 return TokenNameStringLiteral;
1274 // consume next character
1275 unicodeAsBackSlash = false;
1276 if (((currentCharacter = source[currentPosition++]) == '\\')
1277 && (source[currentPosition] == 'u')) {
1278 getNextUnicodeChar();
1280 if (withoutUnicodePtr != 0) {
1281 withoutUnicodeBuffer[++withoutUnicodePtr] =
1286 while (currentCharacter != '`') {
1288 /**** in PHP \r and \n are valid in string literals ****/
1289 // if ((currentCharacter == '\n')
1290 // || (currentCharacter == '\r')) {
1291 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1292 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1293 // if (currentPosition + lookAhead == source.length)
1295 // if (source[currentPosition + lookAhead] == '\n')
1297 // if (source[currentPosition + lookAhead] == '\"') {
1298 // currentPosition += lookAhead + 1;
1302 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1304 if (currentCharacter == '\\') {
1305 int escapeSize = currentPosition;
1306 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1307 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1308 scanDoubleQuotedEscapeCharacter();
1309 escapeSize = currentPosition - escapeSize;
1310 if (withoutUnicodePtr == 0) {
1311 //buffer all the entries that have been left aside....
1313 currentPosition - escapeSize - 1 - startPosition;
1317 withoutUnicodeBuffer,
1320 withoutUnicodeBuffer[++withoutUnicodePtr] =
1322 } else { //overwrite the / in the buffer
1323 withoutUnicodeBuffer[withoutUnicodePtr] =
1325 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1326 withoutUnicodePtr--;
1330 // consume next character
1331 unicodeAsBackSlash = false;
1332 if (((currentCharacter = source[currentPosition++]) == '\\')
1333 && (source[currentPosition] == 'u')) {
1334 getNextUnicodeChar();
1336 if (withoutUnicodePtr != 0) {
1337 withoutUnicodeBuffer[++withoutUnicodePtr] =
1343 } catch (IndexOutOfBoundsException e) {
1344 throw new InvalidInputException(UNTERMINATED_STRING);
1345 } catch (InvalidInputException e) {
1346 if (e.getMessage().equals(INVALID_ESCAPE)) {
1347 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1348 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1349 if (currentPosition + lookAhead == source.length)
1351 if (source[currentPosition + lookAhead] == '\n')
1353 if (source[currentPosition + lookAhead] == '`') {
1354 currentPosition += lookAhead + 1;
1362 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1363 if (currentLine == null) {
1364 currentLine = new NLSLine();
1365 lines.add(currentLine);
1369 getCurrentTokenSourceString(),
1371 currentPosition - 1));
1373 return TokenNameStringInterpolated;
1378 if ((currentCharacter == '#')
1379 || (test = getNextChar('/', '*')) == 0) {
1381 int endPositionForLineComment = 0;
1382 try { //get the next char
1383 if (((currentCharacter = source[currentPosition++])
1385 && (source[currentPosition] == 'u')) {
1386 //-------------unicode traitement ------------
1387 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1389 while (source[currentPosition] == 'u') {
1393 Character.getNumericValue(source[currentPosition++]))
1397 Character.getNumericValue(source[currentPosition++]))
1401 Character.getNumericValue(source[currentPosition++]))
1405 Character.getNumericValue(source[currentPosition++]))
1408 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1411 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1415 //handle the \\u case manually into comment
1416 if (currentCharacter == '\\') {
1417 if (source[currentPosition] == '\\')
1419 } //jump over the \\
1420 boolean isUnicode = false;
1421 while (currentCharacter != '\r'
1422 && currentCharacter != '\n') {
1423 if (currentCharacter == '?') {
1424 if (getNextChar('>')) {
1425 startPosition = currentPosition - 2;
1427 return TokenNameStopPHP;
1433 if (((currentCharacter = source[currentPosition++])
1435 && (source[currentPosition] == 'u')) {
1437 //-------------unicode traitement ------------
1438 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1440 while (source[currentPosition] == 'u') {
1444 Character.getNumericValue(source[currentPosition++]))
1448 Character.getNumericValue(
1449 source[currentPosition++]))
1453 Character.getNumericValue(
1454 source[currentPosition++]))
1458 Character.getNumericValue(
1459 source[currentPosition++]))
1462 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1465 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1468 //handle the \\u case manually into comment
1469 if (currentCharacter == '\\') {
1470 if (source[currentPosition] == '\\')
1472 } //jump over the \\
1475 endPositionForLineComment = currentPosition - 6;
1477 endPositionForLineComment = currentPosition - 1;
1479 recordComment(false);
1480 if ((currentCharacter == '\r')
1481 || (currentCharacter == '\n')) {
1482 checkNonExternalizeString();
1483 if (recordLineSeparator) {
1485 pushUnicodeLineSeparator();
1487 pushLineSeparator();
1493 if (tokenizeComments) {
1495 currentPosition = endPositionForLineComment;
1496 // reset one character behind
1498 return TokenNameCOMMENT_LINE;
1500 } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1501 if (tokenizeComments) {
1503 // reset one character behind
1504 return TokenNameCOMMENT_LINE;
1510 //traditional and annotation comment
1511 boolean isJavadoc = false, star = false;
1512 // consume next character
1513 unicodeAsBackSlash = false;
1514 if (((currentCharacter = source[currentPosition++]) == '\\')
1515 && (source[currentPosition] == 'u')) {
1516 getNextUnicodeChar();
1518 if (withoutUnicodePtr != 0) {
1519 withoutUnicodeBuffer[++withoutUnicodePtr] =
1524 if (currentCharacter == '*') {
1528 if ((currentCharacter == '\r')
1529 || (currentCharacter == '\n')) {
1530 checkNonExternalizeString();
1531 if (recordLineSeparator) {
1532 pushLineSeparator();
1537 try { //get the next char
1538 if (((currentCharacter = source[currentPosition++])
1540 && (source[currentPosition] == 'u')) {
1541 //-------------unicode traitement ------------
1542 getNextUnicodeChar();
1544 //handle the \\u case manually into comment
1545 if (currentCharacter == '\\') {
1546 if (source[currentPosition] == '\\')
1550 // empty comment is not a javadoc /**/
1551 if (currentCharacter == '/') {
1554 //loop until end of comment */
1555 while ((currentCharacter != '/') || (!star)) {
1556 if ((currentCharacter == '\r')
1557 || (currentCharacter == '\n')) {
1558 checkNonExternalizeString();
1559 if (recordLineSeparator) {
1560 pushLineSeparator();
1565 star = currentCharacter == '*';
1567 if (((currentCharacter = source[currentPosition++])
1569 && (source[currentPosition] == 'u')) {
1570 //-------------unicode traitement ------------
1571 getNextUnicodeChar();
1573 //handle the \\u case manually into comment
1574 if (currentCharacter == '\\') {
1575 if (source[currentPosition] == '\\')
1577 } //jump over the \\
1579 recordComment(isJavadoc);
1580 if (tokenizeComments) {
1582 return TokenNameCOMMENT_PHPDOC;
1583 return TokenNameCOMMENT_BLOCK;
1585 } catch (IndexOutOfBoundsException e) {
1586 throw new InvalidInputException(UNTERMINATED_COMMENT);
1590 if (getNextChar('='))
1591 return TokenNameDIVIDE_EQUAL;
1592 return TokenNameDIVIDE;
1596 return TokenNameEOF;
1597 //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1598 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1601 if (currentCharacter == '$') {
1602 while ((currentCharacter = source[currentPosition++]) == '$') {
1604 if (currentCharacter == '{')
1605 return TokenNameDOLLAR_LBRACE;
1606 if (isPHPIdentifierStart(currentCharacter))
1607 return scanIdentifierOrKeyword(true);
1608 return TokenNameERROR;
1610 if (isPHPIdentifierStart(currentCharacter))
1611 return scanIdentifierOrKeyword(false);
1612 if (Character.isDigit(currentCharacter))
1613 return scanNumber(false);
1614 return TokenNameERROR;
1617 } //-----------------end switch while try--------------------
1618 catch (IndexOutOfBoundsException e) {
1621 return TokenNameEOF;
/**
 * Decodes the unicode escape found at the current position and makes the
 * decoded character the current one.
 *
 * Four characters are read from source, each converted with
 * Character.getNumericValue, and combined as base-16 digits into
 * currentCharacter. The decoded character is appended to
 * withoutUnicodeBuffer, and unicodeAsBackSlash records whether it is a
 * backslash.
 *
 * NOTE(review): several statements of this method (among them the body of
 * the 'u' loop and most arguments of the buffer copy) are not visible in
 * this excerpt; the comments below only describe what is shown.
 *
 * @throws IndexOutOfBoundsException declared by the signature; source is
 *         indexed directly in the visible statements
 * @throws InvalidInputException with INVALID_UNICODE_ESCAPE when one of the
 *         four characters has a numeric value above 15
 */
1624 public final void getNextUnicodeChar()
1625 throws IndexOutOfBoundsException, InvalidInputException {
1627 //handle the case of unicode.
1628 //when a unicode appears then we must use a buffer that holds char internal values
1629 //At the end of this method currentCharacter holds the new visited char
1630 //and currentPosition points right next after it
1632 //ALL getNextChar.... ARE OPTIMIZED COPIES
// unicodeSize starts at 6 and is later subtracted from currentPosition to
// locate the start of the escape - presumably the escape length; confirm
1634 int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
// NOTE(review): loop body not shown - presumably skips repeated 'u' characters
1636 while (source[currentPosition] == 'u') {
// each of the four characters must convert to a value of at most 15
1641 if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1643 || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1645 || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1647 || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1649 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// combine the four values as hexadecimal digits, most significant first
1651 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1652 //need the unicode buffer
1653 if (withoutUnicodePtr == 0) {
1654 //buffer all the entries that have been left aside....
// number of token characters that precede the escape
1655 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1659 withoutUnicodeBuffer,
1663 //fill the buffer with the char
1664 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// remember whether the decoded character is itself a backslash
1666 unicodeAsBackSlash = currentCharacter == '\\';
1668 /* Tokenize a method body, assuming that curly brackets are properly balanced.
// Scans forward over a method body without producing tokens.
//
// What the visible statements show:
//  - wasAcr is reset, then an endless loop alternates between skipping white
//    space (updating startPosition) and dispatching on currentCharacter;
//  - line separators are reported through pushLineSeparator() when
//    recordLineSeparator is set;
//  - escapes inside literals go through scanDoubleQuotedEscapeCharacter();
//  - line comments and traditional comments are consumed character by
//    character, decoding unicode escapes by hand;
//  - identifiers (or '$') go through scanIdentifierOrKeyword(...);
//  - IndexOutOfBoundsException and InvalidInputException are caught at the
//    end of the method.
//
// NOTE(review): the case labels of the switch, the catch bodies and the
// brace-counting statements are not visible in this excerpt, so the exit
// condition of the loop cannot be described from here.
1670 public final void jumpOverMethodBody() {
1672 this.wasAcr = false;
1675 while (true) { //loop for jumping over comments
1676 // ---------Consume white space and handles startPosition---------
1677 boolean isWhiteSpace;
1679 startPosition = currentPosition;
// a backslash followed by 'u' starts a unicode escape: delegate to the unicode-aware helper
1680 if (((currentCharacter = source[currentPosition++]) == '\\')
1681 && (source[currentPosition] == 'u')) {
1682 isWhiteSpace = jumpOverUnicodeWhiteSpace();
1684 if (recordLineSeparator
1685 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1686 pushLineSeparator();
1687 isWhiteSpace = Character.isWhitespace(currentCharacter);
1689 } while (isWhiteSpace);
1691 // -------consume token until } is found---------
// NOTE(review): the case labels of this switch are not visible in this excerpt
1692 switch (currentCharacter) {
1704 test = getNextChar('\\');
1707 scanDoubleQuotedEscapeCharacter();
1708 } catch (InvalidInputException ex) {
1711 try { // consume next character
1712 unicodeAsBackSlash = false;
1713 if (((currentCharacter = source[currentPosition++]) == '\\')
1714 && (source[currentPosition] == 'u')) {
1715 getNextUnicodeChar();
// a non-zero withoutUnicodePtr means the token is being buffered, so plain characters are appended too
1717 if (withoutUnicodePtr != 0) {
1718 withoutUnicodeBuffer[++withoutUnicodePtr] =
1722 } catch (InvalidInputException ex) {
1730 try { // consume next character
1731 unicodeAsBackSlash = false;
1732 if (((currentCharacter = source[currentPosition++]) == '\\')
1733 && (source[currentPosition] == 'u')) {
1734 getNextUnicodeChar();
1736 if (withoutUnicodePtr != 0) {
1737 withoutUnicodeBuffer[++withoutUnicodePtr] =
1741 } catch (InvalidInputException ex) {
// consume the literal up to the closing double quote
1743 while (currentCharacter != '"') {
1744 if (currentCharacter == '\r') {
1745 if (source[currentPosition] == '\n')
1748 // the string cannot go further than the line
1750 if (currentCharacter == '\n') {
1752 // the string cannot go further than the line
1754 if (currentCharacter == '\\') {
1756 scanDoubleQuotedEscapeCharacter();
1757 } catch (InvalidInputException ex) {
1760 try { // consume next character
1761 unicodeAsBackSlash = false;
1762 if (((currentCharacter = source[currentPosition++]) == '\\')
1763 && (source[currentPosition] == 'u')) {
1764 getNextUnicodeChar();
1766 if (withoutUnicodePtr != 0) {
1767 withoutUnicodeBuffer[++withoutUnicodePtr] =
1771 } catch (InvalidInputException ex) {
1774 } catch (IndexOutOfBoundsException e) {
// getNextChar('/', '*') returning 0 selects the line-comment path below
1781 if ((test = getNextChar('/', '*')) == 0) {
1785 if (((currentCharacter = source[currentPosition++]) == '\\')
1786 && (source[currentPosition] == 'u')) {
1787 //-------------unicode handling ------------
1788 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1790 while (source[currentPosition] == 'u') {
1794 Character.getNumericValue(source[currentPosition++]))
1798 Character.getNumericValue(source[currentPosition++]))
1802 Character.getNumericValue(source[currentPosition++]))
1806 Character.getNumericValue(source[currentPosition++]))
1809 //error don't care of the value
1810 currentCharacter = 'A';
1811 } //something different from \n and \r
1814 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// consume the rest of the line comment
1818 while (currentCharacter != '\r'
1819 && currentCharacter != '\n') {
1821 if (((currentCharacter = source[currentPosition++])
1823 && (source[currentPosition] == 'u')) {
1824 //-------------unicode handling ------------
1825 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1827 while (source[currentPosition] == 'u') {
1831 Character.getNumericValue(source[currentPosition++]))
1835 Character.getNumericValue(source[currentPosition++]))
1839 Character.getNumericValue(source[currentPosition++]))
1843 Character.getNumericValue(source[currentPosition++]))
1846 //error don't care of the value
1847 currentCharacter = 'A';
1848 } //something different from \n and \r
1851 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1855 if (recordLineSeparator
1856 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1857 pushLineSeparator();
1858 } catch (IndexOutOfBoundsException e) {
1859 } //an eof will then be generated
1863 //traditional and annotation comment
1864 boolean star = false;
1865 try { // consume next character
1866 unicodeAsBackSlash = false;
1867 if (((currentCharacter = source[currentPosition++]) == '\\')
1868 && (source[currentPosition] == 'u')) {
1869 getNextUnicodeChar();
1871 if (withoutUnicodePtr != 0) {
1872 withoutUnicodeBuffer[++withoutUnicodePtr] =
1876 } catch (InvalidInputException ex) {
1878 if (currentCharacter == '*') {
1881 if (recordLineSeparator
1882 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1883 pushLineSeparator();
1884 try { //get the next char
1885 if (((currentCharacter = source[currentPosition++]) == '\\')
1886 && (source[currentPosition] == 'u')) {
1887 //-------------unicode handling ------------
1888 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1890 while (source[currentPosition] == 'u') {
1894 Character.getNumericValue(source[currentPosition++]))
1898 Character.getNumericValue(source[currentPosition++]))
1902 Character.getNumericValue(source[currentPosition++]))
1906 Character.getNumericValue(source[currentPosition++]))
1909 //error don't care of the value
1910 currentCharacter = 'A';
1911 } //something different from * and /
1914 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1917 //loop until end of comment */
// the comment ends on a '/' that directly follows a '*' (tracked by star)
1918 while ((currentCharacter != '/') || (!star)) {
1919 if (recordLineSeparator
1920 && ((currentCharacter == '\r')
1921 || (currentCharacter == '\n')))
1922 pushLineSeparator();
1923 star = currentCharacter == '*';
1925 if (((currentCharacter = source[currentPosition++])
1927 && (source[currentPosition] == 'u')) {
1928 //-------------unicode handling ------------
1929 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1931 while (source[currentPosition] == 'u') {
1935 Character.getNumericValue(source[currentPosition++]))
1939 Character.getNumericValue(source[currentPosition++]))
1943 Character.getNumericValue(source[currentPosition++]))
1947 Character.getNumericValue(source[currentPosition++]))
1950 //error don't care of the value
1951 currentCharacter = 'A';
1952 } //something different from * and /
1955 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1959 } catch (IndexOutOfBoundsException e) {
// identifiers and variables: the argument tells the helper whether the token started with '$'
1968 if (isPHPIdentifierStart(currentCharacter)
1969 || currentCharacter == '$') {
1971 scanIdentifierOrKeyword((currentCharacter == '$'));
1972 } catch (InvalidInputException ex) {
1976 if (Character.isDigit(currentCharacter)) {
1979 } catch (InvalidInputException ex) {
1985 //-----------------end switch while try--------------------
1986 } catch (IndexOutOfBoundsException e) {
1987 } catch (InvalidInputException e) {
/**
 * Decodes the unicode escape at the current position and reports whether the
 * decoded character is white space.
 *
 * The decoded value is stored in currentCharacter; a decoded line separator
 * is reported through pushLineSeparator() when recordLineSeparator is set.
 * A decoded character that is not white space is appended to
 * withoutUnicodeBuffer.
 *
 * NOTE(review): the return statements, the declaration of c1..c4 and the
 * body of the 'u' loop are not visible in this excerpt; the boolean result
 * is presumably true for white space and false otherwise - confirm.
 *
 * @return see the review note above
 * @throws InvalidInputException with INVALID_UNICODE_ESCAPE when one of the
 *         four characters has a numeric value above 15, or when reading the
 *         escape runs past the end of the source array
 */
1991 public final boolean jumpOverUnicodeWhiteSpace()
1992 throws InvalidInputException {
1994 //handle the case of unicode. Jump over the next whiteSpace
1995 //making startPosition pointing on the next available char
1996 //On false, the currentCharacter is filled up with a potential
2000 this.wasAcr = false;
2002 int unicodeSize = 6;
// NOTE(review): loop body not shown - presumably skips repeated 'u' characters
2004 while (source[currentPosition] == 'u') {
// each of the four characters must convert to a value of at most 15
2009 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2011 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2013 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2015 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2017 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// combine the four values as hexadecimal digits, most significant first
2020 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2021 if (recordLineSeparator
2022 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2023 pushLineSeparator();
2024 if (Character.isWhitespace(currentCharacter))
2027 //buffer the new char which is not a white space
2028 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2029 //withoutUnicodePtr == 1 is true here
// running off the end of source while reading the escape is reported as an invalid escape
2031 } catch (IndexOutOfBoundsException e) {
2032 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2035 public final int[] getLineEnds() {
2036 //return a bounded copy of this.lineEnds
2039 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
/**
 * Accessor with a char[] result.
 *
 * NOTE(review): the body is not visible in this excerpt; presumably it hands
 * back the scanner's source buffer (the array indexed as source[...]
 * elsewhere in this class) - confirm, and confirm whether callers may
 * mutate the returned array.
 */
2043 public char[] getSource() {
/**
 * Produces the char[] for a one-character token located at startPosition.
 *
 * NOTE(review): the statements between reading the character and the final
 * allocation are not visible in this excerpt; per the original comments they
 * presumably return pre-built shared arrays for common characters. The only
 * visible return allocates a fresh one-element array.
 *
 * @return a char[] of length 1 holding source[startPosition] on the visible path
 */
2046 final char[] optimizedCurrentTokenSource1() {
2047 //return always the same char[] build only once
2049 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2050 char charOne = source[startPosition];
// fallback visible here: allocate a new single-character array
2105 return new char[] { charOne };
/**
 * Looks up, or registers, a shared char[] for the two-character token that
 * starts at startPosition.
 *
 * A hash built from the two characters selects a bucket in
 * charArray_length[0]; the bucket is searched in two passes for an array
 * with the same two characters, and a new array is stored in the bucket when
 * the second pass ends without a visible match.
 *
 * NOTE(review): the declarations of c0, c1, hash, i and r, the reduction of
 * the hash to a bucket index, the statements taken on a match and the final
 * return are not visible in this excerpt.
 */
2109 final char[] optimizedCurrentTokenSource2() {
2110 //try to return the same char[] build only once
2114 (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2116 char[][] table = charArray_length[0][hash];
// first pass: entries up to the end of the bucket
2118 while (++i < InternalTableSize) {
2119 char[] charArray = table[i];
2120 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2123 //---------other side---------
// second pass: entries up to and including index newEntry2
2125 int max = newEntry2;
2126 while (++i <= max) {
2127 char[] charArray = table[i];
2128 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2131 //--------add the entry-------
// the slot index is advanced and checked against the bucket size (consequent not shown)
2132 if (++max >= InternalTableSize)
2135 table[max] = (r = new char[] { c0, c1 });
/**
 * Looks up, or registers, a shared char[] for the three-character token that
 * starts at startPosition.
 *
 * The three characters are combined (shifted by 12, 6 and 0 bits) into a
 * hash that selects a bucket in charArray_length[1]; the bucket is searched
 * in two passes for an array with the same characters, and a new array is
 * stored in the bucket when the second pass ends without a visible match.
 *
 * NOTE(review): the declarations of c0..c2, hash, i and r, the reduction of
 * the hash to a bucket index, the statements taken on a match and the final
 * return are not visible in this excerpt.
 */
2140 final char[] optimizedCurrentTokenSource3() {
2141 //try to return the same char[] build only once
2145 (((c0 = source[startPosition]) << 12)
2146 + ((c1 = source[startPosition + 1]) << 6)
2147 + (c2 = source[startPosition + 2]))
2149 char[][] table = charArray_length[1][hash];
// first pass: entries up to the end of the bucket
2151 while (++i < InternalTableSize) {
2152 char[] charArray = table[i];
2153 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2156 //---------other side---------
// second pass: entries up to and including index newEntry3
2158 int max = newEntry3;
2159 while (++i <= max) {
2160 char[] charArray = table[i];
2161 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2164 //--------add the entry-------
// the slot index is advanced and checked against the bucket size (consequent not shown)
2165 if (++max >= InternalTableSize)
2168 table[max] = (r = new char[] { c0, c1, c2 });
/**
 * Looks up, or registers, a shared char[] for the four-character token that
 * starts at startPosition.
 *
 * The four characters are combined (shifted by 18, 12, 6 and 0 bits, with
 * the first widened to long) into a hash; its int cast selects a bucket in
 * charArray_length[2]. The bucket is searched in two passes for an array
 * with the same characters, and a new array is stored in the bucket when the
 * second pass ends without a visible match.
 *
 * NOTE(review): the declaration of hash, i and r, the reduction of the hash
 * to a bucket index, the statements taken on a match and the final return
 * are not visible in this excerpt.
 */
2173 final char[] optimizedCurrentTokenSource4() {
2174 //try to return the same char[] build only once
2176 char c0, c1, c2, c3;
2178 ((((long) (c0 = source[startPosition])) << 18)
2179 + ((c1 = source[startPosition + 1]) << 12)
2180 + ((c2 = source[startPosition + 2]) << 6)
2181 + (c3 = source[startPosition + 3]))
2183 char[][] table = charArray_length[2][(int) hash];
// first pass: entries up to the end of the bucket
2185 while (++i < InternalTableSize) {
2186 char[] charArray = table[i];
2187 if ((c0 == charArray[0])
2188 && (c1 == charArray[1])
2189 && (c2 == charArray[2])
2190 && (c3 == charArray[3]))
2193 //---------other side---------
// second pass: entries up to and including index newEntry4
2195 int max = newEntry4;
2196 while (++i <= max) {
2197 char[] charArray = table[i];
2198 if ((c0 == charArray[0])
2199 && (c1 == charArray[1])
2200 && (c2 == charArray[2])
2201 && (c3 == charArray[3]))
2204 //--------add the entry-------
// the slot index is advanced and checked against the bucket size (consequent not shown)
2205 if (++max >= InternalTableSize)
2208 table[max] = (r = new char[] { c0, c1, c2, c3 });
/**
 * Looks up, or registers, a shared char[] for the five-character token that
 * starts at startPosition.
 *
 * The five characters are combined (shifted by 24, 18, 12, 6 and 0 bits,
 * with the first two widened to long) into a hash; its int cast selects a
 * bucket in charArray_length[3]. The bucket is searched in two passes for an
 * array with the same characters, and a new array is stored in the bucket
 * when the second pass ends without a visible match.
 *
 * NOTE(review): the declaration of hash, i and r, the reduction of the hash
 * to a bucket index, the statements taken on a match and the final return
 * are not visible in this excerpt.
 */
2214 final char[] optimizedCurrentTokenSource5() {
2215 //try to return the same char[] build only once
2217 char c0, c1, c2, c3, c4;
2219 ((((long) (c0 = source[startPosition])) << 24)
2220 + (((long) (c1 = source[startPosition + 1])) << 18)
2221 + ((c2 = source[startPosition + 2]) << 12)
2222 + ((c3 = source[startPosition + 3]) << 6)
2223 + (c4 = source[startPosition + 4]))
2225 char[][] table = charArray_length[3][(int) hash];
// first pass: entries up to the end of the bucket
2227 while (++i < InternalTableSize) {
2228 char[] charArray = table[i];
2229 if ((c0 == charArray[0])
2230 && (c1 == charArray[1])
2231 && (c2 == charArray[2])
2232 && (c3 == charArray[3])
2233 && (c4 == charArray[4]))
2236 //---------other side---------
// second pass: entries up to and including index newEntry5
2238 int max = newEntry5;
2239 while (++i <= max) {
2240 char[] charArray = table[i];
2241 if ((c0 == charArray[0])
2242 && (c1 == charArray[1])
2243 && (c2 == charArray[2])
2244 && (c3 == charArray[3])
2245 && (c4 == charArray[4]))
2248 //--------add the entry-------
// the slot index is advanced and checked against the bucket size (consequent not shown)
2249 if (++max >= InternalTableSize)
2252 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
/**
 * Looks up, or registers, a shared char[] for the six-character token that
 * starts at startPosition.
 *
 * The six characters are combined (shifted by 32, 24, 18, 12, 6 and 0 bits,
 * with the first three widened to long) into a hash; its int cast selects a
 * bucket in charArray_length[4]. The bucket is searched in two passes for an
 * array with the same characters, and a new array is stored in the bucket
 * when the second pass ends without a visible match.
 *
 * NOTE(review): the declaration of hash, i and r, the reduction of the hash
 * to a bucket index, the statements taken on a match and the final return
 * are not visible in this excerpt.
 * NOTE(review): the first shift is 32 where the 6-bit progression used for
 * the other characters would give 30; this only affects hash distribution,
 * but confirm it is intended.
 */
2258 final char[] optimizedCurrentTokenSource6() {
2259 //try to return the same char[] build only once
2261 char c0, c1, c2, c3, c4, c5;
2263 ((((long) (c0 = source[startPosition])) << 32)
2264 + (((long) (c1 = source[startPosition + 1])) << 24)
2265 + (((long) (c2 = source[startPosition + 2])) << 18)
2266 + ((c3 = source[startPosition + 3]) << 12)
2267 + ((c4 = source[startPosition + 4]) << 6)
2268 + (c5 = source[startPosition + 5]))
2270 char[][] table = charArray_length[4][(int) hash];
// first pass: entries up to the end of the bucket
2272 while (++i < InternalTableSize) {
2273 char[] charArray = table[i];
2274 if ((c0 == charArray[0])
2275 && (c1 == charArray[1])
2276 && (c2 == charArray[2])
2277 && (c3 == charArray[3])
2278 && (c4 == charArray[4])
2279 && (c5 == charArray[5]))
2282 //---------other side---------
// second pass: entries up to and including index newEntry6
2284 int max = newEntry6;
2285 while (++i <= max) {
2286 char[] charArray = table[i];
2287 if ((c0 == charArray[0])
2288 && (c1 == charArray[1])
2289 && (c2 == charArray[2])
2290 && (c3 == charArray[3])
2291 && (c4 == charArray[4])
2292 && (c5 == charArray[5]))
2295 //--------add the entry-------
// the slot index is advanced and checked against the bucket size (consequent not shown)
2296 if (++max >= InternalTableSize)
2299 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
/**
 * Records the position of the line separator held in currentCharacter, which
 * sits at currentPosition - 1.
 *
 * For a carriage return, currentPosition - 1 is pushed on lineEnds; when the
 * next source character is a line feed, that entry is overwritten with
 * currentPosition so the pair counts as a single line end.
 * For a line feed, when wasAcr is set and the last recorded end is
 * currentPosition - 2, that entry is moved to currentPosition - 1; otherwise
 * currentPosition - 1 is pushed as a new entry.
 * lineEnds is enlarged by INCREMENT slots when a push falls outside the
 * array, which is detected by catching IndexOutOfBoundsException.
 *
 * NOTE(review): several statements (the consequents of the guard
 * conditions, the try keywords and the updates of wasAcr) are not visible in
 * this excerpt.
 *
 * @throws InvalidInputException declared by the signature; no visible
 *         statement throws it
 */
2304 public final void pushLineSeparator() throws InvalidInputException {
2305 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2306 final int INCREMENT = 250;
2308 if (this.checkNonExternalizedStringLiterals) {
2309 // reinitialize the current line for non externalize strings purpose
2312 //currentCharacter is at position currentPosition-1
2315 if (currentCharacter == '\r') {
2316 int separatorPos = currentPosition - 1;
// guard for a position at or before the last recorded line end (consequent not shown)
2317 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2319 //System.out.println("CR-" + separatorPos);
2321 lineEnds[++linePtr] = separatorPos;
2322 } catch (IndexOutOfBoundsException e) {
2323 //linePtr value is correct
// the array was full: enlarge it, keep the old entries, then store at the already advanced index
2324 int oldLength = lineEnds.length;
2325 int[] old = lineEnds;
2326 lineEnds = new int[oldLength + INCREMENT];
2327 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2328 lineEnds[linePtr] = separatorPos;
2330 // look-ahead for merged cr+lf
2332 if (source[currentPosition] == '\n') {
2333 //System.out.println("look-ahead LF-" + currentPosition);
2334 lineEnds[linePtr] = currentPosition;
// reached when the look-ahead reads past the end of source
2340 } catch (IndexOutOfBoundsException e) {
2345 if (currentCharacter == '\n') {
2346 //must merge eventual cr followed by lf
2347 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2348 //System.out.println("merge LF-" + (currentPosition - 1));
2349 lineEnds[linePtr] = currentPosition - 1;
2351 int separatorPos = currentPosition - 1;
// guard for a position at or before the last recorded line end (consequent not shown)
2352 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2354 // System.out.println("LF-" + separatorPos);
2356 lineEnds[++linePtr] = separatorPos;
2357 } catch (IndexOutOfBoundsException e) {
2358 //linePtr value is correct
2359 int oldLength = lineEnds.length;
2360 int[] old = lineEnds;
2361 lineEnds = new int[oldLength + INCREMENT];
2362 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2363 lineEnds[linePtr] = separatorPos;
/**
 * Variant of the line-separator bookkeeping for a separator that was read
 * as a unicode escape.
 *
 * The recorded position is currentPosition - 6 instead of
 * currentPosition - 1, and the merge test for a line feed after a carriage
 * return compares against currentPosition - 7. A carriage return that is
 * followed by a literal line feed in source has its entry overwritten with
 * currentPosition. lineEnds is enlarged by INCREMENT slots when a push falls
 * outside the array, which is detected by catching
 * IndexOutOfBoundsException.
 *
 * NOTE(review): several statements (the consequents of the guard
 * conditions, the try keywords and the updates of wasAcr) are not visible in
 * this excerpt.
 * NOTE(review): the offset of 6 presumably is the length of a
 * single-'u' escape; escapes written with repeated 'u' would be longer -
 * confirm how callers account for that.
 */
2370 public final void pushUnicodeLineSeparator() {
2371 // isUnicode means that the \r or \n has been read as a unicode character
2373 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2375 final int INCREMENT = 250;
2376 //currentCharacter is at position currentPosition-1
2378 if (this.checkNonExternalizedStringLiterals) {
2379 // reinitialize the current line for non externalize strings purpose
2384 if (currentCharacter == '\r') {
2385 int separatorPos = currentPosition - 6;
// guard for a position at or before the last recorded line end (consequent not shown)
2386 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2388 //System.out.println("CR-" + separatorPos);
2390 lineEnds[++linePtr] = separatorPos;
2391 } catch (IndexOutOfBoundsException e) {
2392 //linePtr value is correct
// the array was full: enlarge it, keep the old entries, then store at the already advanced index
2393 int oldLength = lineEnds.length;
2394 int[] old = lineEnds;
2395 lineEnds = new int[oldLength + INCREMENT];
2396 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2397 lineEnds[linePtr] = separatorPos;
2399 // look-ahead for merged cr+lf
2400 if (source[currentPosition] == '\n') {
2401 //System.out.println("look-ahead LF-" + currentPosition);
2402 lineEnds[linePtr] = currentPosition;
2410 if (currentCharacter == '\n') {
2411 //must merge eventual cr followed by lf
2412 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2413 //System.out.println("merge LF-" + (currentPosition - 1));
2414 lineEnds[linePtr] = currentPosition - 6;
2416 int separatorPos = currentPosition - 6;
// guard for a position at or before the last recorded line end (consequent not shown)
2417 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2419 // System.out.println("LF-" + separatorPos);
2421 lineEnds[++linePtr] = separatorPos;
2422 } catch (IndexOutOfBoundsException e) {
2423 //linePtr value is correct
2424 int oldLength = lineEnds.length;
2425 int[] old = lineEnds;
2426 lineEnds = new int[oldLength + INCREMENT];
2427 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2428 lineEnds[linePtr] = separatorPos;
2435 public final void recordComment(boolean isJavadoc) {
2437 // a new annotation comment is recorded
2439 commentStops[++commentPtr] =
2440 isJavadoc ? currentPosition : -currentPosition;
2441 } catch (IndexOutOfBoundsException e) {
2442 int oldStackLength = commentStops.length;
2443 int[] oldStack = commentStops;
2444 commentStops = new int[oldStackLength + 30];
2445 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2446 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2447 //grows the positions buffers too
2448 int[] old = commentStarts;
2449 commentStarts = new int[oldStackLength + 30];
2450 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2453 //the buffer is of a correct size here
2454 commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning can start again at begin.
 *
 * initialPosition, startPosition and currentPosition are all set to begin;
 * eofPosition is set to end + 1, except when end is Integer.MAX_VALUE, in
 * which case it is kept as is to avoid overflow. The comment stack pointer
 * is reset to -1.
 *
 * NOTE(review): one statement of this method is not visible in this excerpt.
 *
 * @param begin position at which scanning restarts
 * @param end last position to scan; eofPosition becomes the position after it
 */
2456 public void resetTo(int begin, int end) {
2457 //reset the scanner to a given position where it may rescan again
2460 initialPosition = startPosition = currentPosition = begin;
// end is inclusive: eof is generated one past it, unless adding 1 would overflow
2461 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2462 commentPtr = -1; // reset comment stack
/**
 * Reads the character that follows a backslash inside a single-quoted
 * literal and stores the resulting character in currentCharacter.
 *
 * When unicodeAsBackSlash is set, the next character is consumed with
 * unicode-escape handling (and appended to withoutUnicodeBuffer when that
 * buffer is in use); otherwise it is read directly from source. A switch on
 * the character then assigns either a single quote or a backslash.
 *
 * NOTE(review): the case labels of the switch are not visible in this
 * excerpt, so which input maps to which result cannot be stated from here.
 *
 * @throws InvalidInputException declared by the signature; it can come from
 *         getNextUnicodeChar()
 */
2465 public final void scanSingleQuotedEscapeCharacter()
2466 throws InvalidInputException {
2467 // the string with "\\u" is a legal string of two chars \ and u
2468 //thus we use a direct access to the source (for regular cases).
2470 if (unicodeAsBackSlash) {
2471 // consume next character
2472 unicodeAsBackSlash = false;
2473 if (((currentCharacter = source[currentPosition++]) == '\\')
2474 && (source[currentPosition] == 'u')) {
2475 getNextUnicodeChar();
// a non-zero withoutUnicodePtr means the token is being buffered, so plain characters are appended too
2477 if (withoutUnicodePtr != 0) {
2478 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2482 currentCharacter = source[currentPosition++];
2483 switch (currentCharacter) {
2485 currentCharacter = '\'';
2488 currentCharacter = '\\';
2491 currentCharacter = '\\';
/**
 * Reads the character(s) that follow a backslash inside a double-quoted
 * literal and stores the resulting character in currentCharacter.
 *
 * When unicodeAsBackSlash is set, the next character is consumed with
 * unicode-escape handling (and appended to withoutUnicodeBuffer when that
 * buffer is in use); otherwise it is read directly from source. A switch on
 * the character then assigns one of: tab, line feed, carriage return, double
 * quote, single quote, backslash or '$'. The assignments for backspace and
 * form feed are commented out. Any other character is tried as an octal
 * escape of up to three digits; a first digit above 3 limits the escape to
 * two digits.
 *
 * NOTE(review): the case labels, the statements that give back a character
 * read too far and the condition guarding the INVALID_ESCAPE throw are not
 * visible in this excerpt.
 *
 * @throws InvalidInputException with INVALID_ESCAPE on the octal path (exact
 *         condition not shown), or from getNextUnicodeChar()
 */
2496 public final void scanDoubleQuotedEscapeCharacter()
2497 throws InvalidInputException {
2498 // the string with "\\u" is a legal string of two chars \ and u
2499 //thus we use a direct access to the source (for regular cases).
2501 if (unicodeAsBackSlash) {
2502 // consume next character
2503 unicodeAsBackSlash = false;
2504 if (((currentCharacter = source[currentPosition++]) == '\\')
2505 && (source[currentPosition] == 'u')) {
2506 getNextUnicodeChar();
// a non-zero withoutUnicodePtr means the token is being buffered, so plain characters are appended too
2508 if (withoutUnicodePtr != 0) {
2509 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2513 currentCharacter = source[currentPosition++];
2514 switch (currentCharacter) {
2516 // currentCharacter = '\b';
2519 currentCharacter = '\t';
2522 currentCharacter = '\n';
2525 // currentCharacter = '\f';
2528 currentCharacter = '\r';
2531 currentCharacter = '\"';
2534 currentCharacter = '\'';
2537 currentCharacter = '\\';
2540 currentCharacter = '$';
2543 // -----------octal escape--------------
2545 // OctalDigit OctalDigit
2546 // ZeroToThree OctalDigit OctalDigit
2548 int number = Character.getNumericValue(currentCharacter);
2549 if (number >= 0 && number <= 7) {
// a leading digit above 3 cannot start a three-digit octal escape
2550 boolean zeroToThreeNot = number > 3;
2552 .isDigit(currentCharacter = source[currentPosition++])) {
2553 int digit = Character.getNumericValue(currentCharacter);
2554 if (digit >= 0 && digit <= 7) {
2555 number = (number * 8) + digit;
2557 .isDigit(currentCharacter = source[currentPosition++])) {
2558 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2561 digit = Character.getNumericValue(currentCharacter);
2562 if (digit >= 0 && digit <= 7) {
2563 // has read \ZeroToThree OctalDigit OctalDigit
2564 number = (number * 8) + digit;
2565 } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2569 } else { // has read \OctalDigit NonDigit--> ignore last character
2572 } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2575 } else { // has read \OctalDigit --> ignore last character
2579 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the escaped character
2580 currentCharacter = (char) number;
2583 // throw new InvalidInputException(INVALID_ESCAPE);
2587 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2588 // return scanIdentifierOrKeyword( false );
/**
 * Scans the remainder of the current identifier and classifies it.
 * <p>
 * The token text is copied into a lower-cased buffer ({@code data}), so the
 * keyword comparisons below are case-insensitive. The method switches on the
 * first letter and then compares the remaining characters one by one; text
 * that matches no keyword is reported as {@code TokenNameIdentifier}.
 * <p>
 * NOTE(review): several lines of this method (the use of {@code isVariable}
 * and the per-length dispatch headers among them) are not visible in this
 * listing; presumably {@code isVariable} selects the
 * {@code TokenNameVariable} result -- confirm against the full file.
 *
 * @param isVariable presumably true when the identifier is a variable name -- TODO confirm
 * @return {@code TokenNameVariable}, one of the keyword token constants, or
 *         {@code TokenNameIdentifier}
 * @throws InvalidInputException declared by the signature; the throwing call is not visible here
 */
2591 public int scanIdentifierOrKeyword(boolean isVariable)
2592 throws InvalidInputException {
2595 //first dispatch on the first char.
2596 //then the length. If there are several
2597 //keywords with the same length AND the same first char, then do another
2598 //dispatch on the second char :-)...cool....but fast !
2600 useAssertAsAnIndentifier = false;
2602 while (getNextCharAsJavaIdentifierPart()) {
2606 return TokenNameVariable;
2611 if (withoutUnicodePtr == 0)
2613 //quick test on length == 1 but not on length > 12 while most identifier
2614 //have a length which is <= 12...but there are lots of identifier with
2618 if ((length = currentPosition - startPosition) == 1)
2619 return TokenNameIdentifier;
// no unicode escape in the token: lower-case a copy taken straight from source
2621 data = new char[length];
2622 index = startPosition;
2623 for (int i = 0; i < length; i++) {
2624 data[i] = Character.toLowerCase(source[index + i]);
2628 if ((length = withoutUnicodePtr) == 1)
2629 return TokenNameIdentifier;
2630 // data = withoutUnicodeBuffer;
// lower-case a copy of the whole withoutUnicodeBuffer (its full length, not just the token)
2631 data = new char[withoutUnicodeBuffer.length];
2632 for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2633 data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2638 firstLetter = data[index];
2639 switch (firstLetter) {
2641 case 'a' : // as and (array is commented out below)
2644 if ((data[++index] == 's')) {
2647 return TokenNameIdentifier;
2650 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2651 return TokenNameAND;
2653 return TokenNameIdentifier;
2656 // if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2657 // return TokenNamearray;
2659 // return TokenNameIdentifier;
2661 return TokenNameIdentifier;
2666 if ((data[++index] == 'r')
2667 && (data[++index] == 'e')
2668 && (data[++index] == 'a')
2669 && (data[++index] == 'k'))
2670 return TokenNamebreak;
2672 return TokenNameIdentifier;
2674 return TokenNameIdentifier;
2677 case 'c' : //case class continue
2680 if ((data[++index] == 'a')
2681 && (data[++index] == 's')
2682 && (data[++index] == 'e'))
2683 return TokenNamecase;
2685 return TokenNameIdentifier;
2687 if ((data[++index] == 'l')
2688 && (data[++index] == 'a')
2689 && (data[++index] == 's')
2690 && (data[++index] == 's'))
2691 return TokenNameclass;
2693 return TokenNameIdentifier;
2695 if ((data[++index] == 'o')
2696 && (data[++index] == 'n')
2697 && (data[++index] == 't')
2698 && (data[++index] == 'i')
2699 && (data[++index] == 'n')
2700 && (data[++index] == 'u')
2701 && (data[++index] == 'e'))
2702 return TokenNamecontinue;
2704 return TokenNameIdentifier;
2706 return TokenNameIdentifier;
2709 case 'd' : //define default do
2712 if ((data[++index] == 'o'))
2715 return TokenNameIdentifier;
2717 if ((data[++index] == 'e')
2718 && (data[++index] == 'f')
2719 && (data[++index] == 'i')
2720 && (data[++index] == 'n')
2721 && (data[++index] == 'e'))
2722 return TokenNamedefine;
2724 return TokenNameIdentifier;
2726 if ((data[++index] == 'e')
2727 && (data[++index] == 'f')
2728 && (data[++index] == 'a')
2729 && (data[++index] == 'u')
2730 && (data[++index] == 'l')
2731 && (data[++index] == 't'))
2732 return TokenNamedefault;
2734 return TokenNameIdentifier;
2736 return TokenNameIdentifier;
2738 case 'e' : //echo else elseif endif endfor endwhile endswitch endforeach extends
2741 if ((data[++index] == 'c')
2742 && (data[++index] == 'h')
2743 && (data[++index] == 'o'))
2744 return TokenNameecho;
2746 (data[index] == 'l')
2747 && (data[++index] == 's')
2748 && (data[++index] == 'e'))
2749 return TokenNameelse;
2751 return TokenNameIdentifier;
2753 if ((data[++index] == 'n')
2754 && (data[++index] == 'd')
2755 && (data[++index] == 'i')
2756 && (data[++index] == 'f'))
2757 return TokenNameendif;
2759 return TokenNameIdentifier;
2761 if ((data[++index] == 'n')
2762 && (data[++index] == 'd')
2763 && (data[++index] == 'f')
2764 && (data[++index] == 'o')
2765 && (data[++index] == 'r'))
2766 return TokenNameendfor;
2768 (data[index] == 'l')
2769 && (data[++index] == 's')
2770 && (data[++index] == 'e')
2771 && (data[++index] == 'i')
2772 && (data[++index] == 'f'))
2773 return TokenNameelseif;
2775 return TokenNameIdentifier;
2777 if ((data[++index] == 'x')
2778 && (data[++index] == 't')
2779 && (data[++index] == 'e')
2780 && (data[++index] == 'n')
2781 && (data[++index] == 'd')
2782 && (data[++index] == 's'))
2783 return TokenNameextends;
2785 return TokenNameIdentifier;
2786 case 8 : // endwhile
2787 if ((data[++index] == 'n')
2788 && (data[++index] == 'd')
2789 && (data[++index] == 'w')
2790 && (data[++index] == 'h')
2791 && (data[++index] == 'i')
2792 && (data[++index] == 'l')
2793 && (data[++index] == 'e'))
2794 return TokenNameendwhile;
2796 return TokenNameIdentifier;
2797 case 9 : // endswitch
2798 if ((data[++index] == 'n')
2799 && (data[++index] == 'd')
2800 && (data[++index] == 's')
2801 && (data[++index] == 'w')
2802 && (data[++index] == 'i')
2803 && (data[++index] == 't')
2804 && (data[++index] == 'c')
2805 && (data[++index] == 'h'))
2806 return TokenNameendswitch;
2808 return TokenNameIdentifier;
2809 case 10 : // endforeach
2810 if ((data[++index] == 'n')
2811 && (data[++index] == 'd')
2812 && (data[++index] == 'f')
2813 && (data[++index] == 'o')
2814 && (data[++index] == 'r')
2815 && (data[++index] == 'e')
2816 && (data[++index] == 'a')
2817 && (data[++index] == 'c')
2818 && (data[++index] == 'h'))
2819 return TokenNameendforeach;
2821 return TokenNameIdentifier;
2824 return TokenNameIdentifier;
2827 case 'f' : //for false foreach function
2830 if ((data[++index] == 'o') && (data[++index] == 'r'))
2831 return TokenNamefor;
2833 return TokenNameIdentifier;
2835 if ((data[++index] == 'a')
2836 && (data[++index] == 'l')
2837 && (data[++index] == 's')
2838 && (data[++index] == 'e'))
2839 return TokenNamefalse;
2841 return TokenNameIdentifier;
2842 case 7 : // foreach
2843 if ((data[++index] == 'o')
2844 && (data[++index] == 'r')
2845 && (data[++index] == 'e')
2846 && (data[++index] == 'a')
2847 && (data[++index] == 'c')
2848 && (data[++index] == 'h'))
2849 return TokenNameforeach;
2851 return TokenNameIdentifier;
2852 case 8 : // function
2853 if ((data[++index] == 'u')
2854 && (data[++index] == 'n')
2855 && (data[++index] == 'c')
2856 && (data[++index] == 't')
2857 && (data[++index] == 'i')
2858 && (data[++index] == 'o')
2859 && (data[++index] == 'n'))
2860 return TokenNamefunction;
2862 return TokenNameIdentifier;
2864 return TokenNameIdentifier;
2868 if ((data[++index] == 'l')
2869 && (data[++index] == 'o')
2870 && (data[++index] == 'b')
2871 && (data[++index] == 'a')
2872 && (data[++index] == 'l')) {
2873 return TokenNameglobal;
2876 return TokenNameIdentifier;
2881 if (data[++index] == 'f')
2884 return TokenNameIdentifier;
2886 // if ((data[++index] == 'n') && (data[++index] == 't'))
2887 // return TokenNameint;
2889 // return TokenNameIdentifier;
2891 if ((data[++index] == 'n')
2892 && (data[++index] == 'c')
2893 && (data[++index] == 'l')
2894 && (data[++index] == 'u')
2895 && (data[++index] == 'd')
2896 && (data[++index] == 'e'))
2897 return TokenNameinclude;
2899 return TokenNameIdentifier;
2901 if ((data[++index] == 'n')
2902 && (data[++index] == 'c')
2903 && (data[++index] == 'l')
2904 && (data[++index] == 'u')
2905 && (data[++index] == 'd')
2906 && (data[++index] == 'e')
2907 && (data[++index] == '_')
2908 && (data[++index] == 'o')
2909 && (data[++index] == 'n')
2910 && (data[++index] == 'c')
2911 && (data[++index] == 'e'))
2912 return TokenNameinclude_once;
2914 return TokenNameIdentifier;
2916 return TokenNameIdentifier;
2921 if ((data[++index] == 'i')
2922 && (data[++index] == 's')
2923 && (data[++index] == 't')) {
2924 return TokenNamelist;
2927 return TokenNameIdentifier;
2929 case 'n' : // new null
2932 if ((data[++index] == 'e') && (data[++index] == 'w'))
2933 return TokenNamenew;
2935 return TokenNameIdentifier;
2937 if ((data[++index] == 'u')
2938 && (data[++index] == 'l')
2939 && (data[++index] == 'l'))
2940 return TokenNamenull;
2942 return TokenNameIdentifier;
2945 return TokenNameIdentifier;
2947 case 'o' : // or (old_function is commented out below)
2949 if (data[++index] == 'r') {
2953 // if (length == 12) {
2954 // if ((data[++index] == 'l')
2955 // && (data[++index] == 'd')
2956 // && (data[++index] == '_')
2957 // && (data[++index] == 'f')
2958 // && (data[++index] == 'u')
2959 // && (data[++index] == 'n')
2960 // && (data[++index] == 'c')
2961 // && (data[++index] == 't')
2962 // && (data[++index] == 'i')
2963 // && (data[++index] == 'o')
2964 // && (data[++index] == 'n')) {
2965 // return TokenNameold_function;
2968 return TokenNameIdentifier;
2972 if ((data[++index] == 'r')
2973 && (data[++index] == 'i')
2974 && (data[++index] == 'n')
2975 && (data[++index] == 't')) {
2976 return TokenNameprint;
2979 return TokenNameIdentifier;
2980 case 'r' : //return require require_once
2982 if ((data[++index] == 'e')
2983 && (data[++index] == 't')
2984 && (data[++index] == 'u')
2985 && (data[++index] == 'r')
2986 && (data[++index] == 'n')) {
2987 return TokenNamereturn;
2989 } else if (length == 7) {
2990 if ((data[++index] == 'e')
2991 && (data[++index] == 'q')
2992 && (data[++index] == 'u')
2993 && (data[++index] == 'i')
2994 && (data[++index] == 'r')
2995 && (data[++index] == 'e')) {
2996 return TokenNamerequire;
2998 } else if (length == 12) {
2999 if ((data[++index] == 'e')
3000 && (data[++index] == 'q')
3001 && (data[++index] == 'u')
3002 && (data[++index] == 'i')
3003 && (data[++index] == 'r')
3004 && (data[++index] == 'e')
3005 && (data[++index] == '_')
3006 && (data[++index] == 'o')
3007 && (data[++index] == 'n')
3008 && (data[++index] == 'c')
3009 && (data[++index] == 'e')) {
3010 return TokenNamerequire_once;
3013 return TokenNameIdentifier;
3015 case 's' : //static switch
3018 if (data[++index] == 't')
3019 if ((data[++index] == 'a')
3020 && (data[++index] == 't')
3021 && (data[++index] == 'i')
3022 && (data[++index] == 'c')) {
3023 return TokenNamestatic;
3025 return TokenNameIdentifier;
3027 (data[index] == 'w')
3028 && (data[++index] == 'i')
3029 && (data[++index] == 't')
3030 && (data[++index] == 'c')
3031 && (data[++index] == 'h'))
3032 return TokenNameswitch;
3034 return TokenNameIdentifier;
3036 return TokenNameIdentifier;
3043 if ((data[++index] == 'r')
3044 && (data[++index] == 'u')
3045 && (data[++index] == 'e'))
3046 return TokenNametrue;
3048 return TokenNameIdentifier;
3049 // if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
3050 // return TokenNamethis;
3053 return TokenNameIdentifier;
3059 if ((data[++index] == 'a') && (data[++index] == 'r'))
3060 return TokenNamevar;
3062 return TokenNameIdentifier;
3065 return TokenNameIdentifier;
3071 if ((data[++index] == 'h')
3072 && (data[++index] == 'i')
3073 && (data[++index] == 'l')
3074 && (data[++index] == 'e'))
3075 return TokenNamewhile;
3077 return TokenNameIdentifier;
3078 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
3079 //return TokenNamewidefp ;
3081 //return TokenNameIdentifier;
3083 return TokenNameIdentifier;
3089 if ((data[++index] == 'o') && (data[++index] == 'r'))
3090 return TokenNameXOR;
3092 return TokenNameIdentifier;
3095 return TokenNameIdentifier;
3098 return TokenNameIdentifier;
/**
 * Scans a numeric literal and returns its token kind.
 * <p>
 * A literal introduced by 0x / 0X must be followed by at least one
 * hexadecimal digit (otherwise INVALID_HEXA is thrown) and is reported as
 * {@code TokenNameIntegerLiteral}. For the other forms the result is
 * {@code TokenNameDoubleLiteral} when a d/D suffix is consumed or the
 * {@code floating} flag is set, and {@code TokenNameIntegerLiteral}
 * otherwise. After an e/E and an optional sign the next character must be
 * a digit, otherwise INVALID_FLOAT is thrown.
 * <p>
 * NOTE(review): several lines (closing braces and, presumably, the
 * assignments that set {@code floating} to true) are not visible in this
 * listing -- confirm against the full file.
 *
 * @param dotPrefix initial value of the {@code floating} flag; per the
 *        comment below, the first digit may have been preceded by a '.'
 * @return {@code TokenNameIntegerLiteral} or {@code TokenNameDoubleLiteral}
 * @throws InvalidInputException with INVALID_HEXA or INVALID_FLOAT on a malformed literal
 */
3101 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3103 //when entering this method the currentCharacter is the first
3104 //digit of the number , i.e. it may be preceded by a . when
3107 boolean floating = dotPrefix;
3108 if ((!dotPrefix) && (currentCharacter == '0')) {
3109 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3110 //force the first char of the hexa number do exist...
3111 // consume next character
3112 unicodeAsBackSlash = false;
3113 if (((currentCharacter = source[currentPosition++]) == '\\')
3114 && (source[currentPosition] == 'u')) {
3115 getNextUnicodeChar();
3117 if (withoutUnicodePtr != 0) {
3118 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3121 if (Character.digit(currentCharacter, 16) == -1)
3122 throw new InvalidInputException(INVALID_HEXA);
3124 while (getNextCharAsDigit(16)) {
3126 // if (getNextChar('l', 'L') >= 0)
3127 // return TokenNameLongLiteral;
3129 return TokenNameIntegerLiteral;
3132 //there is no x or X in the number (the hexa case has already returned above)
3133 //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3134 if (getNextCharAsDigit()) {
3135 //-------------potential octal-----------------
3136 while (getNextCharAsDigit()) {
3139 // if (getNextChar('l', 'L') >= 0) {
3140 // return TokenNameLongLiteral;
3143 // if (getNextChar('f', 'F') >= 0) {
3144 // return TokenNameFloatingPointLiteral;
3147 if (getNextChar('d', 'D') >= 0) {
3148 return TokenNameDoubleLiteral;
3149 } else { //make the distinction between octal and float ....
3150 if (getNextChar('.')) { //bingo ! ....
3151 while (getNextCharAsDigit()) {
3153 if (getNextChar('e', 'E') >= 0) {
3154 // consume next character
3155 unicodeAsBackSlash = false;
3156 if (((currentCharacter = source[currentPosition++]) == '\\')
3157 && (source[currentPosition] == 'u')) {
3158 getNextUnicodeChar();
3160 if (withoutUnicodePtr != 0) {
3161 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3165 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3166 // consume next character
3167 unicodeAsBackSlash = false;
3168 if (((currentCharacter = source[currentPosition++]) == '\\')
3169 && (source[currentPosition] == 'u')) {
3170 getNextUnicodeChar();
3172 if (withoutUnicodePtr != 0) {
3173 withoutUnicodeBuffer[++withoutUnicodePtr] =
3178 if (!Character.isDigit(currentCharacter))
3179 throw new InvalidInputException(INVALID_FLOAT);
3180 while (getNextCharAsDigit()) {
3183 // if (getNextChar('f', 'F') >= 0)
3184 // return TokenNameFloatingPointLiteral;
3185 getNextChar('d', 'D'); //jump over potential d or D
3186 return TokenNameDoubleLiteral;
3188 return TokenNameIntegerLiteral;
3196 while (getNextCharAsDigit()) {
3199 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3200 // return TokenNameLongLiteral;
3202 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3203 while (getNextCharAsDigit()) {
3208 //if floating is true both exponent and suffix may be optional
3210 if (getNextChar('e', 'E') >= 0) {
3212 // consume next character
3213 unicodeAsBackSlash = false;
3214 if (((currentCharacter = source[currentPosition++]) == '\\')
3215 && (source[currentPosition] == 'u')) {
3216 getNextUnicodeChar();
3218 if (withoutUnicodePtr != 0) {
3219 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3223 if ((currentCharacter == '-')
3224 || (currentCharacter == '+')) { // consume next character
3225 unicodeAsBackSlash = false;
3226 if (((currentCharacter = source[currentPosition++]) == '\\')
3227 && (source[currentPosition] == 'u')) {
3228 getNextUnicodeChar();
3230 if (withoutUnicodePtr != 0) {
3231 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3235 if (!Character.isDigit(currentCharacter))
3236 throw new InvalidInputException(INVALID_FLOAT);
3237 while (getNextCharAsDigit()) {
3241 if (getNextChar('d', 'D') >= 0)
3242 return TokenNameDoubleLiteral;
3243 // if (getNextChar('f', 'F') >= 0)
3244 // return TokenNameFloatingPointLiteral;
3246 //the long flag has been tested before
3248 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3251 * Search the line number corresponding to a specific position
3254 public final int getLineNumber(int position) {
// special case: no line ends have been recorded at all
3256 if (lineEnds == null)
// only the first linePtr + 1 entries of lineEnds are taken into account
3258 int length = linePtr + 1;
// g and d start at the first and last considered index; presumably the lower and
// upper bounds of a binary search over lineEnds (the loop is not visible here -- confirm)
3261 int g = 0, d = length - 1;
// position is compared with the line end stored at the probe index m
3265 if (position < lineEnds[m]) {
3267 } else if (position > lineEnds[m]) {
3273 if (position < lineEnds[m]) {
/**
 * NOTE(review): the body of this method is not visible in this listing;
 * presumably it stores {@code mode} into the public {@code phpMode} field
 * -- confirm against the full file.
 *
 * @param mode the new PHP mode value
 */
3279 public void setPHPMode(boolean mode) {
/**
 * Installs the character buffer to scan and resets the scanner state that
 * depends on it.
 * <p>
 * A {@code null} argument is replaced by an empty array. Both
 * {@code initialPosition} and {@code currentPosition} are reset to 0,
 * {@code containsAssertKeyword} is cleared, and
 * {@code withoutUnicodeBuffer} is reallocated with the same length as the
 * installed source.
 *
 * @param source the characters to scan; may be {@code null}
 */
3283 public final void setSource(char[] source) {
3284 //the source-buffer is set to sourceString
3286 if (source == null) {
3287 this.source = new char[0];
3289 this.source = source;
3292 initialPosition = currentPosition = 0;
3293 containsAssertKeyword = false;
// sized from this.source (never null at this point), not from the parameter
3294 withoutUnicodeBuffer = new char[this.source.length];
/**
 * Returns a debugging view of the scanner: the source text with the current
 * token (from {@code startPosition} up to {@code currentPosition - 1})
 * framed by "Starts here -->" and "<-- Ends here" markers.
 * <p>
 * Two situations are reported with a short prefix followed by the whole
 * source instead: {@code startPosition} sitting at the end of the source
 * ("EOF") and {@code currentPosition} lying beyond it.
 *
 * @return the annotated source text
 */
3298 public String toString() {
3299 if (startPosition == source.length)
3300 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3301 if (currentPosition > source.length)
3302 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
// front: everything before the current token
3304 char front[] = new char[startPosition];
3305 System.arraycopy(source, 0, front, 0, startPosition);
// middle: the current token itself; the expression simplifies to currentPosition - startPosition
3307 int middleLength = (currentPosition - 1) - startPosition + 1;
3309 if (middleLength > -1) {
3310 middle = new char[middleLength];
3311 System.arraycopy(source, startPosition, middle, 0, middleLength);
3313 middle = new char[0];
// end: the text after the current token
3316 char end[] = new char[source.length - (currentPosition - 1)];
3319 (currentPosition - 1) + 1,
3322 source.length - (currentPosition - 1) - 1);
3324 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3325 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable form of a token kind, for debugging output.
 * <p>
 * Token kinds that carry text (error, StopPHP, identifier, variable and the
 * literal kinds) are rendered as {@code Kind(<current token source>)};
 * keywords and operators are rendered as their spelling. A value that
 * matches none of the cases is rendered as {@code not-a-token(<act>) }
 * followed by the current token source.
 *
 * @param act the token kind, compared against the TokenName* constants
 * @return the printable form described above
 */
3328 public final String toStringAction(int act) {
3330 case TokenNameERROR :
3331 return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3332 case TokenNameStopPHP :
3333 return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3334 case TokenNameIdentifier :
3335 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3336 case TokenNameVariable :
3337 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3339 return "as"; //$NON-NLS-1$
3340 case TokenNamebreak :
3341 return "break"; //$NON-NLS-1$
3342 case TokenNamecase :
3343 return "case"; //$NON-NLS-1$
3344 case TokenNameclass :
3345 return "class"; //$NON-NLS-1$
3346 case TokenNamecontinue :
3347 return "continue"; //$NON-NLS-1$
3348 case TokenNamedefault :
3349 return "default"; //$NON-NLS-1$
3350 case TokenNamedefine :
3351 return "define"; //$NON-NLS-1$
3353 return "do"; //$NON-NLS-1$
3354 case TokenNameecho :
3355 return "echo"; //$NON-NLS-1$
3356 case TokenNameelse :
3357 return "else"; //$NON-NLS-1$
3358 case TokenNameelseif :
3359 return "elseif"; //$NON-NLS-1$
3360 case TokenNameendfor :
3361 return "endfor"; //$NON-NLS-1$
3362 case TokenNameendforeach :
3363 return "endforeach"; //$NON-NLS-1$
3364 case TokenNameendif :
3365 return "endif"; //$NON-NLS-1$
3366 case TokenNameendswitch :
3367 return "endswitch"; //$NON-NLS-1$
3368 case TokenNameendwhile :
3369 return "endwhile"; //$NON-NLS-1$
3370 case TokenNameextends :
3371 return "extends"; //$NON-NLS-1$
3372 case TokenNamefalse :
3373 return "false"; //$NON-NLS-1$
3375 return "for"; //$NON-NLS-1$
3376 case TokenNameforeach :
3377 return "foreach"; //$NON-NLS-1$
3378 case TokenNamefunction :
3379 return "function"; //$NON-NLS-1$
3380 case TokenNameglobal :
3381 return "global"; //$NON-NLS-1$
3383 return "if"; //$NON-NLS-1$
3384 case TokenNameinclude :
3385 return "include"; //$NON-NLS-1$
3386 case TokenNameinclude_once :
3387 return "include_once"; //$NON-NLS-1$
3388 case TokenNamelist :
3389 return "list"; //$NON-NLS-1$
3391 return "new"; //$NON-NLS-1$
3392 case TokenNamenull :
3393 return "null"; //$NON-NLS-1$
3394 case TokenNameprint :
3395 return "print"; //$NON-NLS-1$
3396 case TokenNamerequire :
3397 return "require"; //$NON-NLS-1$
3398 case TokenNamerequire_once :
3399 return "require_once"; //$NON-NLS-1$
3400 case TokenNamereturn :
3401 return "return"; //$NON-NLS-1$
3402 case TokenNamestatic :
3403 return "static"; //$NON-NLS-1$
3404 case TokenNameswitch :
3405 return "switch"; //$NON-NLS-1$
3406 case TokenNametrue :
3407 return "true"; //$NON-NLS-1$
3409 return "var"; //$NON-NLS-1$
3410 case TokenNamewhile :
3411 return "while"; //$NON-NLS-1$
3412 case TokenNameIntegerLiteral :
3413 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3414 case TokenNameDoubleLiteral :
3415 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3416 case TokenNameStringLiteral :
3417 return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3418 case TokenNameStringConstant :
3419 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3420 case TokenNameStringInterpolated :
3421 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3422 case TokenNameHEREDOC :
3423 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3425 case TokenNamePLUS_PLUS :
3426 return "++"; //$NON-NLS-1$
3427 case TokenNameMINUS_MINUS :
3428 return "--"; //$NON-NLS-1$
3429 case TokenNameEQUAL_EQUAL :
3430 return "=="; //$NON-NLS-1$
3431 case TokenNameEQUAL_GREATER :
3432 return "=>"; //$NON-NLS-1$
3433 case TokenNameLESS_EQUAL :
3434 return "<="; //$NON-NLS-1$
3435 case TokenNameGREATER_EQUAL :
3436 return ">="; //$NON-NLS-1$
3437 case TokenNameNOT_EQUAL :
3438 return "!="; //$NON-NLS-1$
3439 case TokenNameLEFT_SHIFT :
3440 return "<<"; //$NON-NLS-1$
3441 case TokenNameRIGHT_SHIFT :
3442 return ">>"; //$NON-NLS-1$
3443 case TokenNamePLUS_EQUAL :
3444 return "+="; //$NON-NLS-1$
3445 case TokenNameMINUS_EQUAL :
3446 return "-="; //$NON-NLS-1$
3447 case TokenNameMULTIPLY_EQUAL :
3448 return "*="; //$NON-NLS-1$
3449 case TokenNameDIVIDE_EQUAL :
3450 return "/="; //$NON-NLS-1$
3451 case TokenNameAND_EQUAL :
3452 return "&="; //$NON-NLS-1$
3453 case TokenNameOR_EQUAL :
3454 return "|="; //$NON-NLS-1$
3455 case TokenNameXOR_EQUAL :
3456 return "^="; //$NON-NLS-1$
3457 case TokenNameREMAINDER_EQUAL :
3458 return "%="; //$NON-NLS-1$
3459 case TokenNameLEFT_SHIFT_EQUAL :
3460 return "<<="; //$NON-NLS-1$
3461 case TokenNameRIGHT_SHIFT_EQUAL :
3462 return ">>="; //$NON-NLS-1$
3463 case TokenNameOR_OR :
3464 return "||"; //$NON-NLS-1$
3465 case TokenNameAND_AND :
3466 return "&&"; //$NON-NLS-1$
3467 case TokenNamePLUS :
3468 return "+"; //$NON-NLS-1$
3469 case TokenNameMINUS :
3470 return "-"; //$NON-NLS-1$
3471 case TokenNameMINUS_GREATER :
3474 return "!"; //$NON-NLS-1$
3475 case TokenNameREMAINDER :
3476 return "%"; //$NON-NLS-1$
3478 return "^"; //$NON-NLS-1$
3480 return "&"; //$NON-NLS-1$
3481 case TokenNameMULTIPLY :
3482 return "*"; //$NON-NLS-1$
3484 return "|"; //$NON-NLS-1$
3485 case TokenNameTWIDDLE :
3486 return "~"; //$NON-NLS-1$
3487 case TokenNameTWIDDLE_EQUAL :
3488 return "~="; //$NON-NLS-1$
3489 case TokenNameDIVIDE :
3490 return "/"; //$NON-NLS-1$
3491 case TokenNameGREATER :
3492 return ">"; //$NON-NLS-1$
3493 case TokenNameLESS :
3494 return "<"; //$NON-NLS-1$
3495 case TokenNameLPAREN :
3496 return "("; //$NON-NLS-1$
3497 case TokenNameRPAREN :
3498 return ")"; //$NON-NLS-1$
3499 case TokenNameLBRACE :
3500 return "{"; //$NON-NLS-1$
3501 case TokenNameRBRACE :
3502 return "}"; //$NON-NLS-1$
3503 case TokenNameLBRACKET :
3504 return "["; //$NON-NLS-1$
3505 case TokenNameRBRACKET :
3506 return "]"; //$NON-NLS-1$
3507 case TokenNameSEMICOLON :
3508 return ";"; //$NON-NLS-1$
3509 case TokenNameQUESTION :
3510 return "?"; //$NON-NLS-1$
3511 case TokenNameCOLON :
3512 return ":"; //$NON-NLS-1$
3513 case TokenNameCOMMA :
3514 return ","; //$NON-NLS-1$
3516 return "."; //$NON-NLS-1$
3517 case TokenNameEQUAL :
3518 return "="; //$NON-NLS-1$
3521 case TokenNameDOLLAR_LBRACE :
3524 return "EOF"; //$NON-NLS-1$
3526 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$ //$NON-NLS-2$
3531 boolean tokenizeComments,
3532 boolean tokenizeWhiteSpace,
3533 boolean checkNonExternalizedStringLiterals) {
3537 checkNonExternalizedStringLiterals,
3542 boolean tokenizeComments,
3543 boolean tokenizeWhiteSpace,
3544 boolean checkNonExternalizedStringLiterals,
3545 boolean assertMode) {
3546 this.eofPosition = Integer.MAX_VALUE;
3547 this.tokenizeComments = tokenizeComments;
3548 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3549 this.checkNonExternalizedStringLiterals =
3550 checkNonExternalizedStringLiterals;
3551 this.assertMode = assertMode;
/**
 * Hands the NLS line currently being tracked ({@code currentLine}) to
 * {@link #parseTags(NLSLine)}.
 * <p>
 * NOTE(review): the statement guarded by the {@code null} check is not
 * visible in this listing; presumably the method returns without parsing
 * when no line is being tracked -- confirm.
 *
 * @throws InvalidInputException propagated from {@code parseTags}
 */
3554 private void checkNonExternalizeString() throws InvalidInputException {
3555 if (currentLine == null)
3557 parseTags(currentLine);
3560 private void parseTags(NLSLine line) throws InvalidInputException {
3561 String s = new String(getCurrentTokenSource());
3562 int pos = s.indexOf(TAG_PREFIX);
3563 int lineLength = line.size();
3565 int start = pos + TAG_PREFIX_LENGTH;
3566 int end = s.indexOf(TAG_POSTFIX, start);
3567 String index = s.substring(start, end);
3570 i = Integer.parseInt(index) - 1;
3571 // Tags are one based not zero based.
3572 } catch (NumberFormatException e) {
3573 i = -1; // we don't want to consider this as a valid NLS tag
3575 if (line.exists(i)) {
3578 pos = s.indexOf(TAG_PREFIX, start);
3581 this.nonNLSStrings = new StringLiteral[lineLength];
3582 int nonNLSCounter = 0;
3583 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3584 StringLiteral literal = (StringLiteral) iterator.next();
3585 if (literal != null) {
3586 this.nonNLSStrings[nonNLSCounter++] = literal;
3589 if (nonNLSCounter == 0) {
3590 this.nonNLSStrings = null;
3594 this.wasNonExternalizedStringLiteral = true;
3595 if (nonNLSCounter != lineLength) {
3599 (this.nonNLSStrings = new StringLiteral[nonNLSCounter]),