1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
17 import net.sourceforge.phpdt.core.compiler.*;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
20 public class Scanner implements IScanner, ITerminalSymbols {
23 - getNextToken() which returns the type of the current token
24 (this value is not memorized by the scanner)
25 - getCurrentTokenSource() which provides the token's "REAL" source
26 (i.e. every unicode escape has been converted into the character it denotes)
27 - sourceStart gives the position into the stream
28 - currentPosition-1 gives the sourceEnd position into the stream
// NOTE(review): assertMode is not read or written in the visible part of this file; presumably
// it is initialised by the three-argument constructor — confirm.
32 private boolean assertMode;
// (sic) the misspelling "Indentifier" is part of the public field name and must be kept.
33 public boolean useAssertAsAnIndentifier = false;
34 // flag indicating whether the processed source contains occurrences of the keyword assert
35 public boolean containsAssertKeyword = false;
// consulted by getNextToken() each time it reads a '\r' or a '\n'
37 public boolean recordLineSeparator;
38 public boolean phpMode = false;
// character most recently read from source by the getNext... methods
40 public char currentCharacter;
// start index of the current token in source (see getCurrentTokenStartPosition())
41 public int startPosition;
// index of the next character to read; currentPosition - 1 is the end of the current token
// (see getCurrentTokenEndPosition())
42 public int currentPosition;
43 public int initialPosition, eofPosition;
44 // eofPosition: once currentPosition is past it, getNextToken() stops producing real tokens from the source
// NOTE(review): tokenizeComments is presumably the comment counterpart of tokenizeWhiteSpace — confirm.
46 public boolean tokenizeComments;
// when true, getNextToken() reports skipped white space as TokenNameWHITESPACE
// (and, outside PHP tags, the skipped text as TokenNameHTML)
47 public boolean tokenizeWhiteSpace;
49 //source should be viewed as a window (aka a part)
50 //of an entire very large stream
// Decoded characters of the current token, used once a unicode escape has been met in it.
// Slot 0 is never used: characters are stored with ++withoutUnicodePtr, so the first real
// character sits at index 1.
54 public char[] withoutUnicodeBuffer;
// index of the last character stored in withoutUnicodeBuffer
55 public int withoutUnicodePtr;
56 // when == 0 ==> no unicode escape in the current token
// set by getNextChar(char): true when the character just decoded from a unicode escape is a backslash
57 public boolean unicodeAsBackSlash = false;
// NOTE(review): not used in the visible part of the file; presumably set while a floating-point
// literal is being scanned — confirm.
59 public boolean scanningFloatLiteral = false;
61 //support for /** comments
62 //public char[][] comments = new char[10][];
// NOTE(review): presumably parallel arrays holding the end and start positions of the comments
// met so far, indexed up to commentPtr — confirm against the comment-recording code.
63 public int[] commentStops = new int[10];
64 public int[] commentStarts = new int[10];
65 public int commentPtr = -1; // -1 means that no comment has been recorded
67 //diet parsing support - jump over some method bodies when requested
68 public boolean diet = false;
70 //support for the poor-line-debuggers ....
71 //remember the position of the cr/lf
// read by getLineEnd()/getLineStart(): lineEnds[n - 1] is returned as the end position of the
// 1-based line n; initial capacity is 250 entries
72 public int[] lineEnds = new int[250];
// NOTE(review): presumably the index of the last entry filled in lineEnds (-1 when none) — confirm.
73 public int linePtr = -1;
// NOTE(review): presumably remembers that the previous line-break character was a '\r' — confirm.
74 public boolean wasAcr = false;
// Message keys carried by InvalidInputException. They are compared by value: getNextToken()
// tests e.getMessage().equals(INVALID_ESCAPE) to decide how to recover, so the literal text of
// these constants is part of the scanner's behaviour.
76 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
78 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
79 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
80 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
81 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
82 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
83 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
84 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
86 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
// thrown by getNextToken() when the source ends inside a quoted string
87 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
88 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
89 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
91 //----------------optimized identifier management------------------
// One shared single-character array per lowercase ASCII letter.
// NOTE(review): presumably handed out by optimizedCurrentTokenSource1() so that one-letter
// identifiers do not allocate a new array each time — confirm.
92 static final char[] charArray_a = new char[] { 'a' },
93 charArray_b = new char[] { 'b' },
94 charArray_c = new char[] { 'c' },
95 charArray_d = new char[] { 'd' },
96 charArray_e = new char[] { 'e' },
97 charArray_f = new char[] { 'f' },
98 charArray_g = new char[] { 'g' },
99 charArray_h = new char[] { 'h' },
100 charArray_i = new char[] { 'i' },
101 charArray_j = new char[] { 'j' },
102 charArray_k = new char[] { 'k' },
103 charArray_l = new char[] { 'l' },
104 charArray_m = new char[] { 'm' },
105 charArray_n = new char[] { 'n' },
106 charArray_o = new char[] { 'o' },
107 charArray_p = new char[] { 'p' },
108 charArray_q = new char[] { 'q' },
109 charArray_r = new char[] { 'r' },
110 charArray_s = new char[] { 's' },
111 charArray_t = new char[] { 't' },
112 charArray_u = new char[] { 'u' },
113 charArray_v = new char[] { 'v' },
114 charArray_w = new char[] { 'w' },
115 charArray_x = new char[] { 'x' },
116 charArray_y = new char[] { 'y' },
117 charArray_z = new char[] { 'z' };
// Placeholder made of six NUL characters; every slot of charArray_length initially refers to
// this one shared array.
119 static final char[] initCharArray =
120 new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
// sizes of the second and third dimensions of charArray_length
121 static final int TableSize = 30, InternalTableSize = 6;
// Size of the first dimension of charArray_length; getCurrentIdentifierSource() has one
// optimizedCurrentTokenSourceN() call for each of the lengths 1 to 6.
123 public static final int OptimizedLength = 6;
// Per-instance table of character arrays, dimensioned [OptimizedLength][TableSize][InternalTableSize].
125 final char[][][][] charArray_length =
126 new char[OptimizedLength][TableSize][InternalTableSize][];
127 // support for detecting non-externalized string literals
128 int currentLineNr = -1;
129 int previousLineNr = -1;
// Created lazily: when getNextToken() scans a string literal while currentLine is null, it
// creates a new NLSLine and appends it to lines.
130 NLSLine currentLine = null;
// raw List (the file predates generics); receives the NLSLine objects created by getNextToken()
131 List lines = new ArrayList();
// An NLS tag has the form TAG_PREFIX + <integer> + TAG_POSTFIX, like the tag at the end of the
// next line.
132 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
133 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
134 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
135 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
136 public StringLiteral[] nonNLSStrings = null;
// when true, getNextToken() registers every string literal it scans with the current NLSLine
137 public boolean checkNonExternalizedStringLiterals = true;
138 public boolean wasNonExternalizedStringLiteral = false;
// Point every slot of charArray_length at the shared initCharArray placeholder.
// NOTE(review): the literal 6 is the size of the first dimension, i.e. OptimizedLength;
// using the constant here would keep the loop and the allocation in sync.
141 for (int i = 0; i < 6; i++) {
142 for (int j = 0; j < TableSize; j++) {
143 for (int k = 0; k < InternalTableSize; k++) {
144 charArray_length[i][j][k] = initCharArray;
149 static int newEntry2 = 0,
// Bracket kinds, numbered from 0 so that they can index an array of BracketKinds entries.
// NOTE(review): presumably used by bracket-matching code elsewhere in the class — confirm.
155 public static final int RoundBracket = 0;
156 public static final int SquareBracket = 1;
157 public static final int CurlyBracket = 2;
// number of bracket kinds declared above
158 public static final int BracketKinds = 3;
// compile-time debug switch, off by default
160 public static final boolean DEBUG = false;
/**
 * Convenience constructor: delegates to the three-argument constructor, passing
 * {@code false} as the third argument.
 *
 * @param tokenizeComments forwarded unchanged to the three-argument constructor
 * @param tokenizeWhiteSpace forwarded unchanged to the three-argument constructor
 */
164 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
// NOTE(review): the third parameter is presumably the assert mode — confirm against the
// three-argument constructor.
165 this(tokenizeComments, tokenizeWhiteSpace, false);
169 * Determines if the specified character is
170 * permissible as the first character in a PHP identifier
// Returns true for an underscore or for any character that Character.isLetter accepts.
// Character.isLetter is Unicode-aware, so non-ASCII letters are accepted as well; digits and
// '$' are rejected.
172 public static boolean isPHPIdentifierStart(char ch) {
173 return Character.isLetter(ch) || (ch == '_');
177 * Determines if the specified character may be part of a PHP identifier as
178 * other than the first character
// Returns true for an underscore or for any character that Character.isLetterOrDigit accepts.
// Character.isLetterOrDigit is Unicode-aware, so non-ASCII letters and digits are accepted as
// well.
180 public static boolean isPHPIdentifierPart(char ch) {
181 return Character.isLetterOrDigit(ch) || (ch == '_');
/**
 * Tells whether the scanner has consumed the whole source array.
 *
 * @return {@code true} when currentPosition equals source.length
 */
184 public final boolean atEnd() {
185 // This test is not meaningful when source is
186 // only a part of the real input stream
// compares against the physical end of the array, not against eofPosition
188 return source.length == currentPosition;
/**
 * Returns the characters of the current identifier token, with unicode escapes already
 * decoded. Tokens of length 1 to 6 that contain no unicode escape are answered by the
 * optimizedCurrentTokenSourceN() helpers instead of being copied into a new array.
 *
 * @return the identifier's characters
 */
190 public char[] getCurrentIdentifierSource() {
191 //return the token REAL source (aka unicodes are precomputed)
// a unicode escape was met in this token: copy the decoded characters from the buffer
194 if (withoutUnicodePtr != 0)
195 //0 is used as a fast test flag so the real first char is in position 1
197 withoutUnicodeBuffer,
199 result = new char[withoutUnicodePtr],
// no unicode escape: the token is exactly source[startPosition .. currentPosition - 1]
203 int length = currentPosition - startPosition;
204 switch (length) { // see OptimizedLength
206 return optimizedCurrentTokenSource1();
208 return optimizedCurrentTokenSource2();
210 return optimizedCurrentTokenSource3();
212 return optimizedCurrentTokenSource4();
214 return optimizedCurrentTokenSource5();
216 return optimizedCurrentTokenSource6();
// lengths not handled by the switch get a new array of exactly that length
222 result = new char[length],
/**
 * Returns the index of the last character of the current token.
 *
 * @return currentPosition - 1, because currentPosition already points one past the token
 */
228 public int getCurrentTokenEndPosition() {
229 return this.currentPosition - 1;
/**
 * Returns a new array holding the characters of the current token, with unicode escapes
 * already decoded.
 *
 * @return the token's characters
 */
231 public final char[] getCurrentTokenSource() {
232 // Return the token REAL source (aka unicodes are precomputed)
// a unicode escape was met in this token: copy the decoded characters from the buffer
235 if (withoutUnicodePtr != 0)
236 // 0 is used as a fast test flag so the real first char is in position 1
238 withoutUnicodeBuffer,
240 result = new char[withoutUnicodePtr],
// no unicode escape: the result has one slot per character between startPosition and
// currentPosition
248 result = new char[length = currentPosition - startPosition],
/**
 * Variant of getCurrentTokenSource() in which the caller supplies the start position.
 *
 * @param startPos position used in place of startPosition when the token contains no
 *        unicode escape
 * @return the token's characters
 */
255 public final char[] getCurrentTokenSource(int startPos) {
256 // Return the token REAL source (aka unicodes are precomputed)
// NOTE(review): in this branch the result length is withoutUnicodePtr and startPos is not
// used by the visible lines — confirm that this is intended.
259 if (withoutUnicodePtr != 0)
260 // 0 is used as a fast test flag so the real first char is in position 1
262 withoutUnicodeBuffer,
264 result = new char[withoutUnicodePtr],
// no unicode escape: the result has one slot per character between startPos and currentPosition
272 result = new char[length = currentPosition - startPos],
/**
 * Returns the characters of the current string token without its opening and closing
 * delimiter, with unicode escapes already decoded. getNextToken() calls it for
 * single-quoted as well as double-quoted strings.
 *
 * @return the string's content, two characters shorter than the token
 */
279 public final char[] getCurrentTokenSourceString() {
280 //return the token REAL source (aka unicodes are precomputed).
281 //REMOVE the two delimiters that are at the beginning and the end.
284 if (withoutUnicodePtr != 0)
285 //0 is used as a fast test flag so the real first char is in position 1
286 System.arraycopy(withoutUnicodeBuffer, 2,
287 //2 is 1 (real start) + 1 (to jump over the opening delimiter)
288 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
// no unicode escape: token length minus the two delimiters
294 result = new char[length = currentPosition - startPosition - 2],
/**
 * Returns the index of the first character of the current token.
 *
 * @return the value of startPosition
 */
300 public int getCurrentTokenStartPosition() {
301 return this.startPosition;
304 * Search the source position corresponding to the end of a given line number
306 * Line numbers are 1-based, and relative to the scanner initialPosition.
307 * Character positions are 0-based.
309 * In case the given line number is inconsistent, answers -1.
// NOTE(review): the bounds checks below compare lineNumber with lineEnds.length, which is the
// allocated capacity of the array (250 initially), not the number of lines actually recorded
// (linePtr is tracked separately) — confirm that this is intended.
311 public final int getLineEnd(int lineNumber) {
313 if (lineEnds == null)
315 if (lineNumber >= lineEnds.length)
320 if (lineNumber == lineEnds.length - 1)
// line numbers are 1-based while lineEnds is 0-based
322 return lineEnds[lineNumber - 1];
323 // NOTE(review): the original comment here ("next line start one character behind the lineEnd of the previous line") describes getLineStart, not this method
326 * Search the source position corresponding to the beginning of a given line number
328 * Line numbers are 1-based, and relative to the scanner initialPosition.
329 * Character positions are 0-based.
331 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
333 * In case the given line number is inconsistent, answers -1.
// NOTE(review): as in getLineEnd, lineNumber is checked against lineEnds.length (allocated
// capacity), not against the number of lines actually recorded — confirm.
335 public final int getLineStart(int lineNumber) {
337 if (lineEnds == null)
339 if (lineNumber >= lineEnds.length)
// the first line starts where scanning started
345 return initialPosition;
// lineEnds[lineNumber - 2] is the end of the previous line (1-based line number, 0-based array)
346 return lineEnds[lineNumber - 2] + 1;
347 // a line starts one character after the lineEnd of the previous line
/**
 * Consumes the next character only if it equals {@code testedChar}. The next character may
 * be written literally or as a unicode escape (a backslash followed by one or more 'u' and
 * four hexadecimal digits).
 *
 * On a mismatch, a malformed escape, or the end of the source, currentPosition is restored
 * to its value on entry. NOTE(review): currentCharacter is assigned by the first read even
 * when the method then backs out, so it is not restored.
 *
 * @param testedChar the character expected next
 */
349 public final boolean getNextChar(char testedChar) {
351 //handle the case of unicode.
352 //when a unicode escape appears we must use a buffer that holds the decoded char values
353 //At the end of this method currentCharacter holds the newly visited char
354 //and currentPosition points right after it
355 //Both previous lines are true if the currentCharacter is == to the testedChar
356 //On false, currentPosition is left unchanged.
358 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remembered so that the position can be rolled back
360 int temp = currentPosition;
362 if (((currentCharacter = source[currentPosition++]) == '\\')
363 && (source[currentPosition] == 'u')) {
364 //-------------unicode handling ------------
// any number of 'u' may follow the backslash
368 while (source[currentPosition] == 'u') {
// Character.getNumericValue maps the letters a-f / A-F to 10-15; a value above 15 is not
// a hexadecimal digit
373 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
375 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
377 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
379 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
381 currentPosition = temp;
// combine the four hexadecimal digits into the character they denote
385 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
386 if (currentCharacter != testedChar) {
387 currentPosition = temp;
390 unicodeAsBackSlash = currentCharacter == '\\';
392 //need the unicode buffer
393 if (withoutUnicodePtr == 0) {
394 //buffer all the characters of the token read before this escape
395 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
399 withoutUnicodeBuffer,
403 //append the decoded char to the buffer
404 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
407 } //-------------end unicode handling--------------
409 if (currentCharacter != testedChar) {
410 currentPosition = temp;
413 unicodeAsBackSlash = false;
// the buffer is already in use for this token, so plain characters must be appended too
414 if (withoutUnicodePtr != 0)
415 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source is treated like a mismatch
418 } catch (IndexOutOfBoundsException e) {
419 unicodeAsBackSlash = false;
420 currentPosition = temp;
/**
 * Consumes the next character only if it equals one of the two tested characters. The next
 * character may be written literally or as a unicode escape.
 *
 * On a mismatch, a malformed escape, or the end of the source, currentPosition is restored
 * to its value on entry.
 *
 * @param testedChar1 first candidate
 * @param testedChar2 second candidate
 */
424 public final int getNextChar(char testedChar1, char testedChar2) {
425 //result: 0 when testedChar1 matched, 1 when testedChar2 matched, -1 otherwise
426 //test can be done with (x==0) for the first and (x>0) for the second
427 //handle the case of unicode.
428 //when a unicode escape appears we must use a buffer that holds the decoded char values
429 //At the end of this method currentCharacter holds the newly visited char
430 //and currentPosition points right after it
431 //Both previous lines are true if the currentCharacter is == to the testedChar1/2
432 //On a mismatch, currentPosition is left unchanged.
434 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remembered so that the position can be rolled back
436 int temp = currentPosition;
439 if (((currentCharacter = source[currentPosition++]) == '\\')
440 && (source[currentPosition] == 'u')) {
441 //-------------unicode handling ------------
// any number of 'u' may follow the backslash
445 while (source[currentPosition] == 'u') {
// a numeric value above 15 is not a hexadecimal digit
450 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
452 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
454 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
456 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
458 currentPosition = temp;
// combine the four hexadecimal digits into the character they denote
462 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
463 if (currentCharacter == testedChar1)
465 else if (currentCharacter == testedChar2)
468 currentPosition = temp;
472 //need the unicode buffer
473 if (withoutUnicodePtr == 0) {
474 //buffer all the characters of the token read before this escape
475 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
479 withoutUnicodeBuffer,
483 //append the decoded char to the buffer
484 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
486 } //-------------end unicode handling--------------
488 if (currentCharacter == testedChar1)
490 else if (currentCharacter == testedChar2)
493 currentPosition = temp;
// the buffer is already in use for this token, so plain characters must be appended too
497 if (withoutUnicodePtr != 0)
498 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source is treated like a mismatch
501 } catch (IndexOutOfBoundsException e) {
502 currentPosition = temp;
/**
 * Consumes the next character only if Character.isDigit accepts it. The next character may
 * be written literally or as a unicode escape.
 *
 * When the character is not a digit, the escape is malformed, or the source ends,
 * currentPosition is restored to its value on entry.
 */
506 public final boolean getNextCharAsDigit() {
508 //handle the case of unicode.
509 //when a unicode escape appears we must use a buffer that holds the decoded char values
510 //At the end of this method currentCharacter holds the newly visited char
511 //and currentPosition points right after it
512 //Both previous lines are true if the currentCharacter is a digit
513 //On false, currentPosition is left unchanged.
515 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remembered so that the position can be rolled back
517 int temp = currentPosition;
519 if (((currentCharacter = source[currentPosition++]) == '\\')
520 && (source[currentPosition] == 'u')) {
521 //-------------unicode handling ------------
// any number of 'u' may follow the backslash
525 while (source[currentPosition] == 'u') {
// a numeric value above 15 is not a hexadecimal digit
530 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
532 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
534 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
536 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
538 currentPosition = temp;
// combine the four hexadecimal digits into the character they denote
542 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
543 if (!Character.isDigit(currentCharacter)) {
544 currentPosition = temp;
548 //need the unicode buffer
549 if (withoutUnicodePtr == 0) {
550 //buffer all the characters of the token read before this escape
551 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
555 withoutUnicodeBuffer,
559 //append the decoded char to the buffer
560 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
562 } //-------------end unicode handling--------------
564 if (!Character.isDigit(currentCharacter)) {
565 currentPosition = temp;
// the buffer is already in use for this token, so plain characters must be appended too
568 if (withoutUnicodePtr != 0)
569 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source is treated like "not a digit"
572 } catch (IndexOutOfBoundsException e) {
573 currentPosition = temp;
/**
 * Consumes the next character only if it is a digit in the given radix, i.e. if
 * Character.digit(c, radix) does not return -1. The next character may be written
 * literally or as a unicode escape.
 *
 * When the character is not such a digit, the escape is malformed, or the source ends,
 * currentPosition is restored to its value on entry.
 *
 * @param radix the radix passed to Character.digit
 */
577 public final boolean getNextCharAsDigit(int radix) {
579 //handle the case of unicode.
580 //when a unicode escape appears we must use a buffer that holds the decoded char values
581 //At the end of this method currentCharacter holds the newly visited char
582 //and currentPosition points right after it
583 //Both previous lines are true if the currentCharacter is a digit in the given radix
584 //On false, currentPosition is left unchanged.
586 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remembered so that the position can be rolled back
588 int temp = currentPosition;
590 if (((currentCharacter = source[currentPosition++]) == '\\')
591 && (source[currentPosition] == 'u')) {
592 //-------------unicode handling ------------
// any number of 'u' may follow the backslash
596 while (source[currentPosition] == 'u') {
// a numeric value above 15 is not a hexadecimal digit
601 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
603 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
605 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
607 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
609 currentPosition = temp;
// combine the four hexadecimal digits into the character they denote
613 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
614 if (Character.digit(currentCharacter, radix) == -1) {
615 currentPosition = temp;
619 //need the unicode buffer
620 if (withoutUnicodePtr == 0) {
621 //buffer all the characters of the token read before this escape
622 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
626 withoutUnicodeBuffer,
630 //append the decoded char to the buffer
631 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
633 } //-------------end unicode handling--------------
635 if (Character.digit(currentCharacter, radix) == -1) {
636 currentPosition = temp;
// the buffer is already in use for this token, so plain characters must be appended too
639 if (withoutUnicodePtr != 0)
640 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source is treated like "not a digit"
643 } catch (IndexOutOfBoundsException e) {
644 currentPosition = temp;
/**
 * Consumes the next character only if isPHPIdentifierPart accepts it. Despite the "Java" in
 * the method name, the test applied is the PHP one. The next character may be written
 * literally or as a unicode escape.
 *
 * When the character is rejected, the escape is malformed, or the source ends,
 * currentPosition is restored to its value on entry.
 */
648 public boolean getNextCharAsJavaIdentifierPart() {
650 //handle the case of unicode.
651 //when a unicode escape appears we must use a buffer that holds the decoded char values
652 //At the end of this method currentCharacter holds the newly visited char
653 //and currentPosition points right after it
654 //Both previous lines are true if the currentCharacter is an identifier part
655 //On false, currentPosition is left unchanged.
657 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remembered so that the position can be rolled back
659 int temp = currentPosition;
661 if (((currentCharacter = source[currentPosition++]) == '\\')
662 && (source[currentPosition] == 'u')) {
663 //-------------unicode handling ------------
// any number of 'u' may follow the backslash
667 while (source[currentPosition] == 'u') {
// a numeric value above 15 is not a hexadecimal digit
672 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
674 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
676 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
678 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
680 currentPosition = temp;
// combine the four hexadecimal digits into the character they denote
684 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
685 if (!isPHPIdentifierPart(currentCharacter)) {
686 currentPosition = temp;
690 //need the unicode buffer
691 if (withoutUnicodePtr == 0) {
692 //buffer all the characters of the token read before this escape
693 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
697 withoutUnicodeBuffer,
701 //append the decoded char to the buffer
702 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
704 } //-------------end unicode handling--------------
706 if (!isPHPIdentifierPart(currentCharacter)) {
707 currentPosition = temp;
// the buffer is already in use for this token, so plain characters must be appended too
711 if (withoutUnicodePtr != 0)
712 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source is treated like a rejected character
715 } catch (IndexOutOfBoundsException e) {
716 currentPosition = temp;
721 public int getNextToken() throws InvalidInputException {
722 int htmlPosition = currentPosition;
725 currentCharacter = source[currentPosition++];
726 if (currentCharacter == '<') {
727 if (getNextChar('?')) {
728 currentCharacter = source[currentPosition++];
729 if ((currentCharacter == ' ')
730 || Character.isWhitespace(currentCharacter)) {
732 startPosition = currentPosition;
734 if (tokenizeWhiteSpace) {
735 // && (whiteStart != currentPosition - 1)) {
736 // reposition scanner in case we are interested by spaces as tokens
737 startPosition = htmlPosition;
738 return TokenNameHTML;
742 (currentCharacter == 'P') || (currentCharacter == 'p');
744 int test = getNextChar('H', 'h');
746 test = getNextChar('P', 'p');
749 startPosition = currentPosition;
752 if (tokenizeWhiteSpace) {
753 // && (whiteStart != currentPosition - 1)) {
754 // reposition scanner in case we are interested by spaces as tokens
755 startPosition = htmlPosition;
756 return TokenNameHTML;
765 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
766 if (recordLineSeparator) {
773 } //-----------------end switch while try--------------------
774 catch (IndexOutOfBoundsException e) {
775 if (tokenizeWhiteSpace) {
776 // && (whiteStart != currentPosition - 1)) {
777 // reposition scanner in case we are interested by spaces as tokens
778 startPosition = htmlPosition;
786 jumpOverMethodBody();
788 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
791 while (true) { //loop for jumping over comments
792 withoutUnicodePtr = 0;
793 //start with a new token (even comment written with unicode )
795 // ---------Consume white space and handles startPosition---------
796 int whiteStart = currentPosition;
797 boolean isWhiteSpace;
799 startPosition = currentPosition;
800 if (((currentCharacter = source[currentPosition++]) == '\\')
801 && (source[currentPosition] == 'u')) {
802 isWhiteSpace = jumpOverUnicodeWhiteSpace();
804 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
805 checkNonExternalizeString();
806 if (recordLineSeparator) {
813 (currentCharacter == ' ')
814 || Character.isWhitespace(currentCharacter);
816 } while (isWhiteSpace);
817 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
818 // reposition scanner in case we are interested by spaces as tokens
820 startPosition = whiteStart;
821 return TokenNameWHITESPACE;
823 //little trick to get out in the middle of a source compuation
824 if (currentPosition > eofPosition)
827 // ---------Identify the next token-------------
829 switch (currentCharacter) {
831 return TokenNameLPAREN;
833 return TokenNameRPAREN;
835 return TokenNameLBRACE;
837 return TokenNameRBRACE;
839 return TokenNameLBRACKET;
841 return TokenNameRBRACKET;
843 return TokenNameSEMICOLON;
845 return TokenNameCOMMA;
848 if (getNextCharAsDigit())
849 return scanNumber(true);
854 if ((test = getNextChar('+', '=')) == 0)
855 return TokenNamePLUS_PLUS;
857 return TokenNamePLUS_EQUAL;
858 return TokenNamePLUS;
863 if ((test = getNextChar('-', '=')) == 0)
864 return TokenNameMINUS_MINUS;
866 return TokenNameMINUS_EQUAL;
867 if (getNextChar('>'))
868 return TokenNameMINUS_GREATER;
870 return TokenNameMINUS;
873 if (getNextChar('='))
874 return TokenNameTWIDDLE_EQUAL;
875 return TokenNameTWIDDLE;
877 if (getNextChar('='))
878 return TokenNameNOT_EQUAL;
881 if (getNextChar('='))
882 return TokenNameMULTIPLY_EQUAL;
883 return TokenNameMULTIPLY;
885 if (getNextChar('='))
886 return TokenNameREMAINDER_EQUAL;
887 return TokenNameREMAINDER;
891 if ((test = getNextChar('=', '<')) == 0)
892 return TokenNameLESS_EQUAL;
894 if (getNextChar('='))
895 return TokenNameLEFT_SHIFT_EQUAL;
896 if (getNextChar('<')) {
897 int heredocStart = currentPosition;
898 int heredocLength = 0;
899 currentCharacter = source[currentPosition++];
900 if (isPHPIdentifierStart(currentCharacter)) {
901 currentCharacter = source[currentPosition++];
903 return TokenNameERROR;
905 while (isPHPIdentifierPart(currentCharacter)) {
906 currentCharacter = source[currentPosition++];
909 heredocLength = currentPosition - heredocStart - 1;
911 // heredoc end-tag determination
912 boolean endTag = true;
915 ch = source[currentPosition++];
916 if (ch == '\r' || ch == '\n') {
917 if (recordLineSeparator) {
922 for (int i = 0; i < heredocLength; i++) {
923 if (source[currentPosition + i]
924 != source[heredocStart + i]) {
930 currentPosition += heredocLength - 1;
931 currentCharacter = source[currentPosition++];
932 break; // do...while loop
940 return TokenNameHEREDOC;
942 return TokenNameLEFT_SHIFT;
944 return TokenNameLESS;
949 if ((test = getNextChar('=', '>')) == 0)
950 return TokenNameGREATER_EQUAL;
952 if ((test = getNextChar('=', '>')) == 0)
953 return TokenNameRIGHT_SHIFT_EQUAL;
954 return TokenNameRIGHT_SHIFT;
956 return TokenNameGREATER;
959 if (getNextChar('='))
960 return TokenNameEQUAL_EQUAL;
961 if (getNextChar('>'))
962 return TokenNameEQUAL_GREATER;
963 return TokenNameEQUAL;
967 if ((test = getNextChar('&', '=')) == 0)
968 return TokenNameAND_AND;
970 return TokenNameAND_EQUAL;
976 if ((test = getNextChar('|', '=')) == 0)
977 return TokenNameOR_OR;
979 return TokenNameOR_EQUAL;
983 if (getNextChar('='))
984 return TokenNameXOR_EQUAL;
987 if (getNextChar('>')) {
989 return TokenNameStopPHP;
991 return TokenNameQUESTION;
993 if (getNextChar(':'))
994 return TokenNameCOLON_COLON;
995 return TokenNameCOLON;
1001 // if ((test = getNextChar('\n', '\r')) == 0) {
1002 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1005 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1006 // for (int lookAhead = 0;
1009 // if (currentPosition + lookAhead
1010 // == source.length)
1012 // if (source[currentPosition + lookAhead]
1015 // if (source[currentPosition + lookAhead]
1017 // currentPosition += lookAhead + 1;
1021 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1024 // if (getNextChar('\'')) {
1025 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1026 // for (int lookAhead = 0;
1029 // if (currentPosition + lookAhead
1030 // == source.length)
1032 // if (source[currentPosition + lookAhead]
1035 // if (source[currentPosition + lookAhead]
1037 // currentPosition += lookAhead + 1;
1041 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1043 // if (getNextChar('\\'))
1044 // scanEscapeCharacter();
1045 // else { // consume next character
1046 // unicodeAsBackSlash = false;
1047 // if (((currentCharacter = source[currentPosition++])
1049 // && (source[currentPosition] == 'u')) {
1050 // getNextUnicodeChar();
1052 // if (withoutUnicodePtr != 0) {
1053 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1054 // currentCharacter;
1058 // // if (getNextChar('\''))
1059 // // return TokenNameCharacterLiteral;
1060 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1061 // for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1062 // if (currentPosition + lookAhead == source.length)
1064 // if (source[currentPosition + lookAhead] == '\n')
1066 // if (source[currentPosition + lookAhead] == '\'') {
1067 // currentPosition += lookAhead + 1;
1071 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1074 // consume next character
1075 unicodeAsBackSlash = false;
1076 if (((currentCharacter = source[currentPosition++]) == '\\')
1077 && (source[currentPosition] == 'u')) {
1078 getNextUnicodeChar();
1080 if (withoutUnicodePtr != 0) {
1081 withoutUnicodeBuffer[++withoutUnicodePtr] =
1086 while (currentCharacter != '\'') {
1088 /**** in PHP \r and \n are valid in string literals ****/
1089 // if ((currentCharacter == '\n')
1090 // || (currentCharacter == '\r')) {
1091 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1092 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1093 // if (currentPosition + lookAhead == source.length)
1095 // if (source[currentPosition + lookAhead] == '\n')
1097 // if (source[currentPosition + lookAhead] == '\"') {
1098 // currentPosition += lookAhead + 1;
1102 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1104 if (currentCharacter == '\\') {
1105 int escapeSize = currentPosition;
1106 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1107 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1108 scanSingleQuotedEscapeCharacter();
1109 escapeSize = currentPosition - escapeSize;
1110 if (withoutUnicodePtr == 0) {
1111 //buffer all the entries that have been left aside....
1113 currentPosition - escapeSize - 1 - startPosition;
1117 withoutUnicodeBuffer,
1120 withoutUnicodeBuffer[++withoutUnicodePtr] =
1122 } else { //overwrite the / in the buffer
1123 withoutUnicodeBuffer[withoutUnicodePtr] =
1125 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1126 withoutUnicodePtr--;
1130 // consume next character
1131 unicodeAsBackSlash = false;
1132 if (((currentCharacter = source[currentPosition++]) == '\\')
1133 && (source[currentPosition] == 'u')) {
1134 getNextUnicodeChar();
1136 if (withoutUnicodePtr != 0) {
1137 withoutUnicodeBuffer[++withoutUnicodePtr] =
1143 } catch (IndexOutOfBoundsException e) {
1144 throw new InvalidInputException(UNTERMINATED_STRING);
1145 } catch (InvalidInputException e) {
1146 if (e.getMessage().equals(INVALID_ESCAPE)) {
1147 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1148 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1149 if (currentPosition + lookAhead == source.length)
1151 if (source[currentPosition + lookAhead] == '\n')
1153 if (source[currentPosition + lookAhead] == '\'') {
1154 currentPosition += lookAhead + 1;
1162 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1163 if (currentLine == null) {
1164 currentLine = new NLSLine();
1165 lines.add(currentLine);
1169 getCurrentTokenSourceString(),
1171 currentPosition - 1));
1173 return TokenNameStringConstant;
1176 // consume next character
1177 unicodeAsBackSlash = false;
1178 if (((currentCharacter = source[currentPosition++]) == '\\')
1179 && (source[currentPosition] == 'u')) {
1180 getNextUnicodeChar();
1182 if (withoutUnicodePtr != 0) {
1183 withoutUnicodeBuffer[++withoutUnicodePtr] =
1188 while (currentCharacter != '"') {
1190 /**** in PHP \r and \n are valid in string literals ****/
1191 // if ((currentCharacter == '\n')
1192 // || (currentCharacter == '\r')) {
1193 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1194 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1195 // if (currentPosition + lookAhead == source.length)
1197 // if (source[currentPosition + lookAhead] == '\n')
1199 // if (source[currentPosition + lookAhead] == '\"') {
1200 // currentPosition += lookAhead + 1;
1204 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1206 if (currentCharacter == '\\') {
1207 int escapeSize = currentPosition;
1208 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1209 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1210 scanDoubleQuotedEscapeCharacter();
1211 escapeSize = currentPosition - escapeSize;
1212 if (withoutUnicodePtr == 0) {
1213 //buffer all the entries that have been left aside....
1215 currentPosition - escapeSize - 1 - startPosition;
1219 withoutUnicodeBuffer,
1222 withoutUnicodeBuffer[++withoutUnicodePtr] =
1224 } else { //overwrite the / in the buffer
1225 withoutUnicodeBuffer[withoutUnicodePtr] =
1227 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1228 withoutUnicodePtr--;
1232 // consume next character
1233 unicodeAsBackSlash = false;
1234 if (((currentCharacter = source[currentPosition++]) == '\\')
1235 && (source[currentPosition] == 'u')) {
1236 getNextUnicodeChar();
1238 if (withoutUnicodePtr != 0) {
1239 withoutUnicodeBuffer[++withoutUnicodePtr] =
1245 } catch (IndexOutOfBoundsException e) {
1246 throw new InvalidInputException(UNTERMINATED_STRING);
1247 } catch (InvalidInputException e) {
1248 if (e.getMessage().equals(INVALID_ESCAPE)) {
1249 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1250 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1251 if (currentPosition + lookAhead == source.length)
1253 if (source[currentPosition + lookAhead] == '\n')
1255 if (source[currentPosition + lookAhead] == '\"') {
1256 currentPosition += lookAhead + 1;
1264 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1265 if (currentLine == null) {
1266 currentLine = new NLSLine();
1267 lines.add(currentLine);
1271 getCurrentTokenSourceString(),
1273 currentPosition - 1));
1275 return TokenNameStringLiteral;
1278 // consume next character
1279 unicodeAsBackSlash = false;
1280 if (((currentCharacter = source[currentPosition++]) == '\\')
1281 && (source[currentPosition] == 'u')) {
1282 getNextUnicodeChar();
1284 if (withoutUnicodePtr != 0) {
1285 withoutUnicodeBuffer[++withoutUnicodePtr] =
1290 while (currentCharacter != '`') {
1292 /**** in PHP \r and \n are valid in string literals ****/
1293 // if ((currentCharacter == '\n')
1294 // || (currentCharacter == '\r')) {
1295 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1296 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1297 // if (currentPosition + lookAhead == source.length)
1299 // if (source[currentPosition + lookAhead] == '\n')
1301 // if (source[currentPosition + lookAhead] == '\"') {
1302 // currentPosition += lookAhead + 1;
1306 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1308 if (currentCharacter == '\\') {
1309 int escapeSize = currentPosition;
1310 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1311 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1312 scanDoubleQuotedEscapeCharacter();
1313 escapeSize = currentPosition - escapeSize;
1314 if (withoutUnicodePtr == 0) {
1315 //buffer all the entries that have been left aside....
1317 currentPosition - escapeSize - 1 - startPosition;
1321 withoutUnicodeBuffer,
1324 withoutUnicodeBuffer[++withoutUnicodePtr] =
1326 } else { //overwrite the / in the buffer
1327 withoutUnicodeBuffer[withoutUnicodePtr] =
1329 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1330 withoutUnicodePtr--;
1334 // consume next character
1335 unicodeAsBackSlash = false;
1336 if (((currentCharacter = source[currentPosition++]) == '\\')
1337 && (source[currentPosition] == 'u')) {
1338 getNextUnicodeChar();
1340 if (withoutUnicodePtr != 0) {
1341 withoutUnicodeBuffer[++withoutUnicodePtr] =
1347 } catch (IndexOutOfBoundsException e) {
1348 throw new InvalidInputException(UNTERMINATED_STRING);
1349 } catch (InvalidInputException e) {
1350 if (e.getMessage().equals(INVALID_ESCAPE)) {
1351 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1352 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1353 if (currentPosition + lookAhead == source.length)
1355 if (source[currentPosition + lookAhead] == '\n')
1357 if (source[currentPosition + lookAhead] == '`') {
1358 currentPosition += lookAhead + 1;
1366 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1367 if (currentLine == null) {
1368 currentLine = new NLSLine();
1369 lines.add(currentLine);
1373 getCurrentTokenSourceString(),
1375 currentPosition - 1));
1377 return TokenNameStringInterpolated;
1382 if ((currentCharacter == '#')
1383 || (test = getNextChar('/', '*')) == 0) {
1385 int endPositionForLineComment = 0;
1386 try { //get the next char
1387 if (((currentCharacter = source[currentPosition++])
1389 && (source[currentPosition] == 'u')) {
1390 //-------------unicode traitement ------------
1391 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1393 while (source[currentPosition] == 'u') {
1397 Character.getNumericValue(source[currentPosition++]))
1401 Character.getNumericValue(source[currentPosition++]))
1405 Character.getNumericValue(source[currentPosition++]))
1409 Character.getNumericValue(source[currentPosition++]))
1412 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1415 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1419 //handle the \\u case manually into comment
1420 if (currentCharacter == '\\') {
1421 if (source[currentPosition] == '\\')
1423 } //jump over the \\
1424 boolean isUnicode = false;
1425 while (currentCharacter != '\r'
1426 && currentCharacter != '\n') {
1427 if (currentCharacter == '?') {
1428 if (getNextChar('>')) {
1429 startPosition = currentPosition - 2;
1431 return TokenNameStopPHP;
1437 if (((currentCharacter = source[currentPosition++])
1439 && (source[currentPosition] == 'u')) {
1441 //-------------unicode traitement ------------
1442 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1444 while (source[currentPosition] == 'u') {
1448 Character.getNumericValue(source[currentPosition++]))
1452 Character.getNumericValue(
1453 source[currentPosition++]))
1457 Character.getNumericValue(
1458 source[currentPosition++]))
1462 Character.getNumericValue(
1463 source[currentPosition++]))
1466 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1469 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1472 //handle the \\u case manually into comment
1473 if (currentCharacter == '\\') {
1474 if (source[currentPosition] == '\\')
1476 } //jump over the \\
1479 endPositionForLineComment = currentPosition - 6;
1481 endPositionForLineComment = currentPosition - 1;
1483 recordComment(false);
1484 if ((currentCharacter == '\r')
1485 || (currentCharacter == '\n')) {
1486 checkNonExternalizeString();
1487 if (recordLineSeparator) {
1489 pushUnicodeLineSeparator();
1491 pushLineSeparator();
1497 if (tokenizeComments) {
1499 currentPosition = endPositionForLineComment;
1500 // reset one character behind
1502 return TokenNameCOMMENT_LINE;
1504 } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1505 if (tokenizeComments) {
1507 // reset one character behind
1508 return TokenNameCOMMENT_LINE;
1514 //traditional and annotation comment
1515 boolean isJavadoc = false, star = false;
1516 // consume next character
1517 unicodeAsBackSlash = false;
1518 if (((currentCharacter = source[currentPosition++]) == '\\')
1519 && (source[currentPosition] == 'u')) {
1520 getNextUnicodeChar();
1522 if (withoutUnicodePtr != 0) {
1523 withoutUnicodeBuffer[++withoutUnicodePtr] =
1528 if (currentCharacter == '*') {
1532 if ((currentCharacter == '\r')
1533 || (currentCharacter == '\n')) {
1534 checkNonExternalizeString();
1535 if (recordLineSeparator) {
1536 pushLineSeparator();
1541 try { //get the next char
1542 if (((currentCharacter = source[currentPosition++])
1544 && (source[currentPosition] == 'u')) {
1545 //-------------unicode traitement ------------
1546 getNextUnicodeChar();
1548 //handle the \\u case manually into comment
1549 if (currentCharacter == '\\') {
1550 if (source[currentPosition] == '\\')
1554 // empty comment is not a javadoc /**/
1555 if (currentCharacter == '/') {
1558 //loop until end of comment */
1559 while ((currentCharacter != '/') || (!star)) {
1560 if ((currentCharacter == '\r')
1561 || (currentCharacter == '\n')) {
1562 checkNonExternalizeString();
1563 if (recordLineSeparator) {
1564 pushLineSeparator();
1569 star = currentCharacter == '*';
1571 if (((currentCharacter = source[currentPosition++])
1573 && (source[currentPosition] == 'u')) {
1574 //-------------unicode traitement ------------
1575 getNextUnicodeChar();
1577 //handle the \\u case manually into comment
1578 if (currentCharacter == '\\') {
1579 if (source[currentPosition] == '\\')
1581 } //jump over the \\
1583 recordComment(isJavadoc);
1584 if (tokenizeComments) {
1586 return TokenNameCOMMENT_PHPDOC;
1587 return TokenNameCOMMENT_BLOCK;
1589 } catch (IndexOutOfBoundsException e) {
1590 throw new InvalidInputException(UNTERMINATED_COMMENT);
1594 if (getNextChar('='))
1595 return TokenNameDIVIDE_EQUAL;
1596 return TokenNameDIVIDE;
1600 return TokenNameEOF;
1601 //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1602 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1605 if (currentCharacter == '$') {
1606 while ((currentCharacter = source[currentPosition++]) == '$') {
1608 if (currentCharacter == '{')
1609 return TokenNameDOLLAR_LBRACE;
1610 if (isPHPIdentifierStart(currentCharacter))
1611 return scanIdentifierOrKeyword(true);
1612 return TokenNameERROR;
1614 if (isPHPIdentifierStart(currentCharacter))
1615 return scanIdentifierOrKeyword(false);
1616 if (Character.isDigit(currentCharacter))
1617 return scanNumber(false);
1618 return TokenNameERROR;
1621 } //-----------------end switch while try--------------------
1622 catch (IndexOutOfBoundsException e) {
1625 return TokenNameEOF;
// Decodes the unicode escape found at currentPosition (one or more 'u'
// characters followed by four hex digits), leaves the decoded character in
// currentCharacter and appends it to withoutUnicodeBuffer.
// Throws InvalidInputException(INVALID_UNICODE_ESCAPE) on a bad digit.
1628 public final void getNextUnicodeChar()
1629 throws IndexOutOfBoundsException, InvalidInputException {
1631 //handle the case of unicode.
1632 //when a unicode appears then we must use a buffer that holds char internal values
1633 //At the end of this method currentCharacter holds the new visited char
1634 //and currentPosition points right next after it
1636 //ALL getNextChar.... ARE OPTIMIZED COPIES
// c1..c4 receive the numeric values of the four hex digits.
// unicodeSize is subtracted from currentPosition further down; 6 presumably
// stands for backslash + 'u' + four digits -- TODO confirm.
1638 int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
// any number of consecutive 'u' characters may precede the digits
1640 while (source[currentPosition] == 'u') {
// A numeric value above 15 is not a hex digit.
// NOTE(review): Character.getNumericValue yields negative values for
// non-digits; only the "> 15" tests are visible here -- confirm that the
// negative case is rejected as well.
1645 if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1647 || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1649 || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1651 || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1653 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// assemble the four digits, most significant first, into a single char
1655 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1656 //need the unicode buffer
1657 if (withoutUnicodePtr == 0) {
1658 //buffer all the entries that have been left aside....
// offset of the escape relative to startPosition, assuming unicodeSize is
// the length of the escape that was just consumed
1659 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1663 withoutUnicodeBuffer,
1667 //fill the buffer with the char
1668 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// remember a decoded backslash: the escape scanners test unicodeAsBackSlash
1670 unicodeAsBackSlash = currentCharacter == '\\';
1672 /* Tokenize a method body, assuming that curly brackets are properly balanced.
1674 public final void jumpOverMethodBody() {
// Scans forward from currentPosition: white space is skipped first, then the
// switch below consumes string literals, comments, identifiers and numbers.
// The method declares no checked exception; InvalidInputException and
// IndexOutOfBoundsException are caught inside (handler bodies are not
// visible in this listing).
1676 this.wasAcr = false;
1679 while (true) { //loop for jumping over comments
1680 // ---------Consume white space and handles startPosition---------
1681 boolean isWhiteSpace;
1683 startPosition = currentPosition;
// a backslash followed by 'u' starts a unicode escape, which is decoded by
// jumpOverUnicodeWhiteSpace()
1684 if (((currentCharacter = source[currentPosition++]) == '\\')
1685 && (source[currentPosition] == 'u')) {
1686 isWhiteSpace = jumpOverUnicodeWhiteSpace();
// a raw CR or LF is recorded as a line end when line tracking is enabled
1688 if (recordLineSeparator
1689 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1690 pushLineSeparator();
1691 isWhiteSpace = Character.isWhitespace(currentCharacter);
1693 } while (isWhiteSpace);
1695 // -------consume token until } is found---------
1696 switch (currentCharacter) {
1708 test = getNextChar('\\');
1711 scanDoubleQuotedEscapeCharacter();
1712 } catch (InvalidInputException ex) {
1715 try { // consume next character
1716 unicodeAsBackSlash = false;
1717 if (((currentCharacter = source[currentPosition++]) == '\\')
1718 && (source[currentPosition] == 'u')) {
1719 getNextUnicodeChar();
1721 if (withoutUnicodePtr != 0) {
1722 withoutUnicodeBuffer[++withoutUnicodePtr] =
1726 } catch (InvalidInputException ex) {
1734 try { // consume next character
1735 unicodeAsBackSlash = false;
1736 if (((currentCharacter = source[currentPosition++]) == '\\')
1737 && (source[currentPosition] == 'u')) {
1738 getNextUnicodeChar();
1740 if (withoutUnicodePtr != 0) {
1741 withoutUnicodeBuffer[++withoutUnicodePtr] =
1745 } catch (InvalidInputException ex) {
// advance until the closing double quote is reached
1747 while (currentCharacter != '"') {
1748 if (currentCharacter == '\r') {
1749 if (source[currentPosition] == '\n')
1752 // the string cannot go further that the line
1754 if (currentCharacter == '\n') {
1756 // the string cannot go further that the line
// a backslash inside the literal is handed to the escape scanner
1758 if (currentCharacter == '\\') {
1760 scanDoubleQuotedEscapeCharacter();
1761 } catch (InvalidInputException ex) {
1764 try { // consume next character
1765 unicodeAsBackSlash = false;
1766 if (((currentCharacter = source[currentPosition++]) == '\\')
1767 && (source[currentPosition] == 'u')) {
1768 getNextUnicodeChar();
1770 if (withoutUnicodePtr != 0) {
1771 withoutUnicodeBuffer[++withoutUnicodePtr] =
1775 } catch (InvalidInputException ex) {
1778 } catch (IndexOutOfBoundsException e) {
// NOTE(review): a result of 0 presumably means the first candidate ('/')
// matched, i.e. a line comment starts here -- confirm against getNextChar
1785 if ((test = getNextChar('/', '*')) == 0) {
1789 if (((currentCharacter = source[currentPosition++]) == '\\')
1790 && (source[currentPosition] == 'u')) {
1791 //-------------unicode traitement ------------
1792 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1794 while (source[currentPosition] == 'u') {
1798 Character.getNumericValue(source[currentPosition++]))
1802 Character.getNumericValue(source[currentPosition++]))
1806 Character.getNumericValue(source[currentPosition++]))
1810 Character.getNumericValue(source[currentPosition++]))
1813 //error don't care of the value
1814 currentCharacter = 'A';
1815 } //something different from \n and \r
1818 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// the line comment extends up to the next CR or LF
1822 while (currentCharacter != '\r'
1823 && currentCharacter != '\n') {
1825 if (((currentCharacter = source[currentPosition++])
1827 && (source[currentPosition] == 'u')) {
1828 //-------------unicode traitement ------------
1829 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1831 while (source[currentPosition] == 'u') {
1835 Character.getNumericValue(source[currentPosition++]))
1839 Character.getNumericValue(source[currentPosition++]))
1843 Character.getNumericValue(source[currentPosition++]))
1847 Character.getNumericValue(source[currentPosition++]))
1850 //error don't care of the value
1851 currentCharacter = 'A';
1852 } //something different from \n and \r
1855 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1859 if (recordLineSeparator
1860 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1861 pushLineSeparator();
1862 } catch (IndexOutOfBoundsException e) {
1863 } //an eof will them be generated
1867 //traditional and annotation comment
// star records whether the previously read character was '*'
1868 boolean star = false;
1869 try { // consume next character
1870 unicodeAsBackSlash = false;
1871 if (((currentCharacter = source[currentPosition++]) == '\\')
1872 && (source[currentPosition] == 'u')) {
1873 getNextUnicodeChar();
1875 if (withoutUnicodePtr != 0) {
1876 withoutUnicodeBuffer[++withoutUnicodePtr] =
1880 } catch (InvalidInputException ex) {
1882 if (currentCharacter == '*') {
1885 if (recordLineSeparator
1886 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1887 pushLineSeparator();
1888 try { //get the next char
1889 if (((currentCharacter = source[currentPosition++]) == '\\')
1890 && (source[currentPosition] == 'u')) {
1891 //-------------unicode traitement ------------
1892 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1894 while (source[currentPosition] == 'u') {
1898 Character.getNumericValue(source[currentPosition++]))
1902 Character.getNumericValue(source[currentPosition++]))
1906 Character.getNumericValue(source[currentPosition++]))
1910 Character.getNumericValue(source[currentPosition++]))
1913 //error don't care of the value
1914 currentCharacter = 'A';
1915 } //something different from * and /
1918 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1921 //loop until end of comment */
1922 while ((currentCharacter != '/') || (!star)) {
1923 if (recordLineSeparator
1924 && ((currentCharacter == '\r')
1925 || (currentCharacter == '\n')))
1926 pushLineSeparator();
1927 star = currentCharacter == '*';
1929 if (((currentCharacter = source[currentPosition++])
1931 && (source[currentPosition] == 'u')) {
1932 //-------------unicode traitement ------------
1933 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1935 while (source[currentPosition] == 'u') {
1939 Character.getNumericValue(source[currentPosition++]))
1943 Character.getNumericValue(source[currentPosition++]))
1947 Character.getNumericValue(source[currentPosition++]))
1951 Character.getNumericValue(source[currentPosition++]))
1954 //error don't care of the value
1955 currentCharacter = 'A';
1956 } //something different from * and /
1959 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1963 } catch (IndexOutOfBoundsException e) {
// identifiers and '$' variables are consumed by scanIdentifierOrKeyword; the
// argument tells it whether the token began with '$'
1972 if (isPHPIdentifierStart(currentCharacter)
1973 || currentCharacter == '$') {
1975 scanIdentifierOrKeyword((currentCharacter == '$'));
1976 } catch (InvalidInputException ex) {
1980 if (Character.isDigit(currentCharacter)) {
1983 } catch (InvalidInputException ex) {
1989 //-----------------end switch while try--------------------
1990 } catch (IndexOutOfBoundsException e) {
1991 } catch (InvalidInputException e) {
// Decodes the unicode escape at currentPosition and classifies the decoded
// character: line terminators are recorded, and a non-white-space character
// is appended to withoutUnicodeBuffer.
// Throws InvalidInputException(INVALID_UNICODE_ESCAPE) for a malformed or
// truncated escape.
1995 public final boolean jumpOverUnicodeWhiteSpace()
1996 throws InvalidInputException {
1998 //handle the case of unicode. Jump over the next whiteSpace
1999 //making startPosition pointing on the next available char
2000 //On false, the currentCharacter is filled up with a potential
2004 this.wasAcr = false;
2006 int unicodeSize = 6;
// any number of consecutive 'u' characters may precede the digits
2008 while (source[currentPosition] == 'u') {
// a numeric value above 15 cannot be a hex digit
2013 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2015 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2017 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2019 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2021 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// assemble the four digits, most significant first, into a single char
2024 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// an escaped CR or LF still counts as a line end when line tracking is on
2025 if (recordLineSeparator
2026 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2027 pushLineSeparator();
2028 if (Character.isWhitespace(currentCharacter))
2031 //buffer the new char which is not a white space
2032 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2033 //withoutUnicodePtr == 1 is true here
// running off the end of source while reading the escape is reported as an
// invalid unicode escape rather than as an index error
2035 } catch (IndexOutOfBoundsException e) {
2036 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// Produces a copy of lineEnds limited to the entries recorded so far.
2039 public final int[] getLineEnds() {
2040 //return a bounded copy of this.lineEnds
// the copy has linePtr + 1 elements, i.e. indices 0..linePtr of lineEnds
2043 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
// Accessor returning a char[]; presumably the scanner's source buffer --
// TODO confirm, the method body is not visible in this listing.
2047 public char[] getSource() {
// Provides the source of a one-character token, taken from
// source[startPosition].
2050 final char[] optimizedCurrentTokenSource1() {
2051 //return always the same char[] build only once
2053 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2054 char charOne = source[startPosition];
// this return allocates a fresh one-element array holding charOne
2109 return new char[] { charOne };
// Looks up the two-character token starting at startPosition in the
// charArray_length[0] cache so an array with the same characters can be
// reused; on a miss a new array is created and stored in the cache.
2113 final char[] optimizedCurrentTokenSource2() {
2114 //try to return the same char[] build only once
// the hash combines both characters: the first shifted left by 6 bits plus
// the second
2118 (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
// bucket selected by the hash; it holds the previously built arrays
2120 char[][] table = charArray_length[0][hash];
// first scan: compare against bucket entries while i stays below
// InternalTableSize
2122 while (++i < InternalTableSize) {
2123 char[] charArray = table[i];
2124 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2127 //---------other side---------
// second scan is bounded by newEntry2
2129 int max = newEntry2;
2130 while (++i <= max) {
2131 char[] charArray = table[i];
2132 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2135 //--------add the entry-------
// the slot index is advanced and checked against InternalTableSize before
// the new array is stored
2136 if (++max >= InternalTableSize)
2139 table[max] = (r = new char[] { c0, c1 });
// Looks up the three-character token starting at startPosition in the
// charArray_length[1] cache so an array with the same characters can be
// reused; on a miss a new array is created and stored in the cache.
2144 final char[] optimizedCurrentTokenSource3() {
2145 //try to return the same char[] build only once
// the hash packs the characters 6 bits apart (shifts of 12, 6 and 0)
2149 (((c0 = source[startPosition]) << 12)
2150 + ((c1 = source[startPosition + 1]) << 6)
2151 + (c2 = source[startPosition + 2]))
// bucket selected by the hash; it holds the previously built arrays
2153 char[][] table = charArray_length[1][hash];
// first scan: compare against bucket entries while i stays below
// InternalTableSize
2155 while (++i < InternalTableSize) {
2156 char[] charArray = table[i];
2157 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2160 //---------other side---------
// second scan is bounded by newEntry3
2162 int max = newEntry3;
2163 while (++i <= max) {
2164 char[] charArray = table[i];
2165 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2168 //--------add the entry-------
// the slot index is advanced and checked against InternalTableSize before
// the new array is stored
2169 if (++max >= InternalTableSize)
2172 table[max] = (r = new char[] { c0, c1, c2 });
// Looks up the four-character token starting at startPosition in the
// charArray_length[2] cache so an array with the same characters can be
// reused; on a miss a new array is created and stored in the cache.
2177 final char[] optimizedCurrentTokenSource4() {
2178 //try to return the same char[] build only once
2180 char c0, c1, c2, c3;
// the hash packs the characters 6 bits apart (shifts of 18, 12, 6 and 0);
// the first term is widened to long before shifting
2182 ((((long) (c0 = source[startPosition])) << 18)
2183 + ((c1 = source[startPosition + 1]) << 12)
2184 + ((c2 = source[startPosition + 2]) << 6)
2185 + (c3 = source[startPosition + 3]))
// the long hash is narrowed to int to index the bucket array
2187 char[][] table = charArray_length[2][(int) hash];
// first scan: compare against bucket entries while i stays below
// InternalTableSize
2189 while (++i < InternalTableSize) {
2190 char[] charArray = table[i];
2191 if ((c0 == charArray[0])
2192 && (c1 == charArray[1])
2193 && (c2 == charArray[2])
2194 && (c3 == charArray[3]))
2197 //---------other side---------
// second scan is bounded by newEntry4
2199 int max = newEntry4;
2200 while (++i <= max) {
2201 char[] charArray = table[i];
2202 if ((c0 == charArray[0])
2203 && (c1 == charArray[1])
2204 && (c2 == charArray[2])
2205 && (c3 == charArray[3]))
2208 //--------add the entry-------
// the slot index is advanced and checked against InternalTableSize before
// the new array is stored
2209 if (++max >= InternalTableSize)
2212 table[max] = (r = new char[] { c0, c1, c2, c3 });
// Looks up the five-character token starting at startPosition in the
// charArray_length[3] cache so an array with the same characters can be
// reused; on a miss a new array is created and stored in the cache.
2218 final char[] optimizedCurrentTokenSource5() {
2219 //try to return the same char[] build only once
2221 char c0, c1, c2, c3, c4;
// the hash packs the characters 6 bits apart (shifts of 24, 18, 12, 6 and
// 0); the two highest terms are widened to long before shifting
2223 ((((long) (c0 = source[startPosition])) << 24)
2224 + (((long) (c1 = source[startPosition + 1])) << 18)
2225 + ((c2 = source[startPosition + 2]) << 12)
2226 + ((c3 = source[startPosition + 3]) << 6)
2227 + (c4 = source[startPosition + 4]))
// the long hash is narrowed to int to index the bucket array
2229 char[][] table = charArray_length[3][(int) hash];
// first scan: compare against bucket entries while i stays below
// InternalTableSize
2231 while (++i < InternalTableSize) {
2232 char[] charArray = table[i];
2233 if ((c0 == charArray[0])
2234 && (c1 == charArray[1])
2235 && (c2 == charArray[2])
2236 && (c3 == charArray[3])
2237 && (c4 == charArray[4]))
2240 //---------other side---------
// second scan is bounded by newEntry5
2242 int max = newEntry5;
2243 while (++i <= max) {
2244 char[] charArray = table[i];
2245 if ((c0 == charArray[0])
2246 && (c1 == charArray[1])
2247 && (c2 == charArray[2])
2248 && (c3 == charArray[3])
2249 && (c4 == charArray[4]))
2252 //--------add the entry-------
// the slot index is advanced and checked against InternalTableSize before
// the new array is stored
2253 if (++max >= InternalTableSize)
2256 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
// Looks up the six-character token starting at startPosition in the
// charArray_length[4] cache so an array with the same characters can be
// reused; on a miss a new array is created and stored in the cache.
2262 final char[] optimizedCurrentTokenSource6() {
2263 //try to return the same char[] build only once
2265 char c0, c1, c2, c3, c4, c5;
// NOTE(review): the shifts are 32, 24, 18, 12, 6 and 0 -- every step is 6
// bits except the top one (32 where 30 would continue the pattern used by
// the shorter variants). This only affects hash distribution, but confirm
// it is intentional.
2267 ((((long) (c0 = source[startPosition])) << 32)
2268 + (((long) (c1 = source[startPosition + 1])) << 24)
2269 + (((long) (c2 = source[startPosition + 2])) << 18)
2270 + ((c3 = source[startPosition + 3]) << 12)
2271 + ((c4 = source[startPosition + 4]) << 6)
2272 + (c5 = source[startPosition + 5]))
// the long hash is narrowed to int to index the bucket array
2274 char[][] table = charArray_length[4][(int) hash];
// first scan: compare against bucket entries while i stays below
// InternalTableSize
2276 while (++i < InternalTableSize) {
2277 char[] charArray = table[i];
2278 if ((c0 == charArray[0])
2279 && (c1 == charArray[1])
2280 && (c2 == charArray[2])
2281 && (c3 == charArray[3])
2282 && (c4 == charArray[4])
2283 && (c5 == charArray[5]))
2286 //---------other side---------
// second scan is bounded by newEntry6
2288 int max = newEntry6;
2289 while (++i <= max) {
2290 char[] charArray = table[i];
2291 if ((c0 == charArray[0])
2292 && (c1 == charArray[1])
2293 && (c2 == charArray[2])
2294 && (c3 == charArray[3])
2295 && (c4 == charArray[4])
2296 && (c5 == charArray[5]))
2299 //--------add the entry-------
// the slot index is advanced and checked against InternalTableSize before
// the new array is stored
2300 if (++max >= InternalTableSize)
2303 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
// Records in lineEnds the position of the line terminator currently held in
// currentCharacter. A CR directly followed by LF ends up as a single entry
// positioned on the LF. lineEnds grows by INCREMENT entries when it is full.
2308 public final void pushLineSeparator() throws InvalidInputException {
2309 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// number of slots added each time lineEnds has to grow
2310 final int INCREMENT = 250;
2312 if (this.checkNonExternalizedStringLiterals) {
2313 // reinitialize the current line for non externalize strings purpose
2316 //currentCharacter is at position currentPosition-1
2319 if (currentCharacter == '\r') {
2320 int separatorPos = currentPosition - 1;
// guard against a position that is not beyond the last recorded line end
2321 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2323 //System.out.println("CR-" + separatorPos);
// the array is grown lazily: an out-of-bounds store is caught below and
// repeated after reallocation
2325 lineEnds[++linePtr] = separatorPos;
2326 } catch (IndexOutOfBoundsException e) {
2327 //linePtr value is correct
// ++linePtr was evaluated before the failing store, so linePtr already
// designates the new slot
2328 int oldLength = lineEnds.length;
2329 int[] old = lineEnds;
2330 lineEnds = new int[oldLength + INCREMENT];
2331 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2332 lineEnds[linePtr] = separatorPos;
2334 // look-ahead for merged cr+lf
// when LF follows immediately, the entry just written is moved onto the LF
2336 if (source[currentPosition] == '\n') {
2337 //System.out.println("look-ahead LF-" + currentPosition);
2338 lineEnds[linePtr] = currentPosition;
2344 } catch (IndexOutOfBoundsException e) {
2349 if (currentCharacter == '\n') {
2350 //must merge eventual cr followed by lf
// the previous entry is the CR sitting right before this LF: reuse that
// entry instead of pushing a second one
2351 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2352 //System.out.println("merge LF-" + (currentPosition - 1));
2353 lineEnds[linePtr] = currentPosition - 1;
2355 int separatorPos = currentPosition - 1;
// guard against a position that is not beyond the last recorded line end
2356 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2358 // System.out.println("LF-" + separatorPos);
2360 lineEnds[++linePtr] = separatorPos;
2361 } catch (IndexOutOfBoundsException e) {
2362 //linePtr value is correct
2363 int oldLength = lineEnds.length;
2364 int[] old = lineEnds;
2365 lineEnds = new int[oldLength + INCREMENT];
2366 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2367 lineEnds[linePtr] = separatorPos;
// Counterpart of pushLineSeparator() for a line terminator that was read as
// a unicode escape: the recorded position is currentPosition - 6 instead of
// currentPosition - 1 (presumably the first character of a six-character
// escape -- TODO confirm for escapes written with several 'u').
2374 public final void pushUnicodeLineSeparator() {
2375 // isUnicode means that the \r or \n has been read as a unicode character
2377 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// number of slots added each time lineEnds has to grow
2379 final int INCREMENT = 250;
2380 //currentCharacter is at position currentPosition-1
2382 if (this.checkNonExternalizedStringLiterals) {
2383 // reinitialize the current line for non externalize strings purpose
2388 if (currentCharacter == '\r') {
2389 int separatorPos = currentPosition - 6;
// guard against a position that is not beyond the last recorded line end
2390 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2392 //System.out.println("CR-" + separatorPos);
// the array is grown lazily: an out-of-bounds store is caught below and
// repeated after reallocation
2394 lineEnds[++linePtr] = separatorPos;
2395 } catch (IndexOutOfBoundsException e) {
2396 //linePtr value is correct
2397 int oldLength = lineEnds.length;
2398 int[] old = lineEnds;
2399 lineEnds = new int[oldLength + INCREMENT];
2400 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2401 lineEnds[linePtr] = separatorPos;
2403 // look-ahead for merged cr+lf
// only a raw LF is looked for here; when present the entry is moved onto it
2404 if (source[currentPosition] == '\n') {
2405 //System.out.println("look-ahead LF-" + currentPosition);
2406 lineEnds[linePtr] = currentPosition;
2414 if (currentCharacter == '\n') {
2415 //must merge eventual cr followed by lf
// the previous entry sits right before this escaped LF: reuse that entry
// instead of pushing a second one
2416 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2417 //System.out.println("merge LF-" + (currentPosition - 1));
2418 lineEnds[linePtr] = currentPosition - 6;
2420 int separatorPos = currentPosition - 6;
// guard against a position that is not beyond the last recorded line end
2421 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2423 // System.out.println("LF-" + separatorPos);
2425 lineEnds[++linePtr] = separatorPos;
2426 } catch (IndexOutOfBoundsException e) {
2427 //linePtr value is correct
2428 int oldLength = lineEnds.length;
2429 int[] old = lineEnds;
2430 lineEnds = new int[oldLength + INCREMENT];
2431 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2432 lineEnds[linePtr] = separatorPos;
// Pushes the comment that ends at currentPosition onto the comment stacks:
// its end goes to commentStops and its start (startPosition) to
// commentStarts. Both arrays grow by 30 entries when full.
2439 public final void recordComment(boolean isJavadoc) {
2441 // a new annotation comment is recorded
// the sign of the stored end position encodes the kind of comment:
// positive when isJavadoc is true, negative otherwise
2443 commentStops[++commentPtr] =
2444 isJavadoc ? currentPosition : -currentPosition;
2445 } catch (IndexOutOfBoundsException e) {
// commentPtr was already incremented before the failing store, so it
// designates the new slot once the array has been enlarged
2446 int oldStackLength = commentStops.length;
2447 int[] oldStack = commentStops;
2448 commentStops = new int[oldStackLength + 30];
2449 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2450 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2451 //grows the positions buffers too
2452 int[] old = commentStarts;
2453 commentStarts = new int[oldStackLength + 30];
2454 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2457 //the buffer is of a correct size here
2458 commentStarts[commentPtr] = startPosition;
// Repositions the scanner on the range [begin, end] and empties the comment
// stack.
2460 public void resetTo(int begin, int end) {
2461 //reset the scanner to a given position where it may rescan again
// all three position markers restart at begin
2464 initialPosition = startPosition = currentPosition = begin;
// eofPosition is end + 1, except that Integer.MAX_VALUE is kept as is so
// the addition cannot overflow
2465 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2466 commentPtr = -1; // reset comment stack
// Reads the character that follows a backslash inside a single-quoted
// string and stores its translation in currentCharacter.
// NOTE(review): the case labels of the switch are not visible in this
// listing; only the assigned values (quote and backslash) can be seen.
2469 public final void scanSingleQuotedEscapeCharacter()
2470 throws InvalidInputException {
2471 // the string with "\\u" is a legal string of two chars \ and u
2472 //thus we use a direct access to the source (for regular cases).
// the backslash itself was produced by a unicode escape: read the next
// character with unicode handling
2474 if (unicodeAsBackSlash) {
2475 // consume next character
2476 unicodeAsBackSlash = false;
2477 if (((currentCharacter = source[currentPosition++]) == '\\')
2478 && (source[currentPosition] == 'u')) {
2479 getNextUnicodeChar();
2481 if (withoutUnicodePtr != 0) {
2482 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// regular case: take the escaped character straight from the source
2486 currentCharacter = source[currentPosition++];
2487 switch (currentCharacter) {
2489 currentCharacter = '\'';
2492 currentCharacter = '\\';
2495 currentCharacter = '\\';
// Reads the character(s) that follow a backslash inside a double-quoted
// string and stores the translated character in currentCharacter. Handles
// the simple escapes assigned below (tab, newline, carriage return, quotes,
// backslash, dollar) and octal escapes.
// Can throw InvalidInputException(INVALID_ESCAPE); the condition guarding
// that throw is not visible in this listing.
2500 public final void scanDoubleQuotedEscapeCharacter()
2501 throws InvalidInputException {
2502 // the string with "\\u" is a legal string of two chars \ and u
2503 //thus we use a direct access to the source (for regular cases).
// the backslash itself was produced by a unicode escape: read the next
// character with unicode handling
2505 if (unicodeAsBackSlash) {
2506 // consume next character
2507 unicodeAsBackSlash = false;
2508 if (((currentCharacter = source[currentPosition++]) == '\\')
2509 && (source[currentPosition] == 'u')) {
2510 getNextUnicodeChar();
2512 if (withoutUnicodePtr != 0) {
2513 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// regular case: take the escaped character straight from the source
2517 currentCharacter = source[currentPosition++];
2518 switch (currentCharacter) {
2520 // currentCharacter = '\b';
2523 currentCharacter = '\t';
2526 currentCharacter = '\n';
2529 // currentCharacter = '\f';
2532 currentCharacter = '\r';
2535 currentCharacter = '\"';
2538 currentCharacter = '\'';
2541 currentCharacter = '\\';
2544 currentCharacter = '$';
2547 // -----------octal escape--------------
2549 // OctalDigit OctalDigit
2550 // ZeroToThree OctalDigit OctalDigit
// number accumulates the octal value, starting with the first digit
2552 int number = Character.getNumericValue(currentCharacter);
2553 if (number >= 0 && number <= 7) {
// true when the first digit is 4..7; a third digit is then not accepted
2554 boolean zeroToThreeNot = number > 3;
2556 .isDigit(currentCharacter = source[currentPosition++])) {
2557 int digit = Character.getNumericValue(currentCharacter);
2558 if (digit >= 0 && digit <= 7) {
2559 number = (number * 8) + digit;
2561 .isDigit(currentCharacter = source[currentPosition++])) {
2562 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2565 digit = Character.getNumericValue(currentCharacter);
2566 if (digit >= 0 && digit <= 7) {
2567 // has read \ZeroToThree OctalDigit OctalDigit
2568 number = (number * 8) + digit;
2569 } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2573 } else { // has read \OctalDigit NonDigit--> ignore last character
2576 } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2579 } else { // has read \OctalDigit --> ignore last character
2583 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the resulting character
2584 currentCharacter = (char) number;
2587 // throw new InvalidInputException(INVALID_ESCAPE);
2591 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2592 // return scanIdentifierOrKeyword( false );
/**
 * Scans the rest of the current identifier and classifies it as a keyword,
 * a variable, or a plain identifier.
 *
 * Characters are consumed while getNextCharAsJavaIdentifierPart() succeeds.
 * The token text is then copied into a lower-cased scratch array, so the
 * keyword comparisons below are case-insensitive. The text is taken from
 * source when withoutUnicodePtr == 0 and from withoutUnicodeBuffer otherwise.
 * Tokens of length 1 are always TokenNameIdentifier. Longer tokens are
 * dispatched on their first letter (and on their length) and then compared
 * character by character against the known keywords.
 *
 * Side effect: resets useAssertAsAnIndentifier to false.
 *
 * @param isVariable NOTE(review): presumably selects the TokenNameVariable
 *        result below; the guarding condition is not visible in this excerpt.
 * @return the matching keyword token, TokenNameVariable, or TokenNameIdentifier
 * @throws InvalidInputException declared by the signature; no throw statement
 *         is visible in this excerpt
 */
2595 public int scanIdentifierOrKeyword(boolean isVariable)
2596 throws InvalidInputException {
2599 //first dispatch on the first char.
2600 //then the length. If there are several
2601 //keywords with the same length AND the same first char, then do another
2602 //dispatch on the second char :-)...cool....but fast !
2604 useAssertAsAnIndentifier = false;
2606 while (getNextCharAsJavaIdentifierPart()) {
2610 return TokenNameVariable;
2615 if (withoutUnicodePtr == 0)
2617 //quick test on length == 1 but not on length > 12 while most identifier
2618 //have a length which is <= 12...but there are lots of identifier with
2622 if ((length = currentPosition - startPosition) == 1)
2623 return TokenNameIdentifier;
// lower-case copy of the token text taken directly from the source window
2625 data = new char[length];
2626 index = startPosition;
2627 for (int i = 0; i < length; i++) {
2628 data[i] = Character.toLowerCase(source[index + i]);
2632 if ((length = withoutUnicodePtr) == 1)
2633 return TokenNameIdentifier;
2634 // data = withoutUnicodeBuffer;
// NOTE(review): this copies and lower-cases the whole withoutUnicodeBuffer
// (its allocated length), not just the `length` chars of the token - confirm
// that the per-token cost is acceptable.
2635 data = new char[withoutUnicodeBuffer.length];
2636 for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2637 data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2642 firstLetter = data[index];
2643 switch (firstLetter) {
2645 case 'a' : // as and array
2648 if ((data[++index] == 's')) {
2651 return TokenNameIdentifier;
2654 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2655 return TokenNameAND;
2657 return TokenNameIdentifier;
2660 // if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2661 // return TokenNamearray;
2663 // return TokenNameIdentifier;
2665 return TokenNameIdentifier;
2670 if ((data[++index] == 'r')
2671 && (data[++index] == 'e')
2672 && (data[++index] == 'a')
2673 && (data[++index] == 'k'))
2674 return TokenNamebreak;
2676 return TokenNameIdentifier;
2678 return TokenNameIdentifier;
2681 case 'c' : //case class continue
2684 if ((data[++index] == 'a')
2685 && (data[++index] == 's')
2686 && (data[++index] == 'e'))
2687 return TokenNamecase;
2689 return TokenNameIdentifier;
2691 if ((data[++index] == 'l')
2692 && (data[++index] == 'a')
2693 && (data[++index] == 's')
2694 && (data[++index] == 's'))
2695 return TokenNameclass;
2697 return TokenNameIdentifier;
2699 if ((data[++index] == 'o')
2700 && (data[++index] == 'n')
2701 && (data[++index] == 't')
2702 && (data[++index] == 'i')
2703 && (data[++index] == 'n')
2704 && (data[++index] == 'u')
2705 && (data[++index] == 'e'))
2706 return TokenNamecontinue;
2708 return TokenNameIdentifier;
2710 return TokenNameIdentifier;
2713 case 'd' : //define default do
2716 if ((data[++index] == 'o'))
2719 return TokenNameIdentifier;
2721 if ((data[++index] == 'e')
2722 && (data[++index] == 'f')
2723 && (data[++index] == 'i')
2724 && (data[++index] == 'n')
2725 && (data[++index] == 'e'))
2726 return TokenNamedefine;
2728 return TokenNameIdentifier;
2730 if ((data[++index] == 'e')
2731 && (data[++index] == 'f')
2732 && (data[++index] == 'a')
2733 && (data[++index] == 'u')
2734 && (data[++index] == 'l')
2735 && (data[++index] == 't'))
2736 return TokenNamedefault;
2738 return TokenNameIdentifier;
2740 return TokenNameIdentifier;
2742 case 'e' : //echo else elseif extends endif endfor endwhile endswitch endforeach
2745 if ((data[++index] == 'c')
2746 && (data[++index] == 'h')
2747 && (data[++index] == 'o'))
2748 return TokenNameecho;
2750 (data[index] == 'l')
2751 && (data[++index] == 's')
2752 && (data[++index] == 'e'))
2753 return TokenNameelse;
2755 return TokenNameIdentifier;
2757 if ((data[++index] == 'n')
2758 && (data[++index] == 'd')
2759 && (data[++index] == 'i')
2760 && (data[++index] == 'f'))
2761 return TokenNameendif;
2763 return TokenNameIdentifier;
2765 if ((data[++index] == 'n')
2766 && (data[++index] == 'd')
2767 && (data[++index] == 'f')
2768 && (data[++index] == 'o')
2769 && (data[++index] == 'r'))
2770 return TokenNameendfor;
2772 (data[index] == 'l')
2773 && (data[++index] == 's')
2774 && (data[++index] == 'e')
2775 && (data[++index] == 'i')
2776 && (data[++index] == 'f'))
2777 return TokenNameelseif;
2779 return TokenNameIdentifier;
2781 if ((data[++index] == 'x')
2782 && (data[++index] == 't')
2783 && (data[++index] == 'e')
2784 && (data[++index] == 'n')
2785 && (data[++index] == 'd')
2786 && (data[++index] == 's'))
2787 return TokenNameextends;
2789 return TokenNameIdentifier;
2790 case 8 : // endwhile
2791 if ((data[++index] == 'n')
2792 && (data[++index] == 'd')
2793 && (data[++index] == 'w')
2794 && (data[++index] == 'h')
2795 && (data[++index] == 'i')
2796 && (data[++index] == 'l')
2797 && (data[++index] == 'e'))
2798 return TokenNameendwhile;
2800 return TokenNameIdentifier;
2801 case 9 : // endswitch
2802 if ((data[++index] == 'n')
2803 && (data[++index] == 'd')
2804 && (data[++index] == 's')
2805 && (data[++index] == 'w')
2806 && (data[++index] == 'i')
2807 && (data[++index] == 't')
2808 && (data[++index] == 'c')
2809 && (data[++index] == 'h'))
2810 return TokenNameendswitch;
2812 return TokenNameIdentifier;
2813 case 10 : // endforeach
2814 if ((data[++index] == 'n')
2815 && (data[++index] == 'd')
2816 && (data[++index] == 'f')
2817 && (data[++index] == 'o')
2818 && (data[++index] == 'r')
2819 && (data[++index] == 'e')
2820 && (data[++index] == 'a')
2821 && (data[++index] == 'c')
2822 && (data[++index] == 'h'))
2823 return TokenNameendforeach;
2825 return TokenNameIdentifier;
2828 return TokenNameIdentifier;
2831 case 'f' : //for false foreach function
2834 if ((data[++index] == 'o') && (data[++index] == 'r'))
2835 return TokenNamefor;
2837 return TokenNameIdentifier;
2839 if ((data[++index] == 'a')
2840 && (data[++index] == 'l')
2841 && (data[++index] == 's')
2842 && (data[++index] == 'e'))
2843 return TokenNamefalse;
2845 return TokenNameIdentifier;
2846 case 7 : // foreach
2847 if ((data[++index] == 'o')
2848 && (data[++index] == 'r')
2849 && (data[++index] == 'e')
2850 && (data[++index] == 'a')
2851 && (data[++index] == 'c')
2852 && (data[++index] == 'h'))
2853 return TokenNameforeach;
2855 return TokenNameIdentifier;
2856 case 8 : // function
2857 if ((data[++index] == 'u')
2858 && (data[++index] == 'n')
2859 && (data[++index] == 'c')
2860 && (data[++index] == 't')
2861 && (data[++index] == 'i')
2862 && (data[++index] == 'o')
2863 && (data[++index] == 'n'))
2864 return TokenNamefunction;
2866 return TokenNameIdentifier;
2868 return TokenNameIdentifier;
2872 if ((data[++index] == 'l')
2873 && (data[++index] == 'o')
2874 && (data[++index] == 'b')
2875 && (data[++index] == 'a')
2876 && (data[++index] == 'l')) {
2877 return TokenNameglobal;
2880 return TokenNameIdentifier;
2885 if (data[++index] == 'f')
2888 return TokenNameIdentifier;
2890 // if ((data[++index] == 'n') && (data[++index] == 't'))
2891 // return TokenNameint;
2893 // return TokenNameIdentifier;
2895 if ((data[++index] == 'n')
2896 && (data[++index] == 'c')
2897 && (data[++index] == 'l')
2898 && (data[++index] == 'u')
2899 && (data[++index] == 'd')
2900 && (data[++index] == 'e'))
2901 return TokenNameinclude;
2903 return TokenNameIdentifier;
2905 if ((data[++index] == 'n')
2906 && (data[++index] == 'c')
2907 && (data[++index] == 'l')
2908 && (data[++index] == 'u')
2909 && (data[++index] == 'd')
2910 && (data[++index] == 'e')
2911 && (data[++index] == '_')
2912 && (data[++index] == 'o')
2913 && (data[++index] == 'n')
2914 && (data[++index] == 'c')
2915 && (data[++index] == 'e'))
2916 return TokenNameinclude_once;
2918 return TokenNameIdentifier;
2920 return TokenNameIdentifier;
2925 if ((data[++index] == 'i')
2926 && (data[++index] == 's')
2927 && (data[++index] == 't')) {
2928 return TokenNamelist;
2931 return TokenNameIdentifier;
2933 case 'n' : // new null
2936 if ((data[++index] == 'e') && (data[++index] == 'w'))
2937 return TokenNamenew;
2939 return TokenNameIdentifier;
2941 if ((data[++index] == 'u')
2942 && (data[++index] == 'l')
2943 && (data[++index] == 'l'))
2944 return TokenNamenull;
2946 return TokenNameIdentifier;
2949 return TokenNameIdentifier;
2951 case 'o' : // or old_function
2953 if (data[++index] == 'r') {
2957 // if (length == 12) {
2958 // if ((data[++index] == 'l')
2959 // && (data[++index] == 'd')
2960 // && (data[++index] == '_')
2961 // && (data[++index] == 'f')
2962 // && (data[++index] == 'u')
2963 // && (data[++index] == 'n')
2964 // && (data[++index] == 'c')
2965 // && (data[++index] == 't')
2966 // && (data[++index] == 'i')
2967 // && (data[++index] == 'o')
2968 // && (data[++index] == 'n')) {
2969 // return TokenNameold_function;
2972 return TokenNameIdentifier;
2976 if ((data[++index] == 'r')
2977 && (data[++index] == 'i')
2978 && (data[++index] == 'n')
2979 && (data[++index] == 't')) {
2980 return TokenNameprint;
2983 return TokenNameIdentifier;
2984 case 'r' : //return require require_once
2986 if ((data[++index] == 'e')
2987 && (data[++index] == 't')
2988 && (data[++index] == 'u')
2989 && (data[++index] == 'r')
2990 && (data[++index] == 'n')) {
2991 return TokenNamereturn;
2993 } else if (length == 7) {
2994 if ((data[++index] == 'e')
2995 && (data[++index] == 'q')
2996 && (data[++index] == 'u')
2997 && (data[++index] == 'i')
2998 && (data[++index] == 'r')
2999 && (data[++index] == 'e')) {
3000 return TokenNamerequire;
3002 } else if (length == 12) {
3003 if ((data[++index] == 'e')
3004 && (data[++index] == 'q')
3005 && (data[++index] == 'u')
3006 && (data[++index] == 'i')
3007 && (data[++index] == 'r')
3008 && (data[++index] == 'e')
3009 && (data[++index] == '_')
3010 && (data[++index] == 'o')
3011 && (data[++index] == 'n')
3012 && (data[++index] == 'c')
3013 && (data[++index] == 'e')) {
3014 return TokenNamerequire_once;
3017 return TokenNameIdentifier;
3019 case 's' : //static switch
3022 if (data[++index] == 't')
3023 if ((data[++index] == 'a')
3024 && (data[++index] == 't')
3025 && (data[++index] == 'i')
3026 && (data[++index] == 'c')) {
3027 return TokenNamestatic;
3029 return TokenNameIdentifier;
3031 (data[index] == 'w')
3032 && (data[++index] == 'i')
3033 && (data[++index] == 't')
3034 && (data[++index] == 'c')
3035 && (data[++index] == 'h'))
3036 return TokenNameswitch;
3038 return TokenNameIdentifier;
3040 return TokenNameIdentifier;
3047 if ((data[++index] == 'r')
3048 && (data[++index] == 'u')
3049 && (data[++index] == 'e'))
3050 return TokenNametrue;
3052 return TokenNameIdentifier;
3053 // if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
3054 // return TokenNamethis;
3057 return TokenNameIdentifier;
3063 if ((data[++index] == 'a') && (data[++index] == 'r'))
3064 return TokenNamevar;
3066 return TokenNameIdentifier;
3069 return TokenNameIdentifier;
3075 if ((data[++index] == 'h')
3076 && (data[++index] == 'i')
3077 && (data[++index] == 'l')
3078 && (data[++index] == 'e'))
3079 return TokenNamewhile;
3081 return TokenNameIdentifier;
3082 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
3083 //return TokenNamewidefp ;
3085 //return TokenNameIdentifier;
3087 return TokenNameIdentifier;
3093 if ((data[++index] == 'o') && (data[++index] == 'r'))
3094 return TokenNameXOR;
3096 return TokenNameIdentifier;
3099 return TokenNameIdentifier;
3102 return TokenNameIdentifier;
/**
 * Scans a numeric literal whose first digit is already in currentCharacter.
 *
 * When the literal is not dot-prefixed and starts with '0' followed by
 * 'x'/'X', it is hexadecimal: at least one hex digit is required
 * (INVALID_HEXA otherwise) and the result is TokenNameIntegerLiteral.
 * Otherwise digits are consumed, followed by an optional fraction, an
 * optional exponent ('e'/'E', optional sign, then at least one digit or
 * INVALID_FLOAT is thrown) and an optional 'd'/'D' suffix.
 *
 * @param dotPrefix initial value of the "floating" flag; per the comment
 *        below, the digits may have been preceded by a '.'
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException with INVALID_HEXA or INVALID_FLOAT when the
 *         literal is malformed
 */
3105 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3107 //when entering this method the currentCharacter is the first
3108 //digit of the number , i.e. it may be preceded by a . when
3111 boolean floating = dotPrefix;
3112 if ((!dotPrefix) && (currentCharacter == '0')) {
3113 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3114 //force the first char of the hexa number to exist...
3115 // consume next character
3116 unicodeAsBackSlash = false;
3117 if (((currentCharacter = source[currentPosition++]) == '\\')
3118 && (source[currentPosition] == 'u')) {
3119 getNextUnicodeChar();
3121 if (withoutUnicodePtr != 0) {
3122 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// a hex prefix must be followed by at least one hex digit
3125 if (Character.digit(currentCharacter, 16) == -1)
3126 throw new InvalidInputException(INVALID_HEXA);
3128 while (getNextCharAsDigit(16)) {
3130 // if (getNextChar('l', 'L') >= 0)
3131 // return TokenNameLongLiteral;
3133 return TokenNameIntegerLiteral;
3136 //there is x or X in the number
3137 //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3138 if (getNextCharAsDigit()) {
3139 //-------------potential octal-----------------
3140 while (getNextCharAsDigit()) {
3143 // if (getNextChar('l', 'L') >= 0) {
3144 // return TokenNameLongLiteral;
3147 // if (getNextChar('f', 'F') >= 0) {
3148 // return TokenNameFloatingPointLiteral;
3151 if (getNextChar('d', 'D') >= 0) {
3152 return TokenNameDoubleLiteral;
3153 } else { //make the distinction between octal and float ....
3154 if (getNextChar('.')) { //bingo ! ....
3155 while (getNextCharAsDigit()) {
3157 if (getNextChar('e', 'E') >= 0) {
3158 // consume next character
3159 unicodeAsBackSlash = false;
3160 if (((currentCharacter = source[currentPosition++]) == '\\')
3161 && (source[currentPosition] == 'u')) {
3162 getNextUnicodeChar();
3164 if (withoutUnicodePtr != 0) {
3165 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3169 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3170 // consume next character
3171 unicodeAsBackSlash = false;
3172 if (((currentCharacter = source[currentPosition++]) == '\\')
3173 && (source[currentPosition] == 'u')) {
3174 getNextUnicodeChar();
3176 if (withoutUnicodePtr != 0) {
3177 withoutUnicodeBuffer[++withoutUnicodePtr] =
// the exponent needs at least one digit after the optional sign
3182 if (!Character.isDigit(currentCharacter))
3183 throw new InvalidInputException(INVALID_FLOAT);
3184 while (getNextCharAsDigit()) {
3187 // if (getNextChar('f', 'F') >= 0)
3188 // return TokenNameFloatingPointLiteral;
3189 getNextChar('d', 'D'); //jump over potential d or D
3190 return TokenNameDoubleLiteral;
3192 return TokenNameIntegerLiteral;
3200 while (getNextCharAsDigit()) {
3203 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3204 // return TokenNameLongLiteral;
3206 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3207 while (getNextCharAsDigit()) {
3212 //if floating is true both exponent and suffix may be optional
3214 if (getNextChar('e', 'E') >= 0) {
3216 // consume next character
3217 unicodeAsBackSlash = false;
3218 if (((currentCharacter = source[currentPosition++]) == '\\')
3219 && (source[currentPosition] == 'u')) {
3220 getNextUnicodeChar();
3222 if (withoutUnicodePtr != 0) {
3223 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3227 if ((currentCharacter == '-')
3228 || (currentCharacter == '+')) { // consume next character
3229 unicodeAsBackSlash = false;
3230 if (((currentCharacter = source[currentPosition++]) == '\\')
3231 && (source[currentPosition] == 'u')) {
3232 getNextUnicodeChar();
3234 if (withoutUnicodePtr != 0) {
3235 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// the exponent needs at least one digit after the optional sign
3239 if (!Character.isDigit(currentCharacter))
3240 throw new InvalidInputException(INVALID_FLOAT);
3241 while (getNextCharAsDigit()) {
3245 if (getNextChar('d', 'D') >= 0)
3246 return TokenNameDoubleLiteral;
3247 // if (getNextChar('f', 'F') >= 0)
3248 // return TokenNameFloatingPointLiteral;
3250 //the long flag has been tested before
3252 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3255 * Search the line number corresponding to a specific position
3258 public final int getLineNumber(int position) {
// Looks up `position` in the lineEnds table; only the first linePtr + 1
// entries are considered.
// NOTE(review): the g/d bounds and the m probe suggest a binary search, but
// the loop and the return statements are not visible in this excerpt - confirm.
3260 if (lineEnds == null)
3262 int length = linePtr + 1;
3265 int g = 0, d = length - 1;
3269 if (position < lineEnds[m]) {
3271 } else if (position > lineEnds[m]) {
3277 if (position < lineEnds[m]) {
// NOTE(review): presumably stores `mode` in the public phpMode field; the
// method body is not visible in this excerpt - confirm.
3283 public void setPHPMode(boolean mode) {
/**
 * Installs the character buffer to scan and resets the scanning state.
 *
 * A null argument is replaced by an empty array. initialPosition and
 * currentPosition are reset to 0, containsAssertKeyword is cleared, and
 * withoutUnicodeBuffer is reallocated with the same length as the source.
 *
 * @param source the characters to scan, or null for an empty source
 */
3287 public final void setSource(char[] source) {
3288 //the source-buffer is set to sourceString
3290 if (source == null) {
3291 this.source = new char[0];
3293 this.source = source;
3296 initialPosition = currentPosition = 0;
3297 containsAssertKeyword = false;
3298 withoutUnicodeBuffer = new char[this.source.length];
/**
 * Builds a debugging dump of the source with the current token highlighted.
 *
 * Returns an "EOF" banner followed by the source when startPosition equals
 * the source length, and a "behind the EOF" banner when currentPosition is
 * past the end. Otherwise the source is split into the text before
 * startPosition, the current token (startPosition up to currentPosition - 1)
 * and the remainder, separated by "Starts here" / "Ends here" markers.
 */
3302 public String toString() {
3303 if (startPosition == source.length)
3304 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3305 if (currentPosition > source.length)
3306 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3308 char front[] = new char[startPosition];
3309 System.arraycopy(source, 0, front, 0, startPosition);
3311 int middleLength = (currentPosition - 1) - startPosition + 1;
3313 if (middleLength > -1) {
3314 middle = new char[middleLength];
3315 System.arraycopy(source, startPosition, middle, 0, middleLength);
3317 middle = new char[0];
// NOTE(review): `end` is sized source.length - (currentPosition - 1), while
// the length argument below is one less; if these are the arguments of the
// copy into `end`, its last element keeps the default char value - confirm.
3320 char end[] = new char[source.length - (currentPosition - 1)];
3323 (currentPosition - 1) + 1,
3326 source.length - (currentPosition - 1) - 1);
3328 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3329 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable form of a token constant, for debugging output.
 *
 * Keywords and operators map to their literal spelling. Tokens that carry
 * text (errors, identifiers, variables, numeric and string literals, heredocs)
 * are rendered as Kind(source) using getCurrentTokenSource(). Any other value
 * falls through to the final "not-a-token(...)" string.
 *
 * @param act the token constant to describe
 * @return the display string for that token
 */
3332 public final String toStringAction(int act) {
3334 case TokenNameERROR :
3335 return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3336 case TokenNameStopPHP :
3337 return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3338 case TokenNameIdentifier :
3339 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3340 case TokenNameVariable :
3341 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3343 return "as"; //$NON-NLS-1$
3344 case TokenNamebreak :
3345 return "break"; //$NON-NLS-1$
3346 case TokenNamecase :
3347 return "case"; //$NON-NLS-1$
3348 case TokenNameclass :
3349 return "class"; //$NON-NLS-1$
3350 case TokenNamecontinue :
3351 return "continue"; //$NON-NLS-1$
3352 case TokenNamedefault :
3353 return "default"; //$NON-NLS-1$
3354 case TokenNamedefine :
3355 return "define"; //$NON-NLS-1$
3357 return "do"; //$NON-NLS-1$
3358 case TokenNameecho :
3359 return "echo"; //$NON-NLS-1$
3360 case TokenNameelse :
3361 return "else"; //$NON-NLS-1$
3362 case TokenNameelseif :
3363 return "elseif"; //$NON-NLS-1$
3364 case TokenNameendfor :
3365 return "endfor"; //$NON-NLS-1$
3366 case TokenNameendforeach :
3367 return "endforeach"; //$NON-NLS-1$
3368 case TokenNameendif :
3369 return "endif"; //$NON-NLS-1$
3370 case TokenNameendswitch :
3371 return "endswitch"; //$NON-NLS-1$
3372 case TokenNameendwhile :
3373 return "endwhile"; //$NON-NLS-1$
3374 case TokenNameextends :
3375 return "extends"; //$NON-NLS-1$
3376 case TokenNamefalse :
3377 return "false"; //$NON-NLS-1$
3379 return "for"; //$NON-NLS-1$
3380 case TokenNameforeach :
3381 return "foreach"; //$NON-NLS-1$
3382 case TokenNamefunction :
3383 return "function"; //$NON-NLS-1$
3384 case TokenNameglobal :
3385 return "global"; //$NON-NLS-1$
3387 return "if"; //$NON-NLS-1$
3388 case TokenNameinclude :
3389 return "include"; //$NON-NLS-1$
3390 case TokenNameinclude_once :
3391 return "include_once"; //$NON-NLS-1$
3392 case TokenNamelist :
3393 return "list"; //$NON-NLS-1$
3395 return "new"; //$NON-NLS-1$
3396 case TokenNamenull :
3397 return "null"; //$NON-NLS-1$
3398 case TokenNameprint :
3399 return "print"; //$NON-NLS-1$
3400 case TokenNamerequire :
3401 return "require"; //$NON-NLS-1$
3402 case TokenNamerequire_once :
3403 return "require_once"; //$NON-NLS-1$
3404 case TokenNamereturn :
3405 return "return"; //$NON-NLS-1$
3406 case TokenNamestatic :
3407 return "static"; //$NON-NLS-1$
3408 case TokenNameswitch :
3409 return "switch"; //$NON-NLS-1$
3410 case TokenNametrue :
3411 return "true"; //$NON-NLS-1$
3413 return "var"; //$NON-NLS-1$
3414 case TokenNamewhile :
3415 return "while"; //$NON-NLS-1$
3416 case TokenNameIntegerLiteral :
3417 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3418 case TokenNameDoubleLiteral :
3419 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3420 case TokenNameStringLiteral :
3421 return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3422 case TokenNameStringConstant :
3423 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3424 case TokenNameStringInterpolated :
3425 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3426 case TokenNameHEREDOC :
3427 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3429 case TokenNamePLUS_PLUS :
3430 return "++"; //$NON-NLS-1$
3431 case TokenNameMINUS_MINUS :
3432 return "--"; //$NON-NLS-1$
3433 case TokenNameEQUAL_EQUAL :
3434 return "=="; //$NON-NLS-1$
3435 case TokenNameEQUAL_GREATER :
3436 return "=>"; //$NON-NLS-1$
3437 case TokenNameLESS_EQUAL :
3438 return "<="; //$NON-NLS-1$
3439 case TokenNameGREATER_EQUAL :
3440 return ">="; //$NON-NLS-1$
3441 case TokenNameNOT_EQUAL :
3442 return "!="; //$NON-NLS-1$
3443 case TokenNameLEFT_SHIFT :
3444 return "<<"; //$NON-NLS-1$
3445 case TokenNameRIGHT_SHIFT :
3446 return ">>"; //$NON-NLS-1$
3447 case TokenNamePLUS_EQUAL :
3448 return "+="; //$NON-NLS-1$
3449 case TokenNameMINUS_EQUAL :
3450 return "-="; //$NON-NLS-1$
3451 case TokenNameMULTIPLY_EQUAL :
3452 return "*="; //$NON-NLS-1$
3453 case TokenNameDIVIDE_EQUAL :
3454 return "/="; //$NON-NLS-1$
3455 case TokenNameAND_EQUAL :
3456 return "&="; //$NON-NLS-1$
3457 case TokenNameOR_EQUAL :
3458 return "|="; //$NON-NLS-1$
3459 case TokenNameXOR_EQUAL :
3460 return "^="; //$NON-NLS-1$
3461 case TokenNameREMAINDER_EQUAL :
3462 return "%="; //$NON-NLS-1$
3463 case TokenNameLEFT_SHIFT_EQUAL :
3464 return "<<="; //$NON-NLS-1$
3465 case TokenNameRIGHT_SHIFT_EQUAL :
3466 return ">>="; //$NON-NLS-1$
3467 case TokenNameOR_OR :
3468 return "||"; //$NON-NLS-1$
3469 case TokenNameAND_AND :
3470 return "&&"; //$NON-NLS-1$
3471 case TokenNamePLUS :
3472 return "+"; //$NON-NLS-1$
3473 case TokenNameMINUS :
3474 return "-"; //$NON-NLS-1$
3475 case TokenNameMINUS_GREATER :
3478 return "!"; //$NON-NLS-1$
3479 case TokenNameREMAINDER :
3480 return "%"; //$NON-NLS-1$
3482 return "^"; //$NON-NLS-1$
3484 return "&"; //$NON-NLS-1$
3485 case TokenNameMULTIPLY :
3486 return "*"; //$NON-NLS-1$
3488 return "|"; //$NON-NLS-1$
3489 case TokenNameTWIDDLE :
3490 return "~"; //$NON-NLS-1$
3491 case TokenNameTWIDDLE_EQUAL :
3492 return "~="; //$NON-NLS-1$
3493 case TokenNameDIVIDE :
3494 return "/"; //$NON-NLS-1$
3495 case TokenNameGREATER :
3496 return ">"; //$NON-NLS-1$
3497 case TokenNameLESS :
3498 return "<"; //$NON-NLS-1$
3499 case TokenNameLPAREN :
3500 return "("; //$NON-NLS-1$
3501 case TokenNameRPAREN :
3502 return ")"; //$NON-NLS-1$
3503 case TokenNameLBRACE :
3504 return "{"; //$NON-NLS-1$
3505 case TokenNameRBRACE :
3506 return "}"; //$NON-NLS-1$
3507 case TokenNameLBRACKET :
3508 return "["; //$NON-NLS-1$
3509 case TokenNameRBRACKET :
3510 return "]"; //$NON-NLS-1$
3511 case TokenNameSEMICOLON :
3512 return ";"; //$NON-NLS-1$
3513 case TokenNameQUESTION :
3514 return "?"; //$NON-NLS-1$
3515 case TokenNameCOLON :
3516 return ":"; //$NON-NLS-1$
3517 case TokenNameCOMMA :
3518 return ","; //$NON-NLS-1$
3520 return "."; //$NON-NLS-1$
3521 case TokenNameEQUAL :
3522 return "="; //$NON-NLS-1$
3525 case TokenNameDOLLAR_LBRACE :
3528 return "EOF"; //$NON-NLS-1$
// fallback for any token value without a dedicated case above
3530 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
// NOTE(review): looks like the parameter list of the three-argument Scanner
// constructor, which passes checkNonExternalizedStringLiterals on to another
// call; the header and the call itself are not visible here - confirm.
3535 boolean tokenizeComments,
3536 boolean tokenizeWhiteSpace,
3537 boolean checkNonExternalizedStringLiterals) {
3541 checkNonExternalizedStringLiterals,
// NOTE(review): looks like the parameter list and body of the four-argument
// Scanner constructor (header line not visible here - confirm). The visible
// body sets eofPosition to Integer.MAX_VALUE and copies each flag into the
// field of the same name.
3546 boolean tokenizeComments,
3547 boolean tokenizeWhiteSpace,
3548 boolean checkNonExternalizedStringLiterals,
3549 boolean assertMode) {
3550 this.eofPosition = Integer.MAX_VALUE;
3551 this.tokenizeComments = tokenizeComments;
3552 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3553 this.checkNonExternalizedStringLiterals =
3554 checkNonExternalizedStringLiterals;
3555 this.assertMode = assertMode;
/**
 * Runs parseTags on currentLine. The currentLine == null check below guards
 * a statement that is not visible in this excerpt (presumably an early
 * return - confirm).
 *
 * @throws InvalidInputException propagated from parseTags
 */
3558 private void checkNonExternalizeString() throws InvalidInputException {
3559 if (currentLine == null)
3561 parseTags(currentLine);
3564 private void parseTags(NLSLine line) throws InvalidInputException {
3565 String s = new String(getCurrentTokenSource());
3566 int pos = s.indexOf(TAG_PREFIX);
3567 int lineLength = line.size();
3569 int start = pos + TAG_PREFIX_LENGTH;
3570 int end = s.indexOf(TAG_POSTFIX, start);
3571 String index = s.substring(start, end);
3574 i = Integer.parseInt(index) - 1;
3575 // Tags are one based not zero based.
3576 } catch (NumberFormatException e) {
3577 i = -1; // we don't want to consider this as a valid NLS tag
3579 if (line.exists(i)) {
3582 pos = s.indexOf(TAG_PREFIX, start);
3585 this.nonNLSStrings = new StringLiteral[lineLength];
3586 int nonNLSCounter = 0;
3587 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3588 StringLiteral literal = (StringLiteral) iterator.next();
3589 if (literal != null) {
3590 this.nonNLSStrings[nonNLSCounter++] = literal;
3593 if (nonNLSCounter == 0) {
3594 this.nonNLSStrings = null;
3598 this.wasNonExternalizedStringLiteral = true;
3599 if (nonNLSCounter != lineLength) {
3603 (this.nonNLSStrings = new StringLiteral[nonNLSCounter]),