1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
17 import net.sourceforge.phpdt.core.compiler.*;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
20 public class Scanner implements IScanner, ITerminalSymbols {
23 - getNextToken() which returns the current type of the token
24 (this value is not memorized by the scanner)
25 - getCurrentTokenSource() which provides the token "REAL" source
26 (aka all unicode have been transformed into a correct char)
27 - sourceStart gives the position into the stream
28 - currentPosition-1 gives the sourceEnd position into the stream
32 private boolean assertMode;
33 public boolean useAssertAsAnIndentifier = false;
34 //flag indicating if processed source contains occurrences of keyword assert
35 public boolean containsAssertKeyword = false;
37 public boolean recordLineSeparator;
38 public boolean phpMode = false;
40 public char currentCharacter;
41 public int startPosition;
42 public int currentPosition;
43 public int initialPosition, eofPosition;
44 // after this position EOF tokens are generated instead of real tokens from the source
46 public boolean tokenizeComments;
47 public boolean tokenizeWhiteSpace;
49 //source should be viewed as a window (aka a part)
50 //of an entire very large stream
54 public char[] withoutUnicodeBuffer;
55 public int withoutUnicodePtr;
56 //when == 0 ==> no unicode in the current token
57 public boolean unicodeAsBackSlash = false;
59 public boolean scanningFloatLiteral = false;
61 //support for /** comments
62 //public char[][] comments = new char[10][];
63 public int[] commentStops = new int[10];
64 public int[] commentStarts = new int[10];
65 public int commentPtr = -1; // no comment test with commentPtr value -1
67 //diet parsing support - jump over some method body when requested
68 public boolean diet = false;
70 //support for the poor-line-debuggers ....
71 //remember the position of the cr/lf
72 public int[] lineEnds = new int[250];
73 public int linePtr = -1;
74 public boolean wasAcr = false;
76 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
78 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
79 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
80 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
81 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
82 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
83 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
84 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
86 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
87 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
88 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
89 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
91 //----------------optimized identifier management------------------
92 static final char[] charArray_a = new char[] { 'a' },
93 charArray_b = new char[] { 'b' },
94 charArray_c = new char[] { 'c' },
95 charArray_d = new char[] { 'd' },
96 charArray_e = new char[] { 'e' },
97 charArray_f = new char[] { 'f' },
98 charArray_g = new char[] { 'g' },
99 charArray_h = new char[] { 'h' },
100 charArray_i = new char[] { 'i' },
101 charArray_j = new char[] { 'j' },
102 charArray_k = new char[] { 'k' },
103 charArray_l = new char[] { 'l' },
104 charArray_m = new char[] { 'm' },
105 charArray_n = new char[] { 'n' },
106 charArray_o = new char[] { 'o' },
107 charArray_p = new char[] { 'p' },
108 charArray_q = new char[] { 'q' },
109 charArray_r = new char[] { 'r' },
110 charArray_s = new char[] { 's' },
111 charArray_t = new char[] { 't' },
112 charArray_u = new char[] { 'u' },
113 charArray_v = new char[] { 'v' },
114 charArray_w = new char[] { 'w' },
115 charArray_x = new char[] { 'x' },
116 charArray_y = new char[] { 'y' },
117 charArray_z = new char[] { 'z' };
119 static final char[] initCharArray =
120 new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
121 static final int TableSize = 30, InternalTableSize = 6;
123 public static final int OptimizedLength = 6;
125 final char[][][][] charArray_length =
126 new char[OptimizedLength][TableSize][InternalTableSize][];
127 // support for detecting non-externalized string literals
128 int currentLineNr = -1;
129 int previousLineNr = -1;
130 NLSLine currentLine = null;
131 List lines = new ArrayList();
132 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
133 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
134 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
135 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
136 public StringLiteral[] nonNLSStrings = null;
137 public boolean checkNonExternalizedStringLiterals = true;
138 public boolean wasNonExternalizedStringLiteral = false;
141 for (int i = 0; i < 6; i++) {
142 for (int j = 0; j < TableSize; j++) {
143 for (int k = 0; k < InternalTableSize; k++) {
144 charArray_length[i][j][k] = initCharArray;
149 static int newEntry2 = 0,
155 public static final int RoundBracket = 0;
156 public static final int SquareBracket = 1;
157 public static final int CurlyBracket = 2;
158 public static final int BracketKinds = 3;
160 public static final boolean DEBUG = false;
164 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
// Convenience constructor: forwards both flags to the three-argument
// constructor and supplies false for its third argument.
165 this(tokenizeComments, tokenizeWhiteSpace, false);
169 * Determines if the specified character is
170 * permissible as the first character in a PHP identifier
172 public static boolean isPHPIdentifierStart(char ch) {
// Visible alternatives: any char accepted by Character.isLetter, or any
// char in the range 0x7F..0xFF (inclusive).
// NOTE(review): embedded line 174 is absent from this listing, so one more
// alternative may be or-ed in between the two visible ones - confirm.
173 return Character.isLetter(ch)
175 || (0x7F <= ch && ch <= 0xFF);
179 * Determines if the specified character may be part of a PHP identifier as
180 * other than the first character
182 public static boolean isPHPIdentifierPart(char ch) {
// Visible alternatives: any char accepted by Character.isLetterOrDigit, or
// any char in the range 0x7F..0xFF (inclusive).
// NOTE(review): embedded line 184 is absent from this listing, so one more
// alternative may be or-ed in between the two visible ones - confirm.
183 return Character.isLetterOrDigit(ch)
185 || (0x7F <= ch && ch <= 0xFF);
188 public final boolean atEnd() {
// Answers true exactly when currentPosition equals source.length.
189 // This test is not meaningful when source is
190 // only a part (a window) of the real input stream
192 return source.length == currentPosition;
194 public char[] getCurrentIdentifierSource() {
195 //Returns the REAL source of the current token (unicode escapes are already decoded).
// Two paths are visible: when withoutUnicodePtr is non-zero the result is
// built from withoutUnicodeBuffer; otherwise the token length
// (currentPosition - startPosition) selects one of the
// optimizedCurrentTokenSourceN() helpers.
// NOTE(review): the case labels and the copy calls are not visible in this
// listing - confirm which lengths map to which helper.
198 if (withoutUnicodePtr != 0)
199 //0 is reserved as the "no unicode in this token" flag, so the first real char is at index 1
201 withoutUnicodeBuffer,
203 result = new char[withoutUnicodePtr],
207 int length = currentPosition - startPosition;
208 switch (length) { // see OptimizedLength
210 return optimizedCurrentTokenSource1();
212 return optimizedCurrentTokenSource2();
214 return optimizedCurrentTokenSource3();
216 return optimizedCurrentTokenSource4();
218 return optimizedCurrentTokenSource5();
220 return optimizedCurrentTokenSource6();
// a fresh array of exactly length chars; presumably used for the lengths
// the switch does not handle - confirm
226 result = new char[length],
232 public int getCurrentTokenEndPosition() {
// The current token ends one position before currentPosition.
233 return this.currentPosition - 1;
235 public final char[] getCurrentTokenSource() {
236 // Returns the REAL source of the current token (unicode escapes are already decoded)
// When withoutUnicodePtr is non-zero a new array of withoutUnicodePtr chars
// is filled from withoutUnicodeBuffer; otherwise the new array has
// currentPosition - startPosition chars.
// NOTE(review): the copy calls themselves are not visible in this listing.
239 if (withoutUnicodePtr != 0)
240 // 0 is reserved as the "no unicode in this token" flag, so the first real char is at index 1
242 withoutUnicodeBuffer,
244 result = new char[withoutUnicodePtr],
252 result = new char[length = currentPosition - startPosition],
259 public final char[] getCurrentTokenSource(int startPos) {
260 // Returns the REAL source of the current token (unicode escapes are already decoded)
// Same shape as the no-argument variant, except that the non-unicode path
// sizes the result as currentPosition - startPos, i.e. relative to the
// caller-supplied start instead of startPosition.
// NOTE(review): the copy calls themselves are not visible in this listing.
263 if (withoutUnicodePtr != 0)
264 // 0 is reserved as the "no unicode in this token" flag, so the first real char is at index 1
266 withoutUnicodeBuffer,
268 result = new char[withoutUnicodePtr],
276 result = new char[length = currentPosition - startPos],
283 public final char[] getCurrentTokenSourceString() {
284 //Returns the REAL source of the current token (unicode escapes are already decoded),
285 //WITHOUT the two quote characters at its beginning and end.
288 if (withoutUnicodePtr != 0)
289 //0 is reserved as the "no unicode in this token" flag, so the first real char is at index 1
290 System.arraycopy(withoutUnicodeBuffer, 2,
291 //2 is 1 (real start) + 1 (to jump over the opening quote)
// withoutUnicodePtr - 2 chars are copied, which also leaves out the closing quote
292 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
// non-unicode path: the result is two chars shorter than the raw token
298 result = new char[length = currentPosition - startPosition - 2],
304 public int getCurrentTokenStartPosition() {
// The current token starts at startPosition.
305 return this.startPosition;
308 * Search the source position corresponding to the end of a given line number
310 * Line numbers are 1-based, and relative to the scanner initialPosition.
311 * Character positions are 0-based.
313 * In case the given line number is inconsistent, answers -1.
315 public final int getLineEnd(int lineNumber) {
// Looks up the recorded end position of the given line in lineEnds.
// NOTE(review): the outcomes of the three guards below are on lines that are
// not visible in this listing - confirm what each one returns.
317 if (lineEnds == null)
319 if (lineNumber >= lineEnds.length)
324 if (lineNumber == lineEnds.length - 1)
326 return lineEnds[lineNumber - 1];
327 // the end of line lineNumber is stored at index lineNumber - 1
330 * Search the source position corresponding to the beginning of a given line number
332 * Line numbers are 1-based, and relative to the scanner initialPosition.
333 * Character positions are 0-based.
335 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
337 * In case the given line number is inconsistent, answers -1.
339 public final int getLineStart(int lineNumber) {
// Derives the start of a line from the recorded end of the line before it.
// NOTE(review): the outcomes of the two guards below, and the condition under
// which initialPosition is returned, are on lines not visible in this listing.
341 if (lineEnds == null)
343 if (lineNumber >= lineEnds.length)
349 return initialPosition;
350 return lineEnds[lineNumber - 2] + 1;
351 // a line starts one character after the lineEnd of the previous line
353 public final boolean getNextChar(char testedChar) {
355 //Handles unicode escapes in the source.
356 //Once an escape has been seen, the decoded chars are collected in withoutUnicodeBuffer.
357 //On a match currentCharacter holds the newly visited char
358 //and currentPosition points just past it;
359 //a match means that char is equal to testedChar.
360 //On a mismatch currentPosition is restored to its entry value (currentCharacter may still have been overwritten).
362 //ALL getNextChar.... ARE OPTIMIZED COPIES
// temp remembers the entry position so that every failure path can rewind
364 int temp = currentPosition;
366 if (((currentCharacter = source[currentPosition++]) == '\\')
367 && (source[currentPosition] == 'u')) {
368 //-------------unicode processing ------------
// the loop runs while the next char is still 'u' (loop body not visible in this listing)
372 while (source[currentPosition] == 'u') {
// read four digits; a numeric value above 15 is not a hex digit, and the position is rewound
377 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
379 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
381 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
383 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
385 currentPosition = temp;
// combine the four 4-bit values into a single char
389 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
390 if (currentCharacter != testedChar) {
391 currentPosition = temp;
// records whether the decoded char is itself a backslash
394 unicodeAsBackSlash = currentCharacter == '\\';
396 //need the unicode buffer
397 if (withoutUnicodePtr == 0) {
398 //first escape in this token: the chars scanned so far still have to be put into the buffer
399 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
403 withoutUnicodeBuffer,
407 //append the decoded char to the buffer
408 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
411 } //-------------end unicode processing--------------
413 if (currentCharacter != testedChar) {
414 currentPosition = temp;
417 unicodeAsBackSlash = false;
// keep the buffer in step once it is in use for this token
418 if (withoutUnicodePtr != 0)
419 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source: clear the flag and rewind
422 } catch (IndexOutOfBoundsException e) {
423 unicodeAsBackSlash = false;
424 currentPosition = temp;
428 public final int getNextChar(char testedChar1, char testedChar2) {
429 //Result: 0 when the next char is testedChar1, 1 when it is testedChar2, -1 otherwise
430 //callers can test (x==0) for the first and (x>0) for the second
431 //Handles unicode escapes in the source.
432 //Once an escape has been seen, the decoded chars are collected in withoutUnicodeBuffer.
433 //On a match currentCharacter holds the newly visited char
434 //and currentPosition points just past it;
435 //a match means that char is equal to testedChar1 or testedChar2.
436 //On a mismatch currentPosition is restored to its entry value (currentCharacter may still have been overwritten).
438 //ALL getNextChar.... ARE OPTIMIZED COPIES
// temp remembers the entry position so that every failure path can rewind
440 int temp = currentPosition;
443 if (((currentCharacter = source[currentPosition++]) == '\\')
444 && (source[currentPosition] == 'u')) {
445 //-------------unicode processing ------------
// the loop runs while the next char is still 'u' (loop body not visible in this listing)
449 while (source[currentPosition] == 'u') {
// read four digits; a numeric value above 15 is not a hex digit, and the position is rewound
454 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
456 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
458 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
460 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
462 currentPosition = temp;
// combine the four 4-bit values into a single char
466 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
467 if (currentCharacter == testedChar1)
469 else if (currentCharacter == testedChar2)
472 currentPosition = temp;
476 //need the unicode buffer
477 if (withoutUnicodePtr == 0) {
478 //first escape in this token: the chars scanned so far still have to be put into the buffer
479 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
483 withoutUnicodeBuffer,
487 //append the decoded char to the buffer
488 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
490 } //-------------end unicode processing--------------
492 if (currentCharacter == testedChar1)
494 else if (currentCharacter == testedChar2)
497 currentPosition = temp;
// keep the buffer in step once it is in use for this token
501 if (withoutUnicodePtr != 0)
502 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source: rewind
505 } catch (IndexOutOfBoundsException e) {
506 currentPosition = temp;
510 public final boolean getNextCharAsDigit() {
512 //Handles unicode escapes in the source.
513 //Once an escape has been seen, the decoded chars are collected in withoutUnicodeBuffer.
514 //On a match currentCharacter holds the newly visited char
515 //and currentPosition points just past it;
516 //a match means Character.isDigit accepts that char.
517 //On a mismatch currentPosition is restored to its entry value (currentCharacter may still have been overwritten).
519 //ALL getNextChar.... ARE OPTIMIZED COPIES
// temp remembers the entry position so that every failure path can rewind
521 int temp = currentPosition;
523 if (((currentCharacter = source[currentPosition++]) == '\\')
524 && (source[currentPosition] == 'u')) {
525 //-------------unicode processing ------------
// the loop runs while the next char is still 'u' (loop body not visible in this listing)
529 while (source[currentPosition] == 'u') {
// read four digits; a numeric value above 15 is not a hex digit, and the position is rewound
534 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
536 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
538 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
540 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
542 currentPosition = temp;
// combine the four 4-bit values into a single char
546 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
547 if (!Character.isDigit(currentCharacter)) {
548 currentPosition = temp;
552 //need the unicode buffer
553 if (withoutUnicodePtr == 0) {
554 //first escape in this token: the chars scanned so far still have to be put into the buffer
555 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
559 withoutUnicodeBuffer,
563 //append the decoded char to the buffer
564 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
566 } //-------------end unicode processing--------------
568 if (!Character.isDigit(currentCharacter)) {
569 currentPosition = temp;
// keep the buffer in step once it is in use for this token
572 if (withoutUnicodePtr != 0)
573 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source: rewind
576 } catch (IndexOutOfBoundsException e) {
577 currentPosition = temp;
581 public final boolean getNextCharAsDigit(int radix) {
583 //Handles unicode escapes in the source.
584 //Once an escape has been seen, the decoded chars are collected in withoutUnicodeBuffer.
585 //On a match currentCharacter holds the newly visited char
586 //and currentPosition points just past it;
587 //a match means Character.digit(char, radix) does not answer -1 for that char.
588 //On a mismatch currentPosition is restored to its entry value (currentCharacter may still have been overwritten).
590 //ALL getNextChar.... ARE OPTIMIZED COPIES
// temp remembers the entry position so that every failure path can rewind
592 int temp = currentPosition;
594 if (((currentCharacter = source[currentPosition++]) == '\\')
595 && (source[currentPosition] == 'u')) {
596 //-------------unicode processing ------------
// the loop runs while the next char is still 'u' (loop body not visible in this listing)
600 while (source[currentPosition] == 'u') {
// read four digits; a numeric value above 15 is not a hex digit, and the position is rewound
605 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
607 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
609 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
611 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
613 currentPosition = temp;
// combine the four 4-bit values into a single char
617 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
618 if (Character.digit(currentCharacter, radix) == -1) {
619 currentPosition = temp;
623 //need the unicode buffer
624 if (withoutUnicodePtr == 0) {
625 //first escape in this token: the chars scanned so far still have to be put into the buffer
626 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
630 withoutUnicodeBuffer,
634 //append the decoded char to the buffer
635 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
637 } //-------------end unicode processing--------------
639 if (Character.digit(currentCharacter, radix) == -1) {
640 currentPosition = temp;
// keep the buffer in step once it is in use for this token
643 if (withoutUnicodePtr != 0)
644 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source: rewind
647 } catch (IndexOutOfBoundsException e) {
648 currentPosition = temp;
652 public boolean getNextCharAsJavaIdentifierPart() {
654 //Handles unicode escapes in the source.
655 //Once an escape has been seen, the decoded chars are collected in withoutUnicodeBuffer.
656 //On a match currentCharacter holds the newly visited char
657 //and currentPosition points just past it;
658 //a match means isPHPIdentifierPart accepts that char (despite the "Java" in the method name).
659 //On a mismatch currentPosition is restored to its entry value (currentCharacter may still have been overwritten).
661 //ALL getNextChar.... ARE OPTIMIZED COPIES
// temp remembers the entry position so that every failure path can rewind
663 int temp = currentPosition;
665 if (((currentCharacter = source[currentPosition++]) == '\\')
666 && (source[currentPosition] == 'u')) {
667 //-------------unicode processing ------------
// the loop runs while the next char is still 'u' (loop body not visible in this listing)
671 while (source[currentPosition] == 'u') {
// read four digits; a numeric value above 15 is not a hex digit, and the position is rewound
676 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
678 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
680 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
682 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
684 currentPosition = temp;
// combine the four 4-bit values into a single char
688 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
689 if (!isPHPIdentifierPart(currentCharacter)) {
690 currentPosition = temp;
694 //need the unicode buffer
695 if (withoutUnicodePtr == 0) {
696 //first escape in this token: the chars scanned so far still have to be put into the buffer
697 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
701 withoutUnicodeBuffer,
705 //append the decoded char to the buffer
706 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
708 } //-------------end unicode processing--------------
710 if (!isPHPIdentifierPart(currentCharacter)) {
711 currentPosition = temp;
// keep the buffer in step once it is in use for this token
715 if (withoutUnicodePtr != 0)
716 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source: rewind
719 } catch (IndexOutOfBoundsException e) {
720 currentPosition = temp;
725 public int getNextToken() throws InvalidInputException {
726 int htmlPosition = currentPosition;
729 currentCharacter = source[currentPosition++];
730 if (currentCharacter == '<') {
731 if (getNextChar('?')) {
732 currentCharacter = source[currentPosition++];
733 if ((currentCharacter == ' ')
734 || Character.isWhitespace(currentCharacter)) {
736 startPosition = currentPosition;
738 if (tokenizeWhiteSpace) {
739 // && (whiteStart != currentPosition - 1)) {
740 // reposition scanner in case we are interested by spaces as tokens
741 startPosition = htmlPosition;
742 return TokenNameHTML;
746 (currentCharacter == 'P') || (currentCharacter == 'p');
748 int test = getNextChar('H', 'h');
750 test = getNextChar('P', 'p');
753 startPosition = currentPosition;
756 if (tokenizeWhiteSpace) {
757 // && (whiteStart != currentPosition - 1)) {
758 // reposition scanner in case we are interested by spaces as tokens
759 startPosition = htmlPosition;
760 return TokenNameHTML;
769 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
770 if (recordLineSeparator) {
777 } //-----------------end switch while try--------------------
778 catch (IndexOutOfBoundsException e) {
779 if (tokenizeWhiteSpace) {
780 // && (whiteStart != currentPosition - 1)) {
781 // reposition scanner in case we are interested by spaces as tokens
782 startPosition = htmlPosition;
790 jumpOverMethodBody();
792 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
795 while (true) { //loop for jumping over comments
796 withoutUnicodePtr = 0;
797 //start with a new token (even comment written with unicode )
799 // ---------Consume white space and handles startPosition---------
800 int whiteStart = currentPosition;
801 boolean isWhiteSpace;
803 startPosition = currentPosition;
804 if (((currentCharacter = source[currentPosition++]) == '\\')
805 && (source[currentPosition] == 'u')) {
806 isWhiteSpace = jumpOverUnicodeWhiteSpace();
808 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
809 checkNonExternalizeString();
810 if (recordLineSeparator) {
817 (currentCharacter == ' ')
818 || Character.isWhitespace(currentCharacter);
820 } while (isWhiteSpace);
821 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
822 // reposition scanner in case we are interested by spaces as tokens
824 startPosition = whiteStart;
825 return TokenNameWHITESPACE;
827 //little trick to get out in the middle of a source computation
828 if (currentPosition > eofPosition)
831 // ---------Identify the next token-------------
833 switch (currentCharacter) {
835 return TokenNameLPAREN;
837 return TokenNameRPAREN;
839 return TokenNameLBRACE;
841 return TokenNameRBRACE;
843 return TokenNameLBRACKET;
845 return TokenNameRBRACKET;
847 return TokenNameSEMICOLON;
849 return TokenNameCOMMA;
852 if (getNextCharAsDigit())
853 return scanNumber(true);
858 if ((test = getNextChar('+', '=')) == 0)
859 return TokenNamePLUS_PLUS;
861 return TokenNamePLUS_EQUAL;
862 return TokenNamePLUS;
867 if ((test = getNextChar('-', '=')) == 0)
868 return TokenNameMINUS_MINUS;
870 return TokenNameMINUS_EQUAL;
871 if (getNextChar('>'))
872 return TokenNameMINUS_GREATER;
874 return TokenNameMINUS;
877 if (getNextChar('='))
878 return TokenNameTWIDDLE_EQUAL;
879 return TokenNameTWIDDLE;
881 if (getNextChar('='))
882 return TokenNameNOT_EQUAL;
885 if (getNextChar('='))
886 return TokenNameMULTIPLY_EQUAL;
887 return TokenNameMULTIPLY;
889 if (getNextChar('='))
890 return TokenNameREMAINDER_EQUAL;
891 return TokenNameREMAINDER;
895 if ((test = getNextChar('=', '<')) == 0)
896 return TokenNameLESS_EQUAL;
898 if (getNextChar('='))
899 return TokenNameLEFT_SHIFT_EQUAL;
900 if (getNextChar('<')) {
901 int heredocStart = currentPosition;
902 int heredocLength = 0;
903 currentCharacter = source[currentPosition++];
904 if (isPHPIdentifierStart(currentCharacter)) {
905 currentCharacter = source[currentPosition++];
907 return TokenNameERROR;
909 while (isPHPIdentifierPart(currentCharacter)) {
910 currentCharacter = source[currentPosition++];
913 heredocLength = currentPosition - heredocStart - 1;
915 // heredoc end-tag determination
916 boolean endTag = true;
919 ch = source[currentPosition++];
920 if (ch == '\r' || ch == '\n') {
921 if (recordLineSeparator) {
926 for (int i = 0; i < heredocLength; i++) {
927 if (source[currentPosition + i]
928 != source[heredocStart + i]) {
934 currentPosition += heredocLength - 1;
935 currentCharacter = source[currentPosition++];
936 break; // do...while loop
944 return TokenNameHEREDOC;
946 return TokenNameLEFT_SHIFT;
948 return TokenNameLESS;
953 if ((test = getNextChar('=', '>')) == 0)
954 return TokenNameGREATER_EQUAL;
956 if ((test = getNextChar('=', '>')) == 0)
957 return TokenNameRIGHT_SHIFT_EQUAL;
958 return TokenNameRIGHT_SHIFT;
960 return TokenNameGREATER;
963 if (getNextChar('='))
964 return TokenNameEQUAL_EQUAL;
965 if (getNextChar('>'))
966 return TokenNameEQUAL_GREATER;
967 return TokenNameEQUAL;
971 if ((test = getNextChar('&', '=')) == 0)
972 return TokenNameAND_AND;
974 return TokenNameAND_EQUAL;
980 if ((test = getNextChar('|', '=')) == 0)
981 return TokenNameOR_OR;
983 return TokenNameOR_EQUAL;
987 if (getNextChar('='))
988 return TokenNameXOR_EQUAL;
991 if (getNextChar('>')) {
993 return TokenNameStopPHP;
995 return TokenNameQUESTION;
997 if (getNextChar(':'))
998 return TokenNameCOLON_COLON;
999 return TokenNameCOLON;
1005 // if ((test = getNextChar('\n', '\r')) == 0) {
1006 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1009 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1010 // for (int lookAhead = 0;
1013 // if (currentPosition + lookAhead
1014 // == source.length)
1016 // if (source[currentPosition + lookAhead]
1019 // if (source[currentPosition + lookAhead]
1021 // currentPosition += lookAhead + 1;
1025 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1028 // if (getNextChar('\'')) {
1029 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1030 // for (int lookAhead = 0;
1033 // if (currentPosition + lookAhead
1034 // == source.length)
1036 // if (source[currentPosition + lookAhead]
1039 // if (source[currentPosition + lookAhead]
1041 // currentPosition += lookAhead + 1;
1045 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1047 // if (getNextChar('\\'))
1048 // scanEscapeCharacter();
1049 // else { // consume next character
1050 // unicodeAsBackSlash = false;
1051 // if (((currentCharacter = source[currentPosition++])
1053 // && (source[currentPosition] == 'u')) {
1054 // getNextUnicodeChar();
1056 // if (withoutUnicodePtr != 0) {
1057 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1058 // currentCharacter;
1062 // // if (getNextChar('\''))
1063 // // return TokenNameCharacterLiteral;
1064 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1065 // for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1066 // if (currentPosition + lookAhead == source.length)
1068 // if (source[currentPosition + lookAhead] == '\n')
1070 // if (source[currentPosition + lookAhead] == '\'') {
1071 // currentPosition += lookAhead + 1;
1075 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1078 // consume next character
1079 unicodeAsBackSlash = false;
1080 if (((currentCharacter = source[currentPosition++]) == '\\')
1081 && (source[currentPosition] == 'u')) {
1082 getNextUnicodeChar();
1084 if (withoutUnicodePtr != 0) {
1085 withoutUnicodeBuffer[++withoutUnicodePtr] =
1090 while (currentCharacter != '\'') {
1092 /**** in PHP \r and \n are valid in string literals ****/
1093 // if ((currentCharacter == '\n')
1094 // || (currentCharacter == '\r')) {
1095 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1096 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1097 // if (currentPosition + lookAhead == source.length)
1099 // if (source[currentPosition + lookAhead] == '\n')
1101 // if (source[currentPosition + lookAhead] == '\"') {
1102 // currentPosition += lookAhead + 1;
1106 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1108 if (currentCharacter == '\\') {
1109 int escapeSize = currentPosition;
1110 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1111 //scanEscapeCharacter has a side effect on this value, and we need the previous value a few lines below
1112 scanSingleQuotedEscapeCharacter();
1113 escapeSize = currentPosition - escapeSize;
1114 if (withoutUnicodePtr == 0) {
1115 //buffer all the entries that have been left aside....
1117 currentPosition - escapeSize - 1 - startPosition;
1121 withoutUnicodeBuffer,
1124 withoutUnicodeBuffer[++withoutUnicodePtr] =
1126 } else { //overwrite the / in the buffer
1127 withoutUnicodeBuffer[withoutUnicodePtr] =
1129 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1130 withoutUnicodePtr--;
1134 // consume next character
1135 unicodeAsBackSlash = false;
1136 if (((currentCharacter = source[currentPosition++]) == '\\')
1137 && (source[currentPosition] == 'u')) {
1138 getNextUnicodeChar();
1140 if (withoutUnicodePtr != 0) {
1141 withoutUnicodeBuffer[++withoutUnicodePtr] =
1147 } catch (IndexOutOfBoundsException e) {
1148 throw new InvalidInputException(UNTERMINATED_STRING);
1149 } catch (InvalidInputException e) {
1150 if (e.getMessage().equals(INVALID_ESCAPE)) {
1151 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1152 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1153 if (currentPosition + lookAhead == source.length)
1155 if (source[currentPosition + lookAhead] == '\n')
1157 if (source[currentPosition + lookAhead] == '\'') {
1158 currentPosition += lookAhead + 1;
1166 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1167 if (currentLine == null) {
1168 currentLine = new NLSLine();
1169 lines.add(currentLine);
1173 getCurrentTokenSourceString(),
1175 currentPosition - 1));
1177 return TokenNameStringConstant;
1180 // consume next character
1181 unicodeAsBackSlash = false;
1182 if (((currentCharacter = source[currentPosition++]) == '\\')
1183 && (source[currentPosition] == 'u')) {
1184 getNextUnicodeChar();
1186 if (withoutUnicodePtr != 0) {
1187 withoutUnicodeBuffer[++withoutUnicodePtr] =
1192 while (currentCharacter != '"') {
1194 /**** in PHP \r and \n are valid in string literals ****/
1195 // if ((currentCharacter == '\n')
1196 // || (currentCharacter == '\r')) {
1197 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1198 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1199 // if (currentPosition + lookAhead == source.length)
1201 // if (source[currentPosition + lookAhead] == '\n')
1203 // if (source[currentPosition + lookAhead] == '\"') {
1204 // currentPosition += lookAhead + 1;
1208 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1210 if (currentCharacter == '\\') {
1211 int escapeSize = currentPosition;
1212 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1213 //scanEscapeCharacter has a side effect on this value, and we need the previous value a few lines below
1214 scanDoubleQuotedEscapeCharacter();
1215 escapeSize = currentPosition - escapeSize;
1216 if (withoutUnicodePtr == 0) {
1217 //buffer all the entries that have been left aside....
1219 currentPosition - escapeSize - 1 - startPosition;
1223 withoutUnicodeBuffer,
1226 withoutUnicodeBuffer[++withoutUnicodePtr] =
1228 } else { //overwrite the / in the buffer
1229 withoutUnicodeBuffer[withoutUnicodePtr] =
1231 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1232 withoutUnicodePtr--;
1236 // consume next character
1237 unicodeAsBackSlash = false;
1238 if (((currentCharacter = source[currentPosition++]) == '\\')
1239 && (source[currentPosition] == 'u')) {
1240 getNextUnicodeChar();
1242 if (withoutUnicodePtr != 0) {
1243 withoutUnicodeBuffer[++withoutUnicodePtr] =
1249 } catch (IndexOutOfBoundsException e) {
1250 throw new InvalidInputException(UNTERMINATED_STRING);
1251 } catch (InvalidInputException e) {
1252 if (e.getMessage().equals(INVALID_ESCAPE)) {
1253 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1254 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1255 if (currentPosition + lookAhead == source.length)
1257 if (source[currentPosition + lookAhead] == '\n')
1259 if (source[currentPosition + lookAhead] == '\"') {
1260 currentPosition += lookAhead + 1;
1268 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1269 if (currentLine == null) {
1270 currentLine = new NLSLine();
1271 lines.add(currentLine);
1275 getCurrentTokenSourceString(),
1277 currentPosition - 1));
1279 return TokenNameStringLiteral;
1282 // consume next character
1283 unicodeAsBackSlash = false;
1284 if (((currentCharacter = source[currentPosition++]) == '\\')
1285 && (source[currentPosition] == 'u')) {
1286 getNextUnicodeChar();
1288 if (withoutUnicodePtr != 0) {
1289 withoutUnicodeBuffer[++withoutUnicodePtr] =
1294 while (currentCharacter != '`') {
1296 /**** in PHP \r and \n are valid in string literals ****/
1297 // if ((currentCharacter == '\n')
1298 // || (currentCharacter == '\r')) {
1299 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1300 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1301 // if (currentPosition + lookAhead == source.length)
1303 // if (source[currentPosition + lookAhead] == '\n')
1305 // if (source[currentPosition + lookAhead] == '\"') {
1306 // currentPosition += lookAhead + 1;
1310 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1312 if (currentCharacter == '\\') {
1313 int escapeSize = currentPosition;
1314 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1315 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1316 scanDoubleQuotedEscapeCharacter();
1317 escapeSize = currentPosition - escapeSize;
1318 if (withoutUnicodePtr == 0) {
1319 //buffer all the entries that have been left aside....
1321 currentPosition - escapeSize - 1 - startPosition;
1325 withoutUnicodeBuffer,
1328 withoutUnicodeBuffer[++withoutUnicodePtr] =
1330 } else { //overwrite the / in the buffer
1331 withoutUnicodeBuffer[withoutUnicodePtr] =
1333 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1334 withoutUnicodePtr--;
1338 // consume next character
1339 unicodeAsBackSlash = false;
1340 if (((currentCharacter = source[currentPosition++]) == '\\')
1341 && (source[currentPosition] == 'u')) {
1342 getNextUnicodeChar();
1344 if (withoutUnicodePtr != 0) {
1345 withoutUnicodeBuffer[++withoutUnicodePtr] =
1351 } catch (IndexOutOfBoundsException e) {
1352 throw new InvalidInputException(UNTERMINATED_STRING);
1353 } catch (InvalidInputException e) {
1354 if (e.getMessage().equals(INVALID_ESCAPE)) {
1355 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1356 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1357 if (currentPosition + lookAhead == source.length)
1359 if (source[currentPosition + lookAhead] == '\n')
1361 if (source[currentPosition + lookAhead] == '`') {
1362 currentPosition += lookAhead + 1;
1370 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1371 if (currentLine == null) {
1372 currentLine = new NLSLine();
1373 lines.add(currentLine);
1377 getCurrentTokenSourceString(),
1379 currentPosition - 1));
1381 return TokenNameStringInterpolated;
1386 if ((currentCharacter == '#')
1387 || (test = getNextChar('/', '*')) == 0) {
1389 int endPositionForLineComment = 0;
1390 try { //get the next char
1391 if (((currentCharacter = source[currentPosition++])
1393 && (source[currentPosition] == 'u')) {
1394 //-------------unicode processing ------------
1395 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1397 while (source[currentPosition] == 'u') {
1401 Character.getNumericValue(source[currentPosition++]))
1405 Character.getNumericValue(source[currentPosition++]))
1409 Character.getNumericValue(source[currentPosition++]))
1413 Character.getNumericValue(source[currentPosition++]))
1416 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1419 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1423 //handle the \\u case manually into comment
1424 if (currentCharacter == '\\') {
1425 if (source[currentPosition] == '\\')
1427 } //jump over the \\
1428 boolean isUnicode = false;
1429 while (currentCharacter != '\r'
1430 && currentCharacter != '\n') {
1431 if (currentCharacter == '?') {
1432 if (getNextChar('>')) {
1433 startPosition = currentPosition - 2;
1435 return TokenNameStopPHP;
1441 if (((currentCharacter = source[currentPosition++])
1443 && (source[currentPosition] == 'u')) {
1445 //-------------unicode processing ------------
1446 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1448 while (source[currentPosition] == 'u') {
1452 Character.getNumericValue(source[currentPosition++]))
1456 Character.getNumericValue(
1457 source[currentPosition++]))
1461 Character.getNumericValue(
1462 source[currentPosition++]))
1466 Character.getNumericValue(
1467 source[currentPosition++]))
1470 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1473 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1476 //handle the \\u case manually into comment
1477 if (currentCharacter == '\\') {
1478 if (source[currentPosition] == '\\')
1480 } //jump over the \\
1483 endPositionForLineComment = currentPosition - 6;
1485 endPositionForLineComment = currentPosition - 1;
1487 recordComment(false);
1488 if ((currentCharacter == '\r')
1489 || (currentCharacter == '\n')) {
1490 checkNonExternalizeString();
1491 if (recordLineSeparator) {
1493 pushUnicodeLineSeparator();
1495 pushLineSeparator();
1501 if (tokenizeComments) {
1503 currentPosition = endPositionForLineComment;
1504 // reset one character behind
1506 return TokenNameCOMMENT_LINE;
1508 } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1509 if (tokenizeComments) {
1511 // reset one character behind
1512 return TokenNameCOMMENT_LINE;
1518 //traditional and annotation comment
1519 boolean isJavadoc = false, star = false;
1520 // consume next character
1521 unicodeAsBackSlash = false;
1522 if (((currentCharacter = source[currentPosition++]) == '\\')
1523 && (source[currentPosition] == 'u')) {
1524 getNextUnicodeChar();
1526 if (withoutUnicodePtr != 0) {
1527 withoutUnicodeBuffer[++withoutUnicodePtr] =
1532 if (currentCharacter == '*') {
1536 if ((currentCharacter == '\r')
1537 || (currentCharacter == '\n')) {
1538 checkNonExternalizeString();
1539 if (recordLineSeparator) {
1540 pushLineSeparator();
1545 try { //get the next char
1546 if (((currentCharacter = source[currentPosition++])
1548 && (source[currentPosition] == 'u')) {
1549 //-------------unicode processing ------------
1550 getNextUnicodeChar();
1552 //handle the \\u case manually into comment
1553 if (currentCharacter == '\\') {
1554 if (source[currentPosition] == '\\')
1558 // empty comment is not a javadoc /**/
1559 if (currentCharacter == '/') {
1562 //loop until end of comment */
1563 while ((currentCharacter != '/') || (!star)) {
1564 if ((currentCharacter == '\r')
1565 || (currentCharacter == '\n')) {
1566 checkNonExternalizeString();
1567 if (recordLineSeparator) {
1568 pushLineSeparator();
1573 star = currentCharacter == '*';
1575 if (((currentCharacter = source[currentPosition++])
1577 && (source[currentPosition] == 'u')) {
1578 //-------------unicode processing ------------
1579 getNextUnicodeChar();
1581 //handle the \\u case manually into comment
1582 if (currentCharacter == '\\') {
1583 if (source[currentPosition] == '\\')
1585 } //jump over the \\
1587 recordComment(isJavadoc);
1588 if (tokenizeComments) {
1590 return TokenNameCOMMENT_PHPDOC;
1591 return TokenNameCOMMENT_BLOCK;
1593 } catch (IndexOutOfBoundsException e) {
1594 throw new InvalidInputException(UNTERMINATED_COMMENT);
1598 if (getNextChar('='))
1599 return TokenNameDIVIDE_EQUAL;
1600 return TokenNameDIVIDE;
1604 return TokenNameEOF;
1605 //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1606 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1609 if (currentCharacter == '$') {
1610 while ((currentCharacter = source[currentPosition++]) == '$') {
1612 if (currentCharacter == '{')
1613 return TokenNameDOLLAR_LBRACE;
1614 if (isPHPIdentifierStart(currentCharacter))
1615 return scanIdentifierOrKeyword(true);
1616 return TokenNameERROR;
1618 if (isPHPIdentifierStart(currentCharacter))
1619 return scanIdentifierOrKeyword(false);
1620 if (Character.isDigit(currentCharacter))
1621 return scanNumber(false);
1622 return TokenNameERROR;
1625 } //-----------------end switch while try--------------------
1626 catch (IndexOutOfBoundsException e) {
1629 return TokenNameEOF;
// Decodes the unicode escape located at currentPosition (one or more 'u'
// characters followed by four digits) and stores the result in currentCharacter.
// Throws InvalidInputException(INVALID_UNICODE_ESCAPE) when a digit is rejected
// (the visible checks reject numeric values above 15).
// NOTE(review): several original lines of this method are not visible in this
// excerpt; the comments describe only the statements shown.
1632 public final void getNextUnicodeChar()
1633 throws IndexOutOfBoundsException, InvalidInputException {
1635 //handle the case of a unicode escape.
1636 //when a unicode escape appears we must use a buffer that holds the decoded char values
1637 //At the end of this method currentCharacter holds the newly visited char
1638 //and currentPosition points right after it
1640 //ALL getNextChar.... ARE OPTIMIZED COPIES
1642 int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
// skip any run of 'u' characters
1644 while (source[currentPosition] == 'u') {
// read four digits; a numeric value above 15 cannot be a hex digit
1649 if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1651 || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1653 || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1655 || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1657 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// combine the four hex digits into a single char
1659 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1660 //the unicode buffer is needed from now on
1661 if (withoutUnicodePtr == 0) {
1662 //withoutUnicodePtr == 0 means no escape was seen yet in this token: buffer the entries left aside so far
1663 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1667 withoutUnicodeBuffer,
1671 //append the decoded char to the buffer
1672 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// remember whether the decoded char was a backslash
1674 unicodeAsBackSlash = currentCharacter == '\\';
1676 /* Tokenize a method body, assuming that curly brackets are properly balanced.
1678 public final void jumpOverMethodBody() {
// Scans forward token by token without returning tokens: white space, string
// literals, comments, identifiers and numbers are consumed in turn.
// NOTE(review): many original lines (case labels, braces, returns) are not
// visible in this excerpt; the comments describe only the statements shown.
1680 this.wasAcr = false;
1683 while (true) { //loop for jumping over comments
1684 // ---------Consume white space and update startPosition---------
1685 boolean isWhiteSpace;
1687 startPosition = currentPosition;
1688 if (((currentCharacter = source[currentPosition++]) == '\\')
1689 && (source[currentPosition] == 'u')) {
1690 isWhiteSpace = jumpOverUnicodeWhiteSpace();
1692 if (recordLineSeparator
1693 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1694 pushLineSeparator();
1695 isWhiteSpace = Character.isWhitespace(currentCharacter);
1697 } while (isWhiteSpace);
1699 // -------consume token until } is found---------
1700 switch (currentCharacter) {
1712 test = getNextChar('\\');
1715 scanDoubleQuotedEscapeCharacter();
1716 } catch (InvalidInputException ex) {
1719 try { // consume next character
1720 unicodeAsBackSlash = false;
1721 if (((currentCharacter = source[currentPosition++]) == '\\')
1722 && (source[currentPosition] == 'u')) {
1723 getNextUnicodeChar();
1725 if (withoutUnicodePtr != 0) {
1726 withoutUnicodeBuffer[++withoutUnicodePtr] =
1730 } catch (InvalidInputException ex) {
1738 try { // consume next character
1739 unicodeAsBackSlash = false;
1740 if (((currentCharacter = source[currentPosition++]) == '\\')
1741 && (source[currentPosition] == 'u')) {
1742 getNextUnicodeChar();
1744 if (withoutUnicodePtr != 0) {
1745 withoutUnicodeBuffer[++withoutUnicodePtr] =
1749 } catch (InvalidInputException ex) {
// scan up to the closing double quote
1751 while (currentCharacter != '"') {
1752 if (currentCharacter == '\r') {
1753 if (source[currentPosition] == '\n')
1756 // the string cannot go further than the line
1758 if (currentCharacter == '\n') {
1760 // the string cannot go further than the line
1762 if (currentCharacter == '\\') {
1764 scanDoubleQuotedEscapeCharacter();
1765 } catch (InvalidInputException ex) {
1768 try { // consume next character
1769 unicodeAsBackSlash = false;
1770 if (((currentCharacter = source[currentPosition++]) == '\\')
1771 && (source[currentPosition] == 'u')) {
1772 getNextUnicodeChar();
1774 if (withoutUnicodePtr != 0) {
1775 withoutUnicodeBuffer[++withoutUnicodePtr] =
1779 } catch (InvalidInputException ex) {
1782 } catch (IndexOutOfBoundsException e) {
// getNextChar('/', '*') tells a line comment from a block comment
1789 if ((test = getNextChar('/', '*')) == 0) {
1793 if (((currentCharacter = source[currentPosition++]) == '\\')
1794 && (source[currentPosition] == 'u')) {
1795 //-------------unicode processing ------------
1796 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1798 while (source[currentPosition] == 'u') {
1802 Character.getNumericValue(source[currentPosition++]))
1806 Character.getNumericValue(source[currentPosition++]))
1810 Character.getNumericValue(source[currentPosition++]))
1814 Character.getNumericValue(source[currentPosition++]))
1817 //on error the decoded value does not matter
1818 currentCharacter = 'A';
1819 } //something different from \n and \r
1822 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// a line comment runs until the next line terminator
1826 while (currentCharacter != '\r'
1827 && currentCharacter != '\n') {
1829 if (((currentCharacter = source[currentPosition++])
1831 && (source[currentPosition] == 'u')) {
1832 //-------------unicode processing ------------
1833 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1835 while (source[currentPosition] == 'u') {
1839 Character.getNumericValue(source[currentPosition++]))
1843 Character.getNumericValue(source[currentPosition++]))
1847 Character.getNumericValue(source[currentPosition++]))
1851 Character.getNumericValue(source[currentPosition++]))
1854 //on error the decoded value does not matter
1855 currentCharacter = 'A';
1856 } //something different from \n and \r
1859 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1863 if (recordLineSeparator
1864 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1865 pushLineSeparator();
1866 } catch (IndexOutOfBoundsException e) {
1867 } //an eof will then be generated
1871 //traditional and annotation comment
1872 boolean star = false;
1873 try { // consume next character
1874 unicodeAsBackSlash = false;
1875 if (((currentCharacter = source[currentPosition++]) == '\\')
1876 && (source[currentPosition] == 'u')) {
1877 getNextUnicodeChar();
1879 if (withoutUnicodePtr != 0) {
1880 withoutUnicodeBuffer[++withoutUnicodePtr] =
1884 } catch (InvalidInputException ex) {
1886 if (currentCharacter == '*') {
1889 if (recordLineSeparator
1890 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1891 pushLineSeparator();
1892 try { //get the next char
1893 if (((currentCharacter = source[currentPosition++]) == '\\')
1894 && (source[currentPosition] == 'u')) {
1895 //-------------unicode processing ------------
1896 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1898 while (source[currentPosition] == 'u') {
1902 Character.getNumericValue(source[currentPosition++]))
1906 Character.getNumericValue(source[currentPosition++]))
1910 Character.getNumericValue(source[currentPosition++]))
1914 Character.getNumericValue(source[currentPosition++]))
1917 //on error the decoded value does not matter
1918 currentCharacter = 'A';
1919 } //something different from * and /
1922 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1925 //loop until end of comment */
1926 while ((currentCharacter != '/') || (!star)) {
1927 if (recordLineSeparator
1928 && ((currentCharacter == '\r')
1929 || (currentCharacter == '\n')))
1930 pushLineSeparator();
// star is true only when the previous char was '*'
1931 star = currentCharacter == '*';
1933 if (((currentCharacter = source[currentPosition++])
1935 && (source[currentPosition] == 'u')) {
1936 //-------------unicode processing ------------
1937 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1939 while (source[currentPosition] == 'u') {
1943 Character.getNumericValue(source[currentPosition++]))
1947 Character.getNumericValue(source[currentPosition++]))
1951 Character.getNumericValue(source[currentPosition++]))
1955 Character.getNumericValue(source[currentPosition++]))
1958 //on error the decoded value does not matter
1959 currentCharacter = 'A';
1960 } //something different from * and /
1963 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1967 } catch (IndexOutOfBoundsException e) {
// identifiers, keywords and variables ('$' prefix) are scanned and discarded
1976 if (isPHPIdentifierStart(currentCharacter)
1977 || currentCharacter == '$') {
1979 scanIdentifierOrKeyword((currentCharacter == '$'));
1980 } catch (InvalidInputException ex) {
1984 if (Character.isDigit(currentCharacter)) {
1987 } catch (InvalidInputException ex) {
1993 //-----------------end switch while try--------------------
1994 } catch (IndexOutOfBoundsException e) {
1995 } catch (InvalidInputException e) {
// Decodes the unicode escape at currentPosition and tests whether the decoded
// char is white space. A decoded non-white-space char is stored in
// withoutUnicodeBuffer. Running off the end of the source is reported as
// InvalidInputException(INVALID_UNICODE_ESCAPE).
// NOTE(review): the return statements are on lines not visible in this excerpt.
1999 public final boolean jumpOverUnicodeWhiteSpace()
2000 throws InvalidInputException {
2002 //handle the case of unicode. Jump over the next whiteSpace
2003 //making startPosition point to the next available char
2004 //On false, the currentCharacter is filled up with a potential
2008 this.wasAcr = false;
2010 int unicodeSize = 6;
// skip any run of 'u' characters
2012 while (source[currentPosition] == 'u') {
// read four digits; a numeric value above 15 cannot be a hex digit
2017 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2019 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2021 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2023 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2025 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// combine the four hex digits into a single char
2028 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2029 if (recordLineSeparator
2030 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2031 pushLineSeparator();
2032 if (Character.isWhitespace(currentCharacter))
2035 //buffer the new char, which is not white space
2036 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2037 //withoutUnicodePtr == 1 is true here
2039 } catch (IndexOutOfBoundsException e) {
// the escape was cut short by the end of the source
2040 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// Returns the recorded line-end positions as a new array.
2043 public final int[] getLineEnds() {
2044 //return a bounded copy of this.lineEnds
// only the first linePtr + 1 entries are copied, so unused capacity is dropped
2047 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
// Accessor returning a char[]; presumably the scanner's source buffer
// (the body is not visible in this excerpt -- TODO confirm).
2051 public char[] getSource() {
// Returns the text of a one-character token starting at startPosition.
// NOTE(review): the lines between reading charOne and the final return are not
// visible here; presumably they return shared arrays for common chars.
2054 final char[] optimizedCurrentTokenSource1() {
2055 //always return the same char[], built only once
2057 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2058 char charOne = source[startPosition];
// fallback: allocate a fresh one-element array
2113 return new char[] { charOne };
// Returns the text of a two-character token starting at startPosition, reusing
// an array from the charArray_length[0] tables when an equal entry exists.
// NOTE(review): declarations, returns and the wrap-around statement are on
// lines not visible in this excerpt.
2117 final char[] optimizedCurrentTokenSource2() {
2118 //try to return the same char[], built only once
// hash built from the two chars; it selects the table to search
2122 (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2124 char[][] table = charArray_length[0][hash];
// first search loop, bounded by InternalTableSize
2126 while (++i < InternalTableSize) {
2127 char[] charArray = table[i];
2128 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2131 //---------other side---------
// second search loop, up to the most recently added entry
2133 int max = newEntry2;
2134 while (++i <= max) {
2135 char[] charArray = table[i];
2136 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2139 //--------add the entry-------
// when the next slot index reaches InternalTableSize it is adjusted (statement not visible)
2140 if (++max >= InternalTableSize)
2143 table[max] = (r = new char[] { c0, c1 });
// Returns the text of a three-character token starting at startPosition, reusing
// an array from the charArray_length[1] tables when an equal entry exists.
// NOTE(review): declarations, returns and the wrap-around statement are on
// lines not visible in this excerpt.
2148 final char[] optimizedCurrentTokenSource3() {
2149 //try to return the same char[], built only once
// hash built from the three chars; it selects the table to search
2153 (((c0 = source[startPosition]) << 12)
2154 + ((c1 = source[startPosition + 1]) << 6)
2155 + (c2 = source[startPosition + 2]))
2157 char[][] table = charArray_length[1][hash];
// first search loop, bounded by InternalTableSize
2159 while (++i < InternalTableSize) {
2160 char[] charArray = table[i];
2161 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2164 //---------other side---------
// second search loop, up to the most recently added entry
2166 int max = newEntry3;
2167 while (++i <= max) {
2168 char[] charArray = table[i];
2169 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2172 //--------add the entry-------
// when the next slot index reaches InternalTableSize it is adjusted (statement not visible)
2173 if (++max >= InternalTableSize)
2176 table[max] = (r = new char[] { c0, c1, c2 });
// Returns the text of a four-character token starting at startPosition, reusing
// an array from the charArray_length[2] tables when an equal entry exists.
// NOTE(review): some declarations, the returns and the wrap-around statement
// are on lines not visible in this excerpt.
2181 final char[] optimizedCurrentTokenSource4() {
2182 //try to return the same char[], built only once
2184 char c0, c1, c2, c3;
// hash built from the four chars (computed as a long); it selects the table to search
2186 ((((long) (c0 = source[startPosition])) << 18)
2187 + ((c1 = source[startPosition + 1]) << 12)
2188 + ((c2 = source[startPosition + 2]) << 6)
2189 + (c3 = source[startPosition + 3]))
2191 char[][] table = charArray_length[2][(int) hash];
// first search loop, bounded by InternalTableSize
2193 while (++i < InternalTableSize) {
2194 char[] charArray = table[i];
2195 if ((c0 == charArray[0])
2196 && (c1 == charArray[1])
2197 && (c2 == charArray[2])
2198 && (c3 == charArray[3]))
2201 //---------other side---------
// second search loop, up to the most recently added entry
2203 int max = newEntry4;
2204 while (++i <= max) {
2205 char[] charArray = table[i];
2206 if ((c0 == charArray[0])
2207 && (c1 == charArray[1])
2208 && (c2 == charArray[2])
2209 && (c3 == charArray[3]))
2212 //--------add the entry-------
// when the next slot index reaches InternalTableSize it is adjusted (statement not visible)
2213 if (++max >= InternalTableSize)
2216 table[max] = (r = new char[] { c0, c1, c2, c3 });
// Returns the text of a five-character token starting at startPosition, reusing
// an array from the charArray_length[3] tables when an equal entry exists.
// NOTE(review): some declarations, the returns and the wrap-around statement
// are on lines not visible in this excerpt.
2222 final char[] optimizedCurrentTokenSource5() {
2223 //try to return the same char[], built only once
2225 char c0, c1, c2, c3, c4;
// hash built from the five chars (computed as a long); it selects the table to search
2227 ((((long) (c0 = source[startPosition])) << 24)
2228 + (((long) (c1 = source[startPosition + 1])) << 18)
2229 + ((c2 = source[startPosition + 2]) << 12)
2230 + ((c3 = source[startPosition + 3]) << 6)
2231 + (c4 = source[startPosition + 4]))
2233 char[][] table = charArray_length[3][(int) hash];
// first search loop, bounded by InternalTableSize
2235 while (++i < InternalTableSize) {
2236 char[] charArray = table[i];
2237 if ((c0 == charArray[0])
2238 && (c1 == charArray[1])
2239 && (c2 == charArray[2])
2240 && (c3 == charArray[3])
2241 && (c4 == charArray[4]))
2244 //---------other side---------
// second search loop, up to the most recently added entry
2246 int max = newEntry5;
2247 while (++i <= max) {
2248 char[] charArray = table[i];
2249 if ((c0 == charArray[0])
2250 && (c1 == charArray[1])
2251 && (c2 == charArray[2])
2252 && (c3 == charArray[3])
2253 && (c4 == charArray[4]))
2256 //--------add the entry-------
// when the next slot index reaches InternalTableSize it is adjusted (statement not visible)
2257 if (++max >= InternalTableSize)
2260 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
// Returns the text of a six-character token starting at startPosition, reusing
// an array from the charArray_length[4] tables when an equal entry exists.
// NOTE(review): some declarations, the returns and the wrap-around statement
// are on lines not visible in this excerpt.
2266 final char[] optimizedCurrentTokenSource6() {
2267 //try to return the same char[], built only once
2269 char c0, c1, c2, c3, c4, c5;
// hash built from the six chars (computed as a long); it selects the table to search
// NOTE(review): the first shift is 32 while the others step by 6 -- confirm this is intended
2271 ((((long) (c0 = source[startPosition])) << 32)
2272 + (((long) (c1 = source[startPosition + 1])) << 24)
2273 + (((long) (c2 = source[startPosition + 2])) << 18)
2274 + ((c3 = source[startPosition + 3]) << 12)
2275 + ((c4 = source[startPosition + 4]) << 6)
2276 + (c5 = source[startPosition + 5]))
2278 char[][] table = charArray_length[4][(int) hash];
// first search loop, bounded by InternalTableSize
2280 while (++i < InternalTableSize) {
2281 char[] charArray = table[i];
2282 if ((c0 == charArray[0])
2283 && (c1 == charArray[1])
2284 && (c2 == charArray[2])
2285 && (c3 == charArray[3])
2286 && (c4 == charArray[4])
2287 && (c5 == charArray[5]))
2290 //---------other side---------
// second search loop, up to the most recently added entry
2292 int max = newEntry6;
2293 while (++i <= max) {
2294 char[] charArray = table[i];
2295 if ((c0 == charArray[0])
2296 && (c1 == charArray[1])
2297 && (c2 == charArray[2])
2298 && (c3 == charArray[3])
2299 && (c4 == charArray[4])
2300 && (c5 == charArray[5]))
2303 //--------add the entry-------
// when the next slot index reaches InternalTableSize it is adjusted (statement not visible)
2304 if (++max >= InternalTableSize)
2307 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
// Records the position of the line terminator held in currentCharacter
// ('\r' or '\n') in lineEnds, growing the array by INCREMENT when it is full.
// A "\r\n" pair is merged into a single entry.
// NOTE(review): several original lines (try openers, returns, wasAcr updates)
// are not visible in this excerpt.
2312 public final void pushLineSeparator() throws InvalidInputException {
2313 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2314 final int INCREMENT = 250;
2316 if (this.checkNonExternalizedStringLiterals) {
2317 // reinitialize the current line for non-externalized strings purposes
2320 //currentCharacter is at position currentPosition-1
2323 if (currentCharacter == '\r') {
2324 int separatorPos = currentPosition - 1;
// guard against a position that is not beyond the last recorded line end
2325 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2327 //System.out.println("CR-" + separatorPos);
2329 lineEnds[++linePtr] = separatorPos;
2330 } catch (IndexOutOfBoundsException e) {
2331 //linePtr was already incremented before the store failed, so its value is correct
2332 int oldLength = lineEnds.length;
2333 int[] old = lineEnds;
2334 lineEnds = new int[oldLength + INCREMENT];
2335 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2336 lineEnds[linePtr] = separatorPos;
2338 // look-ahead for merged cr+lf
2340 if (source[currentPosition] == '\n') {
2341 //System.out.println("look-ahead LF-" + currentPosition);
// the entry just recorded now points at the '\n' of the pair
2342 lineEnds[linePtr] = currentPosition;
2348 } catch (IndexOutOfBoundsException e) {
2353 if (currentCharacter == '\n') {
2354 //must merge a possible cr followed by lf
2355 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2356 //System.out.println("merge LF-" + (currentPosition - 1));
2357 lineEnds[linePtr] = currentPosition - 1;
2359 int separatorPos = currentPosition - 1;
// guard against a position that is not beyond the last recorded line end
2360 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2362 // System.out.println("LF-" + separatorPos);
2364 lineEnds[++linePtr] = separatorPos;
2365 } catch (IndexOutOfBoundsException e) {
2366 //linePtr was already incremented before the store failed, so its value is correct
2367 int oldLength = lineEnds.length;
2368 int[] old = lineEnds;
2369 lineEnds = new int[oldLength + INCREMENT];
2370 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2371 lineEnds[linePtr] = separatorPos;
// Variant of pushLineSeparator for a line terminator that was read as a unicode
// escape: positions are computed with an offset of 6 instead of 1, presumably
// the length of the escape sequence (TODO confirm).
// NOTE(review): several original lines (try openers, returns, wasAcr updates)
// are not visible in this excerpt.
2378 public final void pushUnicodeLineSeparator() {
2379 // isUnicode means that the \r or \n has been read as a unicode character
2381 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2383 final int INCREMENT = 250;
2384 //currentCharacter is at position currentPosition-1
2386 if (this.checkNonExternalizedStringLiterals) {
2387 // reinitialize the current line for non-externalized strings purposes
2392 if (currentCharacter == '\r') {
2393 int separatorPos = currentPosition - 6;
// guard against a position that is not beyond the last recorded line end
2394 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2396 //System.out.println("CR-" + separatorPos);
2398 lineEnds[++linePtr] = separatorPos;
2399 } catch (IndexOutOfBoundsException e) {
2400 //linePtr was already incremented before the store failed, so its value is correct
2401 int oldLength = lineEnds.length;
2402 int[] old = lineEnds;
2403 lineEnds = new int[oldLength + INCREMENT];
2404 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2405 lineEnds[linePtr] = separatorPos;
2407 // look-ahead for merged cr+lf
// NOTE(review): no bounds guard is visible around this read -- confirm behavior at end of source
2408 if (source[currentPosition] == '\n') {
2409 //System.out.println("look-ahead LF-" + currentPosition);
2410 lineEnds[linePtr] = currentPosition;
2418 if (currentCharacter == '\n') {
2419 //must merge a possible cr followed by lf
2420 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2421 //System.out.println("merge LF-" + (currentPosition - 1));
2422 lineEnds[linePtr] = currentPosition - 6;
2424 int separatorPos = currentPosition - 6;
// guard against a position that is not beyond the last recorded line end
2425 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2427 // System.out.println("LF-" + separatorPos);
2429 lineEnds[++linePtr] = separatorPos;
2430 } catch (IndexOutOfBoundsException e) {
2431 //linePtr was already incremented before the store failed, so its value is correct
2432 int oldLength = lineEnds.length;
2433 int[] old = lineEnds;
2434 lineEnds = new int[oldLength + INCREMENT];
2435 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2436 lineEnds[linePtr] = separatorPos;
// Pushes the comment just scanned onto the comment stacks: commentStops gets
// currentPosition (stored negated when isJavadoc is false) and commentStarts
// gets startPosition. Both arrays grow by 30 entries when full.
2443 public final void recordComment(boolean isJavadoc) {
2445 // a new annotation comment is recorded
// the sign of the stored stop position encodes the isJavadoc flag
2447 commentStops[++commentPtr] =
2448 isJavadoc ? currentPosition : -currentPosition;
2449 } catch (IndexOutOfBoundsException e) {
// commentStops was full: grow it, then redo the store (commentPtr is already incremented)
2450 int oldStackLength = commentStops.length;
2451 int[] oldStack = commentStops;
2452 commentStops = new int[oldStackLength + 30];
2453 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2454 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2455 //grow the start positions buffer too
2456 int[] old = commentStarts;
2457 commentStarts = new int[oldStackLength + 30];
2458 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2461 //the buffer is of a correct size here
2462 commentStarts[commentPtr] = startPosition;
// Repositions the scanner on the range begin..end of the source and clears the
// comment stack.
2464 public void resetTo(int begin, int end) {
2465 //reset the scanner to a given position where it may rescan again
2468 initialPosition = startPosition = currentPosition = begin;
// eofPosition is one past end; the ternary avoids overflowing Integer.MAX_VALUE
2469 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2470 commentPtr = -1; // reset comment stack
// Reads the character following a backslash inside a single-quoted string and
// sets currentCharacter from it.
// NOTE(review): the case labels and else branches are on lines not visible in
// this excerpt, so which input maps to which assignment cannot be confirmed here.
2473 public final void scanSingleQuotedEscapeCharacter()
2474 throws InvalidInputException {
2475 // the string with "\\u" is a legal string of two chars \ and u
2476 //thus we use a direct access to the source (for regular cases).
// when the backslash itself came from a unicode escape, the next char may be one too
2478 if (unicodeAsBackSlash) {
2479 // consume next character
2480 unicodeAsBackSlash = false;
2481 if (((currentCharacter = source[currentPosition++]) == '\\')
2482 && (source[currentPosition] == 'u')) {
2483 getNextUnicodeChar();
2485 if (withoutUnicodePtr != 0) {
2486 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// regular case: read the next char directly from the source
2490 currentCharacter = source[currentPosition++];
2491 switch (currentCharacter) {
2493 currentCharacter = '\'';
2496 currentCharacter = '\\';
2499 currentCharacter = '\\';
// Reads the character(s) following a backslash inside a double-quoted string
// and sets currentCharacter to the decoded value; octal escapes of up to three
// digits are handled in the default path.
// NOTE(review): the case labels, else branches and position adjustments are on
// lines not visible in this excerpt.
2504 public final void scanDoubleQuotedEscapeCharacter()
2505 throws InvalidInputException {
2506 // the string with "\\u" is a legal string of two chars \ and u
2507 //thus we use a direct access to the source (for regular cases).
// when the backslash itself came from a unicode escape, the next char may be one too
2509 if (unicodeAsBackSlash) {
2510 // consume next character
2511 unicodeAsBackSlash = false;
2512 if (((currentCharacter = source[currentPosition++]) == '\\')
2513 && (source[currentPosition] == 'u')) {
2514 getNextUnicodeChar();
2516 if (withoutUnicodePtr != 0) {
2517 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// regular case: read the next char directly from the source
2521 currentCharacter = source[currentPosition++];
2522 switch (currentCharacter) {
2524 // currentCharacter = '\b';
2527 currentCharacter = '\t';
2530 currentCharacter = '\n';
2533 // currentCharacter = '\f';
2536 currentCharacter = '\r';
2539 currentCharacter = '\"';
2542 currentCharacter = '\'';
2545 currentCharacter = '\\';
2548 currentCharacter = '$';
2551 // -----------octal escape--------------
2553 // OctalDigit OctalDigit
2554 // ZeroToThree OctalDigit OctalDigit
2556 int number = Character.getNumericValue(currentCharacter);
2557 if (number >= 0 && number <= 7) {
// a first digit above 3 rules out a three-digit escape
2558 boolean zeroToThreeNot = number > 3;
2560 .isDigit(currentCharacter = source[currentPosition++])) {
2561 int digit = Character.getNumericValue(currentCharacter);
2562 if (digit >= 0 && digit <= 7) {
2563 number = (number * 8) + digit;
2565 .isDigit(currentCharacter = source[currentPosition++])) {
2566 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2569 digit = Character.getNumericValue(currentCharacter);
2570 if (digit >= 0 && digit <= 7) {
2571 // has read \ZeroToThree OctalDigit OctalDigit
2572 number = (number * 8) + digit;
2573 } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2577 } else { // has read \OctalDigit NonDigit--> ignore last character
2580 } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2583 } else { // has read \OctalDigit --> ignore last character
2587 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the decoded char
2588 currentCharacter = (char) number;
2591 // throw new InvalidInputException(INVALID_ESCAPE);
2595 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2596 // return scanIdentifierOrKeyword( false );
2599 public int scanIdentifierOrKeyword(boolean isVariable)
2600 throws InvalidInputException {
2603 //first dispatch on the first char.
2604 //then the length. If there are several
2605 //keywords with the same length AND the same first char, then do another
2606 //dispatch on the second char :-)...cool....but fast !
2608 useAssertAsAnIndentifier = false;
2610 while (getNextCharAsJavaIdentifierPart()) {
2614 return TokenNameVariable;
2619 if (withoutUnicodePtr == 0)
2621 //quick test on length == 1 but not on length > 12 while most identifier
2622 //have a length which is <= 12...but there are lots of identifier with
2626 if ((length = currentPosition - startPosition) == 1)
2627 return TokenNameIdentifier;
2629 data = new char[length];
2630 index = startPosition;
2631 for (int i = 0; i < length; i++) {
2632 data[i] = Character.toLowerCase(source[index + i]);
2636 if ((length = withoutUnicodePtr) == 1)
2637 return TokenNameIdentifier;
2638 // data = withoutUnicodeBuffer;
2639 data = new char[withoutUnicodeBuffer.length];
2640 for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2641 data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2646 firstLetter = data[index];
2647 switch (firstLetter) {
2649 case 'a' : // as and array
2652 if ((data[++index] == 's')) {
2655 return TokenNameIdentifier;
2658 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2659 return TokenNameAND;
2661 return TokenNameIdentifier;
2664 // if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2665 // return TokenNamearray;
2667 // return TokenNameIdentifier;
2669 return TokenNameIdentifier;
2674 if ((data[++index] == 'r')
2675 && (data[++index] == 'e')
2676 && (data[++index] == 'a')
2677 && (data[++index] == 'k'))
2678 return TokenNamebreak;
2680 return TokenNameIdentifier;
2682 return TokenNameIdentifier;
2685 case 'c' : //case class continue
2688 if ((data[++index] == 'a')
2689 && (data[++index] == 's')
2690 && (data[++index] == 'e'))
2691 return TokenNamecase;
2693 return TokenNameIdentifier;
2695 if ((data[++index] == 'l')
2696 && (data[++index] == 'a')
2697 && (data[++index] == 's')
2698 && (data[++index] == 's'))
2699 return TokenNameclass;
2701 return TokenNameIdentifier;
2703 if ((data[++index] == 'o')
2704 && (data[++index] == 'n')
2705 && (data[++index] == 't')
2706 && (data[++index] == 'i')
2707 && (data[++index] == 'n')
2708 && (data[++index] == 'u')
2709 && (data[++index] == 'e'))
2710 return TokenNamecontinue;
2712 return TokenNameIdentifier;
2714 return TokenNameIdentifier;
2717 case 'd' : //define default do
2720 if ((data[++index] == 'o'))
2723 return TokenNameIdentifier;
2725 if ((data[++index] == 'e')
2726 && (data[++index] == 'f')
2727 && (data[++index] == 'i')
2728 && (data[++index] == 'n')
2729 && (data[++index] == 'e'))
2730 return TokenNamedefine;
2732 return TokenNameIdentifier;
2734 if ((data[++index] == 'e')
2735 && (data[++index] == 'f')
2736 && (data[++index] == 'a')
2737 && (data[++index] == 'u')
2738 && (data[++index] == 'l')
2739 && (data[++index] == 't'))
2740 return TokenNamedefault;
2742 return TokenNameIdentifier;
2744 return TokenNameIdentifier;
2746 case 'e' : //echo else elseif extends
2749 if ((data[++index] == 'c')
2750 && (data[++index] == 'h')
2751 && (data[++index] == 'o'))
2752 return TokenNameecho;
2754 (data[index] == 'l')
2755 && (data[++index] == 's')
2756 && (data[++index] == 'e'))
2757 return TokenNameelse;
2759 return TokenNameIdentifier;
2761 if ((data[++index] == 'n')
2762 && (data[++index] == 'd')
2763 && (data[++index] == 'i')
2764 && (data[++index] == 'f'))
2765 return TokenNameendif;
2767 return TokenNameIdentifier;
2769 if ((data[++index] == 'n')
2770 && (data[++index] == 'd')
2771 && (data[++index] == 'f')
2772 && (data[++index] == 'o')
2773 && (data[++index] == 'r'))
2774 return TokenNameendfor;
2776 (data[index] == 'l')
2777 && (data[++index] == 's')
2778 && (data[++index] == 'e')
2779 && (data[++index] == 'i')
2780 && (data[++index] == 'f'))
2781 return TokenNameelseif;
2783 return TokenNameIdentifier;
2785 if ((data[++index] == 'x')
2786 && (data[++index] == 't')
2787 && (data[++index] == 'e')
2788 && (data[++index] == 'n')
2789 && (data[++index] == 'd')
2790 && (data[++index] == 's'))
2791 return TokenNameextends;
2793 return TokenNameIdentifier;
2794 case 8 : // endwhile
2795 if ((data[++index] == 'n')
2796 && (data[++index] == 'd')
2797 && (data[++index] == 'w')
2798 && (data[++index] == 'h')
2799 && (data[++index] == 'i')
2800 && (data[++index] == 'l')
2801 && (data[++index] == 'e'))
2802 return TokenNameendwhile;
2804 return TokenNameIdentifier;
2805 case 9 : // endswitch
2806 if ((data[++index] == 'n')
2807 && (data[++index] == 'd')
2808 && (data[++index] == 's')
2809 && (data[++index] == 'w')
2810 && (data[++index] == 'i')
2811 && (data[++index] == 't')
2812 && (data[++index] == 'c')
2813 && (data[++index] == 'h'))
2814 return TokenNameendswitch;
2816 return TokenNameIdentifier;
2817 case 10 : // endforeach
2818 if ((data[++index] == 'n')
2819 && (data[++index] == 'd')
2820 && (data[++index] == 'f')
2821 && (data[++index] == 'o')
2822 && (data[++index] == 'r')
2823 && (data[++index] == 'e')
2824 && (data[++index] == 'a')
2825 && (data[++index] == 'c')
2826 && (data[++index] == 'h'))
2827 return TokenNameendforeach;
2829 return TokenNameIdentifier;
2832 return TokenNameIdentifier;
2835 case 'f' : //for false function
2838 if ((data[++index] == 'o') && (data[++index] == 'r'))
2839 return TokenNamefor;
2841 return TokenNameIdentifier;
2843 if ((data[++index] == 'a')
2844 && (data[++index] == 'l')
2845 && (data[++index] == 's')
2846 && (data[++index] == 'e'))
2847 return TokenNamefalse;
2849 return TokenNameIdentifier;
2850 case 7 : // function
2851 if ((data[++index] == 'o')
2852 && (data[++index] == 'r')
2853 && (data[++index] == 'e')
2854 && (data[++index] == 'a')
2855 && (data[++index] == 'c')
2856 && (data[++index] == 'h'))
2857 return TokenNameforeach;
2859 return TokenNameIdentifier;
2860 case 8 : // function
2861 if ((data[++index] == 'u')
2862 && (data[++index] == 'n')
2863 && (data[++index] == 'c')
2864 && (data[++index] == 't')
2865 && (data[++index] == 'i')
2866 && (data[++index] == 'o')
2867 && (data[++index] == 'n'))
2868 return TokenNamefunction;
2870 return TokenNameIdentifier;
2872 return TokenNameIdentifier;
2876 if ((data[++index] == 'l')
2877 && (data[++index] == 'o')
2878 && (data[++index] == 'b')
2879 && (data[++index] == 'a')
2880 && (data[++index] == 'l')) {
2881 return TokenNameglobal;
2884 return TokenNameIdentifier;
2889 if (data[++index] == 'f')
2892 return TokenNameIdentifier;
2894 // if ((data[++index] == 'n') && (data[++index] == 't'))
2895 // return TokenNameint;
2897 // return TokenNameIdentifier;
2899 if ((data[++index] == 'n')
2900 && (data[++index] == 'c')
2901 && (data[++index] == 'l')
2902 && (data[++index] == 'u')
2903 && (data[++index] == 'd')
2904 && (data[++index] == 'e'))
2905 return TokenNameinclude;
2907 return TokenNameIdentifier;
2909 if ((data[++index] == 'n')
2910 && (data[++index] == 'c')
2911 && (data[++index] == 'l')
2912 && (data[++index] == 'u')
2913 && (data[++index] == 'd')
2914 && (data[++index] == 'e')
2915 && (data[++index] == '_')
2916 && (data[++index] == 'o')
2917 && (data[++index] == 'n')
2918 && (data[++index] == 'c')
2919 && (data[++index] == 'e'))
2920 return TokenNameinclude_once;
2922 return TokenNameIdentifier;
2924 return TokenNameIdentifier;
2929 if ((data[++index] == 'i')
2930 && (data[++index] == 's')
2931 && (data[++index] == 't')) {
2932 return TokenNamelist;
2935 return TokenNameIdentifier;
2937 case 'n' : // new null
2940 if ((data[++index] == 'e') && (data[++index] == 'w'))
2941 return TokenNamenew;
2943 return TokenNameIdentifier;
2945 if ((data[++index] == 'u')
2946 && (data[++index] == 'l')
2947 && (data[++index] == 'l'))
2948 return TokenNamenull;
2950 return TokenNameIdentifier;
2953 return TokenNameIdentifier;
2955 case 'o' : // or old_function
2957 if (data[++index] == 'r') {
2961 // if (length == 12) {
2962 // if ((data[++index] == 'l')
2963 // && (data[++index] == 'd')
2964 // && (data[++index] == '_')
2965 // && (data[++index] == 'f')
2966 // && (data[++index] == 'u')
2967 // && (data[++index] == 'n')
2968 // && (data[++index] == 'c')
2969 // && (data[++index] == 't')
2970 // && (data[++index] == 'i')
2971 // && (data[++index] == 'o')
2972 // && (data[++index] == 'n')) {
2973 // return TokenNameold_function;
2976 return TokenNameIdentifier;
2980 if ((data[++index] == 'r')
2981 && (data[++index] == 'i')
2982 && (data[++index] == 'n')
2983 && (data[++index] == 't')) {
2984 return TokenNameprint;
2987 return TokenNameIdentifier;
2988 case 'r' : //return require require_once
2990 if ((data[++index] == 'e')
2991 && (data[++index] == 't')
2992 && (data[++index] == 'u')
2993 && (data[++index] == 'r')
2994 && (data[++index] == 'n')) {
2995 return TokenNamereturn;
2997 } else if (length == 7) {
2998 if ((data[++index] == 'e')
2999 && (data[++index] == 'q')
3000 && (data[++index] == 'u')
3001 && (data[++index] == 'i')
3002 && (data[++index] == 'r')
3003 && (data[++index] == 'e')) {
3004 return TokenNamerequire;
3006 } else if (length == 12) {
3007 if ((data[++index] == 'e')
3008 && (data[++index] == 'q')
3009 && (data[++index] == 'u')
3010 && (data[++index] == 'i')
3011 && (data[++index] == 'r')
3012 && (data[++index] == 'e')
3013 && (data[++index] == '_')
3014 && (data[++index] == 'o')
3015 && (data[++index] == 'n')
3016 && (data[++index] == 'c')
3017 && (data[++index] == 'e')) {
3018 return TokenNamerequire_once;
3021 return TokenNameIdentifier;
3023 case 's' : //static switch
3026 if (data[++index] == 't')
3027 if ((data[++index] == 'a')
3028 && (data[++index] == 't')
3029 && (data[++index] == 'i')
3030 && (data[++index] == 'c')) {
3031 return TokenNamestatic;
3033 return TokenNameIdentifier;
3035 (data[index] == 'w')
3036 && (data[++index] == 'i')
3037 && (data[++index] == 't')
3038 && (data[++index] == 'c')
3039 && (data[++index] == 'h'))
3040 return TokenNameswitch;
3042 return TokenNameIdentifier;
3044 return TokenNameIdentifier;
3051 if ((data[++index] == 'r')
3052 && (data[++index] == 'u')
3053 && (data[++index] == 'e'))
3054 return TokenNametrue;
3056 return TokenNameIdentifier;
3057 // if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
3058 // return TokenNamethis;
3061 return TokenNameIdentifier;
3067 if ((data[++index] == 'a') && (data[++index] == 'r'))
3068 return TokenNamevar;
3070 return TokenNameIdentifier;
3073 return TokenNameIdentifier;
3079 if ((data[++index] == 'h')
3080 && (data[++index] == 'i')
3081 && (data[++index] == 'l')
3082 && (data[++index] == 'e'))
3083 return TokenNamewhile;
3085 return TokenNameIdentifier;
3086 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
3087 //return TokenNamewidefp ;
3089 //return TokenNameIdentifier;
3091 return TokenNameIdentifier;
3097 if ((data[++index] == 'o') && (data[++index] == 'r'))
3098 return TokenNameXOR;
3100 return TokenNameIdentifier;
3103 return TokenNameIdentifier;
3106 return TokenNameIdentifier;
/**
 * Scans a numeric literal starting at the current character.
 * <p>
 * Handles hexadecimal literals (0x / 0X prefix), literals with a leading
 * zero (which may still turn out to be floating point, e.g. 000099.0),
 * plain decimal integers, and floating-point literals with an optional
 * fraction, an exponent (e / E with optional sign) and a d / D suffix.
 *
 * @param dotPrefix when true the literal is treated as floating point from
 *        the start and the hexadecimal / leading-zero handling is skipped
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException with INVALID_HEXA when no hexadecimal digit
 *         follows the 0x prefix, or INVALID_FLOAT when an exponent marker
 *         (and optional sign) is not followed by a digit
 */
3109 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3111 //when entering this method the currentCharacter is the first
3112 //digit of the number , i.e. it may be preceded by a . when
3115 boolean floating = dotPrefix;
3116 if ((!dotPrefix) && (currentCharacter == '0')) {
3117 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3118 //force the first char of the hexa number to exist...
3119 // consume next character
3120 unicodeAsBackSlash = false;
3121 if (((currentCharacter = source[currentPosition++]) == '\\')
3122 && (source[currentPosition] == 'u')) {
3123 getNextUnicodeChar();
3125 if (withoutUnicodePtr != 0) {
3126 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3129 if (Character.digit(currentCharacter, 16) == -1)
3130 throw new InvalidInputException(INVALID_HEXA);
3132 while (getNextCharAsDigit(16)) {
3134 // if (getNextChar('l', 'L') >= 0)
3135 // return TokenNameLongLiteral;
3137 return TokenNameIntegerLiteral;
3140 //there is no x or X in the number (the hexa case has returned above)
3141 //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3142 if (getNextCharAsDigit()) {
3143 //-------------potential octal-----------------
3144 while (getNextCharAsDigit()) {
3147 // if (getNextChar('l', 'L') >= 0) {
3148 // return TokenNameLongLiteral;
3151 // if (getNextChar('f', 'F') >= 0) {
3152 // return TokenNameFloatingPointLiteral;
3155 if (getNextChar('d', 'D') >= 0) {
3156 return TokenNameDoubleLiteral;
3157 } else { //make the distinction between octal and float ....
3158 if (getNextChar('.')) { //bingo ! ....
3159 while (getNextCharAsDigit()) {
3161 if (getNextChar('e', 'E') >= 0) {
3162 // consume next character
3163 unicodeAsBackSlash = false;
3164 if (((currentCharacter = source[currentPosition++]) == '\\')
3165 && (source[currentPosition] == 'u')) {
3166 getNextUnicodeChar();
3168 if (withoutUnicodePtr != 0) {
3169 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// optional sign of the exponent
3173 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3174 // consume next character
3175 unicodeAsBackSlash = false;
3176 if (((currentCharacter = source[currentPosition++]) == '\\')
3177 && (source[currentPosition] == 'u')) {
3178 getNextUnicodeChar();
3180 if (withoutUnicodePtr != 0) {
3181 withoutUnicodeBuffer[++withoutUnicodePtr] =
// the exponent needs at least one digit
3186 if (!Character.isDigit(currentCharacter))
3187 throw new InvalidInputException(INVALID_FLOAT);
3188 while (getNextCharAsDigit()) {
3191 // if (getNextChar('f', 'F') >= 0)
3192 // return TokenNameFloatingPointLiteral;
3193 getNextChar('d', 'D'); //jump over potential d or D
3194 return TokenNameDoubleLiteral;
3196 return TokenNameIntegerLiteral;
3204 while (getNextCharAsDigit()) {
3207 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3208 // return TokenNameLongLiteral;
3210 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3211 while (getNextCharAsDigit()) {
3216 //if floating is true both exponent and suffix may be optional
3218 if (getNextChar('e', 'E') >= 0) {
3220 // consume next character
3221 unicodeAsBackSlash = false;
3222 if (((currentCharacter = source[currentPosition++]) == '\\')
3223 && (source[currentPosition] == 'u')) {
3224 getNextUnicodeChar();
3226 if (withoutUnicodePtr != 0) {
3227 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3231 if ((currentCharacter == '-')
3232 || (currentCharacter == '+')) { // consume next character
3233 unicodeAsBackSlash = false;
3234 if (((currentCharacter = source[currentPosition++]) == '\\')
3235 && (source[currentPosition] == 'u')) {
3236 getNextUnicodeChar();
3238 if (withoutUnicodePtr != 0) {
3239 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// the exponent needs at least one digit
3243 if (!Character.isDigit(currentCharacter))
3244 throw new InvalidInputException(INVALID_FLOAT);
3245 while (getNextCharAsDigit()) {
3249 if (getNextChar('d', 'D') >= 0)
3250 return TokenNameDoubleLiteral;
3251 // if (getNextChar('f', 'F') >= 0)
3252 // return TokenNameFloatingPointLiteral;
3254 //the long flag has been tested before
3256 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3259 * Search the line number corresponding to a specific position
// Maps a source position to a line number by comparing it against the
// recorded line-end offsets in lineEnds.
3262 public final int getLineNumber(int position) {
// no line ends recorded
3264 if (lineEnds == null)
// entries 0..linePtr of lineEnds are the ones considered
3266 int length = linePtr + 1;
// NOTE(review): g and d look like the lower / upper bounds of a binary
// search over lineEnds, with m as the probe index - confirm
3269 int g = 0, d = length - 1;
3273 if (position < lineEnds[m]) {
3275 } else if (position > lineEnds[m]) {
3281 if (position < lineEnds[m]) {
// Switches the scanner's PHP mode on or off.
// NOTE(review): presumably stores mode in the phpMode field - confirm
3287 public void setPHPMode(boolean mode) {
/**
 * Installs the character buffer to scan and resets the scanning state.
 * <p>
 * A null argument is replaced by an empty array. initialPosition and
 * currentPosition are reset to 0, containsAssertKeyword is cleared, and
 * withoutUnicodeBuffer is reallocated with the same length as the source.
 *
 * @param source the characters to scan, or null for an empty source
 */
3291 public final void setSource(char[] source) {
3292 //the source buffer is set to the given source (never left null)
3294 if (source == null) {
3295 this.source = new char[0];
3297 this.source = source;
3300 initialPosition = currentPosition = 0;
3301 containsAssertKeyword = false;
// sized from this.source, which is non-null even when the argument was null
3302 withoutUnicodeBuffer = new char[this.source.length];
/**
 * Returns the source text with the current token marked.
 * <p>
 * The text before startPosition is printed first, then the token between
 * "Starts here -->" and "<-- Ends here" markers. Two special cases return
 * early: "EOF" plus the source when startPosition is at the end of the
 * source, and "behind the EOF" plus the source when currentPosition lies
 * beyond it.
 */
3306 public String toString() {
3307 if (startPosition == source.length)
3308 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3309 if (currentPosition > source.length)
3310 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
// everything before the current token
3312 char front[] = new char[startPosition];
3313 System.arraycopy(source, 0, front, 0, startPosition);
// the current token spans startPosition .. currentPosition - 1 inclusive
3315 int middleLength = (currentPosition - 1) - startPosition + 1;
3317 if (middleLength > -1) {
3318 middle = new char[middleLength];
3319 System.arraycopy(source, startPosition, middle, 0, middleLength);
3321 middle = new char[0];
// NOTE(review): end is allocated one element larger than the number of
// characters copied into it below, so it appears to keep a trailing '\0' -
// confirm
3324 char end[] = new char[source.length - (currentPosition - 1)];
3327 (currentPosition - 1) + 1,
3330 source.length - (currentPosition - 1) - 1);
3332 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3333 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable rendering of a token kind.
 * <p>
 * Tokens that carry text (errors, identifiers, variables, literals, ...) are
 * rendered as Kind(source) using the current token source; keywords and
 * operators are rendered as their literal spelling.
 *
 * @param act the token kind (one of the TokenName... constants)
 * @return the printable form; a value with no matching case yields
 *         "not-a-token(act) " followed by the current token source
 */
3336 public final String toStringAction(int act) {
// tokens rendered together with their source text
3338 case TokenNameERROR :
3339 return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3340 case TokenNameStopPHP :
3341 return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3342 case TokenNameIdentifier :
3343 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3344 case TokenNameVariable :
3345 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// keywords
3347 return "as"; //$NON-NLS-1$
3348 case TokenNamebreak :
3349 return "break"; //$NON-NLS-1$
3350 case TokenNamecase :
3351 return "case"; //$NON-NLS-1$
3352 case TokenNameclass :
3353 return "class"; //$NON-NLS-1$
3354 case TokenNamecontinue :
3355 return "continue"; //$NON-NLS-1$
3356 case TokenNamedefault :
3357 return "default"; //$NON-NLS-1$
3358 case TokenNamedefine :
3359 return "define"; //$NON-NLS-1$
3361 return "do"; //$NON-NLS-1$
3362 case TokenNameecho :
3363 return "echo"; //$NON-NLS-1$
3364 case TokenNameelse :
3365 return "else"; //$NON-NLS-1$
3366 case TokenNameelseif :
3367 return "elseif"; //$NON-NLS-1$
3368 case TokenNameendfor :
3369 return "endfor"; //$NON-NLS-1$
3370 case TokenNameendforeach :
3371 return "endforeach"; //$NON-NLS-1$
3372 case TokenNameendif :
3373 return "endif"; //$NON-NLS-1$
3374 case TokenNameendswitch :
3375 return "endswitch"; //$NON-NLS-1$
3376 case TokenNameendwhile :
3377 return "endwhile"; //$NON-NLS-1$
3378 case TokenNameextends :
3379 return "extends"; //$NON-NLS-1$
3380 case TokenNamefalse :
3381 return "false"; //$NON-NLS-1$
3383 return "for"; //$NON-NLS-1$
3384 case TokenNameforeach :
3385 return "foreach"; //$NON-NLS-1$
3386 case TokenNamefunction :
3387 return "function"; //$NON-NLS-1$
3388 case TokenNameglobal :
3389 return "global"; //$NON-NLS-1$
3391 return "if"; //$NON-NLS-1$
3392 case TokenNameinclude :
3393 return "include"; //$NON-NLS-1$
3394 case TokenNameinclude_once :
3395 return "include_once"; //$NON-NLS-1$
3396 case TokenNamelist :
3397 return "list"; //$NON-NLS-1$
3399 return "new"; //$NON-NLS-1$
3400 case TokenNamenull :
3401 return "null"; //$NON-NLS-1$
3402 case TokenNameprint :
3403 return "print"; //$NON-NLS-1$
3404 case TokenNamerequire :
3405 return "require"; //$NON-NLS-1$
3406 case TokenNamerequire_once :
3407 return "require_once"; //$NON-NLS-1$
3408 case TokenNamereturn :
3409 return "return"; //$NON-NLS-1$
3410 case TokenNamestatic :
3411 return "static"; //$NON-NLS-1$
3412 case TokenNameswitch :
3413 return "switch"; //$NON-NLS-1$
3414 case TokenNametrue :
3415 return "true"; //$NON-NLS-1$
3417 return "var"; //$NON-NLS-1$
3418 case TokenNamewhile :
3419 return "while"; //$NON-NLS-1$
// literals, rendered together with their source text
3420 case TokenNameIntegerLiteral :
3421 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3422 case TokenNameDoubleLiteral :
3423 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3424 case TokenNameStringLiteral :
3425 return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3426 case TokenNameStringConstant :
3427 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3428 case TokenNameStringInterpolated :
3429 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3430 case TokenNameHEREDOC :
3431 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// operators and punctuation
3433 case TokenNamePLUS_PLUS :
3434 return "++"; //$NON-NLS-1$
3435 case TokenNameMINUS_MINUS :
3436 return "--"; //$NON-NLS-1$
3437 case TokenNameEQUAL_EQUAL :
3438 return "=="; //$NON-NLS-1$
3439 case TokenNameEQUAL_GREATER :
3440 return "=>"; //$NON-NLS-1$
3441 case TokenNameLESS_EQUAL :
3442 return "<="; //$NON-NLS-1$
3443 case TokenNameGREATER_EQUAL :
3444 return ">="; //$NON-NLS-1$
3445 case TokenNameNOT_EQUAL :
3446 return "!="; //$NON-NLS-1$
3447 case TokenNameLEFT_SHIFT :
3448 return "<<"; //$NON-NLS-1$
3449 case TokenNameRIGHT_SHIFT :
3450 return ">>"; //$NON-NLS-1$
3451 case TokenNamePLUS_EQUAL :
3452 return "+="; //$NON-NLS-1$
3453 case TokenNameMINUS_EQUAL :
3454 return "-="; //$NON-NLS-1$
3455 case TokenNameMULTIPLY_EQUAL :
3456 return "*="; //$NON-NLS-1$
3457 case TokenNameDIVIDE_EQUAL :
3458 return "/="; //$NON-NLS-1$
3459 case TokenNameAND_EQUAL :
3460 return "&="; //$NON-NLS-1$
3461 case TokenNameOR_EQUAL :
3462 return "|="; //$NON-NLS-1$
3463 case TokenNameXOR_EQUAL :
3464 return "^="; //$NON-NLS-1$
3465 case TokenNameREMAINDER_EQUAL :
3466 return "%="; //$NON-NLS-1$
3467 case TokenNameLEFT_SHIFT_EQUAL :
3468 return "<<="; //$NON-NLS-1$
3469 case TokenNameRIGHT_SHIFT_EQUAL :
3470 return ">>="; //$NON-NLS-1$
3471 case TokenNameOR_OR :
3472 return "||"; //$NON-NLS-1$
3473 case TokenNameAND_AND :
3474 return "&&"; //$NON-NLS-1$
3475 case TokenNamePLUS :
3476 return "+"; //$NON-NLS-1$
3477 case TokenNameMINUS :
3478 return "-"; //$NON-NLS-1$
3479 case TokenNameMINUS_GREATER :
3482 return "!"; //$NON-NLS-1$
3483 case TokenNameREMAINDER :
3484 return "%"; //$NON-NLS-1$
3486 return "^"; //$NON-NLS-1$
3488 return "&"; //$NON-NLS-1$
3489 case TokenNameMULTIPLY :
3490 return "*"; //$NON-NLS-1$
3492 return "|"; //$NON-NLS-1$
3493 case TokenNameTWIDDLE :
3494 return "~"; //$NON-NLS-1$
3495 case TokenNameTWIDDLE_EQUAL :
3496 return "~="; //$NON-NLS-1$
3497 case TokenNameDIVIDE :
3498 return "/"; //$NON-NLS-1$
3499 case TokenNameGREATER :
3500 return ">"; //$NON-NLS-1$
3501 case TokenNameLESS :
3502 return "<"; //$NON-NLS-1$
3503 case TokenNameLPAREN :
3504 return "("; //$NON-NLS-1$
3505 case TokenNameRPAREN :
3506 return ")"; //$NON-NLS-1$
3507 case TokenNameLBRACE :
3508 return "{"; //$NON-NLS-1$
3509 case TokenNameRBRACE :
3510 return "}"; //$NON-NLS-1$
3511 case TokenNameLBRACKET :
3512 return "["; //$NON-NLS-1$
3513 case TokenNameRBRACKET :
3514 return "]"; //$NON-NLS-1$
3515 case TokenNameSEMICOLON :
3516 return ";"; //$NON-NLS-1$
3517 case TokenNameQUESTION :
3518 return "?"; //$NON-NLS-1$
3519 case TokenNameCOLON :
3520 return ":"; //$NON-NLS-1$
3521 case TokenNameCOMMA :
3522 return ","; //$NON-NLS-1$
3524 return "."; //$NON-NLS-1$
3525 case TokenNameEQUAL :
3526 return "="; //$NON-NLS-1$
3529 case TokenNameDOLLAR_LBRACE :
3532 return "EOF"; //$NON-NLS-1$
// fallback for values with no dedicated case
// NOTE(review): new Integer(int) is deprecated in current JDKs; concatenating
// act directly produces the same text
3534 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3539 boolean tokenizeComments,
3540 boolean tokenizeWhiteSpace,
3541 boolean checkNonExternalizedStringLiterals) {
3545 checkNonExternalizedStringLiterals,
3550 boolean tokenizeComments,
3551 boolean tokenizeWhiteSpace,
3552 boolean checkNonExternalizedStringLiterals,
3553 boolean assertMode) {
3554 this.eofPosition = Integer.MAX_VALUE;
3555 this.tokenizeComments = tokenizeComments;
3556 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3557 this.checkNonExternalizedStringLiterals =
3558 checkNonExternalizedStringLiterals;
3559 this.assertMode = assertMode;
// Hands the current NLS line to parseTags so that the tags found in the
// current token source are matched against its string literals.
// NOTE(review): presumably returns without doing anything when currentLine
// is null - confirm
3562 private void checkNonExternalizeString() throws InvalidInputException {
3563 if (currentLine == null)
3565 parseTags(currentLine);
3568 private void parseTags(NLSLine line) throws InvalidInputException {
3569 String s = new String(getCurrentTokenSource());
3570 int pos = s.indexOf(TAG_PREFIX);
3571 int lineLength = line.size();
3573 int start = pos + TAG_PREFIX_LENGTH;
3574 int end = s.indexOf(TAG_POSTFIX, start);
3575 String index = s.substring(start, end);
3578 i = Integer.parseInt(index) - 1;
3579 // Tags are one based not zero based.
3580 } catch (NumberFormatException e) {
3581 i = -1; // we don't want to consider this as a valid NLS tag
3583 if (line.exists(i)) {
3586 pos = s.indexOf(TAG_PREFIX, start);
3589 this.nonNLSStrings = new StringLiteral[lineLength];
3590 int nonNLSCounter = 0;
3591 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3592 StringLiteral literal = (StringLiteral) iterator.next();
3593 if (literal != null) {
3594 this.nonNLSStrings[nonNLSCounter++] = literal;
3597 if (nonNLSCounter == 0) {
3598 this.nonNLSStrings = null;
3602 this.wasNonExternalizedStringLiteral = true;
3603 if (nonNLSCounter != lineLength) {
3607 (this.nonNLSStrings = new StringLiteral[nonNLSCounter]),