1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
17 import net.sourceforge.phpdt.core.compiler.*;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
20 public class Scanner implements IScanner, ITerminalSymbols {
23 - getNextToken() which returns the type of the current token
24 (this value is not memorized by the scanner)
25 - getCurrentTokenSource() which provides the token's "REAL" source
26 (i.e. all unicode escapes have been transformed into the corresponding chars)
27 - sourceStart gives the position into the stream
28 - currentPosition-1 gives the sourceEnd position into the stream
// ---- scanner state ----
// assert-keyword handling flags
32 private boolean assertMode;
33 public boolean useAssertAsAnIndentifier = false;
34 //flag indicating whether the processed source contains occurrences of the keyword assert
35 public boolean containsAssertKeyword = false;
// consulted by getNextToken() each time a '\r' or '\n' is read
37 public boolean recordLineSeparator;
// NOTE(review): presumably true while scanning between the PHP open and close tags - confirm
38 public boolean phpMode = false;
// last character read from source
40 public char currentCharacter;
// offset in source where the current token starts (see getCurrentTokenStartPosition())
41 public int startPosition;
// offset of the next character to read; currentPosition - 1 is the end of the current token
42 public int currentPosition;
43 public int initialPosition, eofPosition;
44 // once currentPosition is past eofPosition, EOF is generated instead of real tokens from the source
46 public boolean tokenizeComments;
// when true, getNextToken() reports runs of white space as TokenNameWHITESPACE
47 public boolean tokenizeWhiteSpace;
49 //source should be viewed as a window (i.e. a part)
50 //of an entire, possibly very large, stream
// token characters with unicode escapes already decoded; filled starting at index 1
54 public char[] withoutUnicodeBuffer;
55 public int withoutUnicodePtr;
56 //when == 0 ==> no unicode in the current token
// set by getNextChar(char) to true when the character decoded from a unicode escape is a backslash
57 public boolean unicodeAsBackSlash = false;
59 public boolean scanningFloatLiteral = false;
61 //support for /** comments
62 //public char[][] comments = new char[10][];
63 public int[] commentStops = new int[10];
64 public int[] commentStarts = new int[10];
65 public int commentPtr = -1; // -1 means no comment has been recorded
67 //diet parsing support - jump over some method bodies when requested
68 public boolean diet = false;
70 //support for line-based debuggers:
71 //remember the positions of the line terminators (cr/lf)
72 public int[] lineEnds = new int[250];
73 public int linePtr = -1;
// NOTE(review): presumably "the previous character was a CR" - confirm against the line-recording code
74 public boolean wasAcr = false;
// Message constants for scanning errors. getNextToken() passes them to
// InvalidInputException (e.g. UNTERMINATED_STRING) and compares
// e.getMessage() against INVALID_ESCAPE to decide on recovery.
76 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
78 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
79 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
80 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
81 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
82 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
83 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
84 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
86 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
87 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
88 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
89 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
91 //----------------optimized identifier management------------------
// One shared single-character array per lower-case letter a-z.
// NOTE(review): presumably handed out for one-letter identifiers so no new array is allocated - confirm
92 static final char[] charArray_a = new char[] { 'a' },
93 charArray_b = new char[] { 'b' },
94 charArray_c = new char[] { 'c' },
95 charArray_d = new char[] { 'd' },
96 charArray_e = new char[] { 'e' },
97 charArray_f = new char[] { 'f' },
98 charArray_g = new char[] { 'g' },
99 charArray_h = new char[] { 'h' },
100 charArray_i = new char[] { 'i' },
101 charArray_j = new char[] { 'j' },
102 charArray_k = new char[] { 'k' },
103 charArray_l = new char[] { 'l' },
104 charArray_m = new char[] { 'm' },
105 charArray_n = new char[] { 'n' },
106 charArray_o = new char[] { 'o' },
107 charArray_p = new char[] { 'p' },
108 charArray_q = new char[] { 'q' },
109 charArray_r = new char[] { 'r' },
110 charArray_s = new char[] { 's' },
111 charArray_t = new char[] { 't' },
112 charArray_u = new char[] { 'u' },
113 charArray_v = new char[] { 'v' },
114 charArray_w = new char[] { 'w' },
115 charArray_x = new char[] { 'x' },
116 charArray_y = new char[] { 'y' },
117 charArray_z = new char[] { 'z' };
// placeholder of six NUL characters; every slot of charArray_length initially refers to it
119 static final char[] initCharArray =
120 new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
// second and third dimensions of charArray_length
121 static final int TableSize = 30, InternalTableSize = 6;
// first dimension of charArray_length; getCurrentIdentifierSource() switches on the
// token length and refers to this constant for its optimizedCurrentTokenSourceN() paths
123 public static final int OptimizedLength = 6;
// per-instance table of char[] entries, sized OptimizedLength x TableSize x InternalTableSize
125 final char[][][][] charArray_length =
126 new char[OptimizedLength][TableSize][InternalTableSize][];
127 // support for detecting non-externalized string literals
128 int currentLineNr = -1;
129 int previousLineNr = -1;
// NLS bookkeeping for the line being scanned; getNextToken() creates a new NLSLine
// and appends it to lines whenever this is null and a string token has been read
130 NLSLine currentLine = null;
131 List lines = new ArrayList();
// an NLS tag has the form TAG_PREFIX, an int, then TAG_POSTFIX
132 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
133 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
134 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
135 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
136 public StringLiteral[] nonNLSStrings = null;
// when true, getNextToken() performs the NLS-tag bookkeeping after each string token
137 public boolean checkNonExternalizedStringLiterals = true;
138 public boolean wasNonExternalizedStringLiteral = false;
// Point every slot of charArray_length at the shared initCharArray placeholder.
// The literal bound 6 on the outer loop equals OptimizedLength, the size of the first dimension.
141 for (int i = 0; i < 6; i++) {
142 for (int j = 0; j < TableSize; j++) {
143 for (int k = 0; k < InternalTableSize; k++) {
144 charArray_length[i][j][k] = initCharArray;
// NOTE(review): static counter whose use is not visible in this part of the file - confirm its purpose
149 static int newEntry2 = 0,
// Bracket kind codes (0..2); BracketKinds is the number of distinct kinds.
155 public static final int RoundBracket = 0;
156 public static final int SquareBracket = 1;
157 public static final int CurlyBracket = 2;
158 public static final int BracketKinds = 3;
/**
 * Creates a scanner by delegating to the three-argument constructor,
 * passing false as its third argument.
 * NOTE(review): the meaning of that third argument is not visible here - confirm.
 *
 * @param tokenizeComments forwarded unchanged to the three-argument constructor
 * @param tokenizeWhiteSpace forwarded unchanged to the three-argument constructor
 */
162 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
163 this(tokenizeComments, tokenizeWhiteSpace, false);
/**
 * Tells whether the scanner has reached the end of the source array.
 *
 * @return true when currentPosition equals source.length
 */
165 public final boolean atEnd() {
166 // Not meaningful when source is only a window onto a larger input stream:
167 // the test is made against source.length, not against eofPosition.
169 return source.length == currentPosition;
/**
 * Returns the characters of the current identifier token, with unicode
 * escapes already decoded.
 *
 * @return the token's characters as a char array
 */
171 public char[] getCurrentIdentifierSource() {
172 //return the token's REAL source (i.e. unicode escapes are already decoded)
175 if (withoutUnicodePtr != 0)
176 //0 is used as a fast test flag, so the real first char is at position 1
// decoded path: the result holds withoutUnicodePtr chars taken from withoutUnicodeBuffer
178 withoutUnicodeBuffer,
180 result = new char[withoutUnicodePtr],
// raw path: the token spans [startPosition, currentPosition) in source
184 int length = currentPosition - startPosition;
185 switch (length) { // see OptimizedLength
// NOTE(review): presumably lengths 1..6 map to the six helpers below - confirm
187 return optimizedCurrentTokenSource1();
189 return optimizedCurrentTokenSource2();
191 return optimizedCurrentTokenSource3();
193 return optimizedCurrentTokenSource4();
195 return optimizedCurrentTokenSource5();
197 return optimizedCurrentTokenSource6();
// other lengths: a fresh array of exactly length chars
203 result = new char[length],
/**
 * Returns the source offset of the last character of the current token.
 *
 * @return currentPosition - 1
 */
209 public int getCurrentTokenEndPosition() {
210 return this.currentPosition - 1;
/**
 * Returns the characters of the current token, with unicode escapes
 * already decoded.
 *
 * @return a newly allocated char array holding the token's characters
 */
212 public final char[] getCurrentTokenSource() {
213 // Return the token's REAL source (i.e. unicode escapes are already decoded)
216 if (withoutUnicodePtr != 0)
217 // 0 is used as a fast test flag, so the real first char is at position 1
// decoded path: the result holds withoutUnicodePtr chars taken from withoutUnicodeBuffer
219 withoutUnicodeBuffer,
221 result = new char[withoutUnicodePtr],
// raw path: the token spans [startPosition, currentPosition) in source
229 result = new char[length = currentPosition - startPosition],
/**
 * Variant of getCurrentTokenSource() whose raw (no unicode escape) path
 * measures the token from a caller-supplied start offset instead of startPosition.
 *
 * @param startPos source offset used as the start of the token on the raw path
 * @return a newly allocated char array holding the token's characters
 */
236 public final char[] getCurrentTokenSource(int startPos) {
237 // Return the token's REAL source (i.e. unicode escapes are already decoded)
240 if (withoutUnicodePtr != 0)
241 // 0 is used as a fast test flag, so the real first char is at position 1
// decoded path: sized by withoutUnicodePtr; startPos does not appear in this allocation
243 withoutUnicodeBuffer,
245 result = new char[withoutUnicodePtr],
// raw path: the token spans [startPos, currentPosition) in source
253 result = new char[length = currentPosition - startPos],
/**
 * Returns the characters of the current string token without its opening
 * and closing delimiter, with unicode escapes already decoded.
 *
 * @return a newly allocated char array two characters shorter than the token
 */
260 public final char[] getCurrentTokenSourceString() {
261 //return the token's REAL source (i.e. unicode escapes are already decoded).
262 //REMOVE the two " that are at the beginning and the end.
265 if (withoutUnicodePtr != 0)
266 //0 is used as a fast test flag, so the real first char is at position 1
267 System.arraycopy(withoutUnicodeBuffer, 2,
268 //2 is 1 (real start) + 1 (to jump over the opening ")
269 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
// raw path: token length minus the two delimiters
275 result = new char[length = currentPosition - startPosition - 2],
/**
 * Returns the source offset of the first character of the current token.
 *
 * @return startPosition
 */
281 public int getCurrentTokenStartPosition() {
282 return this.startPosition;
285 * Search the source position corresponding to the end of a given line number
287 * Line numbers are 1-based, and relative to the scanner initialPosition.
288 * Character positions are 0-based.
290 * In case the given line number is inconsistent, answers -1.
292 public final int getLineEnd(int lineNumber) {
// guards: no line table recorded, or a line number beyond the table
294 if (lineEnds == null)
296 if (lineNumber >= lineEnds.length)
// special case for the last slot of the table
301 if (lineNumber == lineEnds.length - 1)
303 return lineEnds[lineNumber - 1];
304 // line numbers are 1-based while lineEnds is indexed from 0, hence the -1
307 * Search the source position corresponding to the beginning of a given line number
309 * Line numbers are 1-based, and relative to the scanner initialPosition.
310 * Character positions are 0-based.
312 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
314 * In case the given line number is inconsistent, answers -1.
316 public final int getLineStart(int lineNumber) {
// guards: no line table recorded, or a line number beyond the table
318 if (lineEnds == null)
320 if (lineNumber >= lineEnds.length)
// NOTE(review): presumably the first-line case, which starts at initialPosition - confirm
326 return initialPosition;
327 return lineEnds[lineNumber - 2] + 1;
328 // a line starts one character after the end of the previous line
/**
 * Consumes the next character only if it equals testedChar. A backslash
 * followed by 'u' is first decoded as a unicode escape.
 *
 * @param testedChar the character expected next
 * @return true when the next character equals testedChar (it is then consumed);
 *         false otherwise, with currentPosition restored to its value on entry
 */
330 public final boolean getNextChar(char testedChar) {
332 //handle the case of unicode escapes.
333 //when a unicode escape appears we must use a buffer that holds the decoded char values
334 //At the end of this method currentCharacter holds the newly visited char
335 //and currentPosition points right after it
336 //Both previous lines are true if the currentCharacter is == to the testedChar
337 //On false, currentPosition is restored.
// NOTE(review): currentCharacter is assigned before the test and no restore of it is visible
339 //ALL getNextChar.... ARE OPTIMIZED COPIES
// saved so that every failing path can rewind
341 int temp = currentPosition;
343 if (((currentCharacter = source[currentPosition++]) == '\\')
344 && (source[currentPosition] == 'u')) {
345 //-------------unicode processing ------------
// skip every consecutive 'u' of the escape
349 while (source[currentPosition] == 'u') {
// four hex digits; a numeric value above 15 makes the escape invalid
354 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
356 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
358 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
360 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
362 currentPosition = temp;
// combine the four hex digits into one char
366 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
367 if (currentCharacter != testedChar) {
368 currentPosition = temp;
371 unicodeAsBackSlash = currentCharacter == '\\';
373 //need the unicode buffer
374 if (withoutUnicodePtr == 0) {
375 //buffer all the chars of the token that were read before this escape
376 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
380 withoutUnicodeBuffer,
384 //append the decoded char to the buffer
385 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
388 } //-------------end unicode processing--------------
// plain (non-escape) character
390 if (currentCharacter != testedChar) {
391 currentPosition = temp;
394 unicodeAsBackSlash = false;
// keep the decoded buffer in step once it is in use for this token
395 if (withoutUnicodePtr != 0)
396 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// ran off the end of source: rewind
399 } catch (IndexOutOfBoundsException e) {
400 unicodeAsBackSlash = false;
401 currentPosition = temp;
/**
 * Consumes the next character only if it equals one of two candidates.
 * A backslash followed by 'u' is first decoded as a unicode escape.
 *
 * @param testedChar1 first candidate
 * @param testedChar2 second candidate
 * @return 0 for testedChar1, 1 for testedChar2, -1 for anything else
 *         (currentPosition is then restored to its value on entry)
 */
405 public final int getNextChar(char testedChar1, char testedChar2) {
406 //result: 0 means testedChar1, 1 means testedChar2, -1 means any other char
407 //the test can be done with (x==0) for the first and (x>0) for the second
408 //handle the case of unicode escapes.
409 //when a unicode escape appears we must use a buffer that holds the decoded char values
410 //At the end of this method currentCharacter holds the newly visited char
411 //and currentPosition points right after it
412 //Both previous lines are true if the currentCharacter is == to the testedChar1/2
413 //On -1, currentPosition is restored.
415 //ALL getNextChar.... ARE OPTIMIZED COPIES
// saved so that every failing path can rewind
417 int temp = currentPosition;
420 if (((currentCharacter = source[currentPosition++]) == '\\')
421 && (source[currentPosition] == 'u')) {
422 //-------------unicode processing ------------
// skip every consecutive 'u' of the escape
426 while (source[currentPosition] == 'u') {
// four hex digits; a numeric value above 15 makes the escape invalid
431 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
433 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
435 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
437 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
439 currentPosition = temp;
// combine the four hex digits into one char
443 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
444 if (currentCharacter == testedChar1)
446 else if (currentCharacter == testedChar2)
449 currentPosition = temp;
453 //need the unicode buffer
454 if (withoutUnicodePtr == 0) {
455 //buffer all the chars of the token that were read before this escape
456 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
460 withoutUnicodeBuffer,
464 //append the decoded char to the buffer
465 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
467 } //-------------end unicode processing--------------
// plain (non-escape) character
469 if (currentCharacter == testedChar1)
471 else if (currentCharacter == testedChar2)
474 currentPosition = temp;
// keep the decoded buffer in step once it is in use for this token
478 if (withoutUnicodePtr != 0)
479 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// ran off the end of source: rewind
482 } catch (IndexOutOfBoundsException e) {
483 currentPosition = temp;
/**
 * Consumes the next character only if Character.isDigit accepts it.
 * A backslash followed by 'u' is first decoded as a unicode escape.
 *
 * @return true when the next character is a digit (it is then consumed);
 *         false otherwise, with currentPosition restored to its value on entry
 */
487 public final boolean getNextCharAsDigit() {
489 //handle the case of unicode escapes.
490 //when a unicode escape appears we must use a buffer that holds the decoded char values
491 //At the end of this method currentCharacter holds the newly visited char
492 //and currentPosition points right after it
493 //Both previous lines are true if the currentCharacter is a digit
494 //On false, currentPosition is restored.
496 //ALL getNextChar.... ARE OPTIMIZED COPIES
// saved so that every failing path can rewind
498 int temp = currentPosition;
500 if (((currentCharacter = source[currentPosition++]) == '\\')
501 && (source[currentPosition] == 'u')) {
502 //-------------unicode processing ------------
// skip every consecutive 'u' of the escape
506 while (source[currentPosition] == 'u') {
// four hex digits; a numeric value above 15 makes the escape invalid
511 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
513 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
515 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
517 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
519 currentPosition = temp;
// combine the four hex digits into one char
523 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
524 if (!Character.isDigit(currentCharacter)) {
525 currentPosition = temp;
529 //need the unicode buffer
530 if (withoutUnicodePtr == 0) {
531 //buffer all the chars of the token that were read before this escape
532 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
536 withoutUnicodeBuffer,
540 //append the decoded char to the buffer
541 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
543 } //-------------end unicode processing--------------
// plain (non-escape) character
545 if (!Character.isDigit(currentCharacter)) {
546 currentPosition = temp;
// keep the decoded buffer in step once it is in use for this token
549 if (withoutUnicodePtr != 0)
550 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// ran off the end of source: rewind
553 } catch (IndexOutOfBoundsException e) {
554 currentPosition = temp;
/**
 * Consumes the next character only if it is a digit in the given radix,
 * i.e. Character.digit(c, radix) is not -1. A backslash followed by 'u'
 * is first decoded as a unicode escape.
 *
 * @param radix the radix passed to Character.digit
 * @return true when the next character is a digit in that radix (it is then consumed);
 *         false otherwise, with currentPosition restored to its value on entry
 */
558 public final boolean getNextCharAsDigit(int radix) {
560 //handle the case of unicode escapes.
561 //when a unicode escape appears we must use a buffer that holds the decoded char values
562 //At the end of this method currentCharacter holds the newly visited char
563 //and currentPosition points right after it
564 //Both previous lines are true if the currentCharacter is a digit based on radix
565 //On false, currentPosition is restored.
567 //ALL getNextChar.... ARE OPTIMIZED COPIES
// saved so that every failing path can rewind
569 int temp = currentPosition;
571 if (((currentCharacter = source[currentPosition++]) == '\\')
572 && (source[currentPosition] == 'u')) {
573 //-------------unicode processing ------------
// skip every consecutive 'u' of the escape
577 while (source[currentPosition] == 'u') {
// four hex digits; a numeric value above 15 makes the escape invalid
582 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
584 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
586 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
588 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
590 currentPosition = temp;
// combine the four hex digits into one char
594 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
595 if (Character.digit(currentCharacter, radix) == -1) {
596 currentPosition = temp;
600 //need the unicode buffer
601 if (withoutUnicodePtr == 0) {
602 //buffer all the chars of the token that were read before this escape
603 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
607 withoutUnicodeBuffer,
611 //append the decoded char to the buffer
612 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
614 } //-------------end unicode processing--------------
// plain (non-escape) character
616 if (Character.digit(currentCharacter, radix) == -1) {
617 currentPosition = temp;
// keep the decoded buffer in step once it is in use for this token
620 if (withoutUnicodePtr != 0)
621 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// ran off the end of source: rewind
624 } catch (IndexOutOfBoundsException e) {
625 currentPosition = temp;
/**
 * Consumes the next character only if Character.isJavaIdentifierPart accepts it.
 * A backslash followed by 'u' is first decoded as a unicode escape.
 *
 * @return true when the next character is an identifier part (it is then consumed);
 *         false otherwise, with currentPosition restored to its value on entry
 */
629 public boolean getNextCharAsJavaIdentifierPart() {
631 //handle the case of unicode escapes.
632 //when a unicode escape appears we must use a buffer that holds the decoded char values
633 //At the end of this method currentCharacter holds the newly visited char
634 //and currentPosition points right after it
635 //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
636 //On false, currentPosition is restored.
638 //ALL getNextChar.... ARE OPTIMIZED COPIES
// saved so that every failing path can rewind
640 int temp = currentPosition;
642 if (((currentCharacter = source[currentPosition++]) == '\\')
643 && (source[currentPosition] == 'u')) {
644 //-------------unicode processing ------------
// skip every consecutive 'u' of the escape
648 while (source[currentPosition] == 'u') {
// four hex digits; a numeric value above 15 makes the escape invalid
653 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
655 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
657 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
659 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
661 currentPosition = temp;
// combine the four hex digits into one char
665 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
666 if (!Character.isJavaIdentifierPart(currentCharacter)) {
667 currentPosition = temp;
671 //need the unicode buffer
672 if (withoutUnicodePtr == 0) {
673 //buffer all the chars of the token that were read before this escape
674 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
678 withoutUnicodeBuffer,
682 //append the decoded char to the buffer
683 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
685 } //-------------end unicode processing--------------
// plain (non-escape) character
687 if (!Character.isJavaIdentifierPart(currentCharacter)) {
688 currentPosition = temp;
// keep the decoded buffer in step once it is in use for this token
692 if (withoutUnicodePtr != 0)
693 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// ran off the end of source: rewind
696 } catch (IndexOutOfBoundsException e) {
697 currentPosition = temp;
702 public int getNextToken() throws InvalidInputException {
705 startPosition = currentPosition;
706 currentCharacter = source[currentPosition++];
707 if (currentCharacter == '<') {
708 if (getNextChar('?')) {
709 currentCharacter = source[currentPosition++];
710 if ((currentCharacter == ' ')
711 || Character.isWhitespace(currentCharacter)) {
713 startPosition = currentPosition;
717 (currentCharacter == 'P') || (currentCharacter == 'p');
719 int test = getNextChar('H', 'h');
721 test = getNextChar('P', 'p');
724 startPosition = currentPosition;
733 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
734 if (recordLineSeparator) {
741 } //-----------------end switch while try--------------------
742 catch (IndexOutOfBoundsException e) {
749 jumpOverMethodBody();
751 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
754 while (true) { //loop for jumping over comments
755 withoutUnicodePtr = 0;
756 //start with a new token (even comment written with unicode )
758 // ---------Consume white space and handles startPosition---------
759 int whiteStart = currentPosition;
760 boolean isWhiteSpace;
762 startPosition = currentPosition;
763 if (((currentCharacter = source[currentPosition++]) == '\\')
764 && (source[currentPosition] == 'u')) {
765 isWhiteSpace = jumpOverUnicodeWhiteSpace();
767 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
768 checkNonExternalizeString();
769 if (recordLineSeparator) {
776 (currentCharacter == ' ')
777 || Character.isWhitespace(currentCharacter);
779 } while (isWhiteSpace);
780 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
781 // reposition scanner in case we are interested by spaces as tokens
783 startPosition = whiteStart;
784 return TokenNameWHITESPACE;
786 //little trick to get out in the middle of a source computation
787 if (currentPosition > eofPosition)
790 // ---------Identify the next token-------------
792 switch (currentCharacter) {
794 return TokenNameLPAREN;
796 return TokenNameRPAREN;
798 return TokenNameLBRACE;
800 return TokenNameRBRACE;
802 return TokenNameLBRACKET;
804 return TokenNameRBRACKET;
806 return TokenNameSEMICOLON;
808 return TokenNameCOMMA;
811 if (getNextCharAsDigit())
812 return scanNumber(true);
817 if ((test = getNextChar('+', '=')) == 0)
818 return TokenNamePLUS_PLUS;
820 return TokenNamePLUS_EQUAL;
821 return TokenNamePLUS;
826 if ((test = getNextChar('-', '=')) == 0)
827 return TokenNameMINUS_MINUS;
829 return TokenNameMINUS_EQUAL;
830 if (getNextChar('>'))
831 return TokenNameMINUS_GREATER;
833 return TokenNameMINUS;
836 if (getNextChar('='))
837 return TokenNameTWIDDLE_EQUAL;
838 return TokenNameTWIDDLE;
840 if (getNextChar('='))
841 return TokenNameNOT_EQUAL;
844 if (getNextChar('='))
845 return TokenNameMULTIPLY_EQUAL;
846 return TokenNameMULTIPLY;
848 if (getNextChar('='))
849 return TokenNameREMAINDER_EQUAL;
850 return TokenNameREMAINDER;
854 if ((test = getNextChar('=', '<')) == 0)
855 return TokenNameLESS_EQUAL;
857 if (getNextChar('='))
858 return TokenNameLEFT_SHIFT_EQUAL;
859 if (getNextChar('<')) {
860 int heredocStart = currentPosition;
861 int heredocLength = 0;
862 currentCharacter = source[currentPosition++];
863 if (Character.isJavaIdentifierStart(currentCharacter)) {
864 currentCharacter = source[currentPosition++];
866 return TokenNameERROR;
868 while (Character.isJavaIdentifierPart(currentCharacter)) {
869 currentCharacter = source[currentPosition++];
872 heredocLength = currentPosition - heredocStart - 1;
874 // heredoc end-tag determination
875 boolean endTag = true;
878 ch = source[currentPosition++];
879 if (ch == '\r' || ch == '\n') {
880 if (recordLineSeparator) {
885 for (int i = 0; i < heredocLength; i++) {
886 if (source[currentPosition + i]
887 != source[heredocStart + i]) {
893 currentPosition += heredocLength - 1;
894 currentCharacter = source[currentPosition++];
895 break; // do...while loop
903 return TokenNameHEREDOC;
905 return TokenNameLEFT_SHIFT;
907 return TokenNameLESS;
912 if ((test = getNextChar('=', '>')) == 0)
913 return TokenNameGREATER_EQUAL;
915 if ((test = getNextChar('=', '>')) == 0)
916 return TokenNameRIGHT_SHIFT_EQUAL;
917 return TokenNameRIGHT_SHIFT;
919 return TokenNameGREATER;
922 if (getNextChar('='))
923 return TokenNameEQUAL_EQUAL;
924 if (getNextChar('>'))
925 return TokenNameEQUAL_GREATER;
926 return TokenNameEQUAL;
930 if ((test = getNextChar('&', '=')) == 0)
931 return TokenNameAND_AND;
933 return TokenNameAND_EQUAL;
939 if ((test = getNextChar('|', '=')) == 0)
940 return TokenNameOR_OR;
942 return TokenNameOR_EQUAL;
946 if (getNextChar('='))
947 return TokenNameXOR_EQUAL;
950 if (getNextChar('>')) {
952 return TokenNameStopPHP;
954 return TokenNameQUESTION;
956 if (getNextChar(':'))
957 return TokenNameCOLON_COLON;
958 return TokenNameCOLON;
964 // if ((test = getNextChar('\n', '\r')) == 0) {
965 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
968 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
969 // for (int lookAhead = 0;
972 // if (currentPosition + lookAhead
975 // if (source[currentPosition + lookAhead]
978 // if (source[currentPosition + lookAhead]
980 // currentPosition += lookAhead + 1;
984 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
987 // if (getNextChar('\'')) {
988 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
989 // for (int lookAhead = 0;
992 // if (currentPosition + lookAhead
995 // if (source[currentPosition + lookAhead]
998 // if (source[currentPosition + lookAhead]
1000 // currentPosition += lookAhead + 1;
1004 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1006 // if (getNextChar('\\'))
1007 // scanEscapeCharacter();
1008 // else { // consume next character
1009 // unicodeAsBackSlash = false;
1010 // if (((currentCharacter = source[currentPosition++])
1012 // && (source[currentPosition] == 'u')) {
1013 // getNextUnicodeChar();
1015 // if (withoutUnicodePtr != 0) {
1016 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1017 // currentCharacter;
1021 // // if (getNextChar('\''))
1022 // // return TokenNameCharacterLiteral;
1023 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1024 // for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1025 // if (currentPosition + lookAhead == source.length)
1027 // if (source[currentPosition + lookAhead] == '\n')
1029 // if (source[currentPosition + lookAhead] == '\'') {
1030 // currentPosition += lookAhead + 1;
1034 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1037 // consume next character
1038 unicodeAsBackSlash = false;
1039 if (((currentCharacter = source[currentPosition++]) == '\\')
1040 && (source[currentPosition] == 'u')) {
1041 getNextUnicodeChar();
1043 if (withoutUnicodePtr != 0) {
1044 withoutUnicodeBuffer[++withoutUnicodePtr] =
1049 while (currentCharacter != '\'') {
1051 /**** in PHP \r and \n are valid in string literals ****/
1052 // if ((currentCharacter == '\n')
1053 // || (currentCharacter == '\r')) {
1054 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1055 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1056 // if (currentPosition + lookAhead == source.length)
1058 // if (source[currentPosition + lookAhead] == '\n')
1060 // if (source[currentPosition + lookAhead] == '\"') {
1061 // currentPosition += lookAhead + 1;
1065 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1067 if (currentCharacter == '\\') {
1068 int escapeSize = currentPosition;
1069 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1070 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1071 scanEscapeCharacter();
1072 escapeSize = currentPosition - escapeSize;
1073 if (withoutUnicodePtr == 0) {
1074 //buffer all the entries that have been left aside....
1076 currentPosition - escapeSize - 1 - startPosition;
1080 withoutUnicodeBuffer,
1083 withoutUnicodeBuffer[++withoutUnicodePtr] =
1085 } else { //overwrite the / in the buffer
1086 withoutUnicodeBuffer[withoutUnicodePtr] =
1088 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1089 withoutUnicodePtr--;
1093 // consume next character
1094 unicodeAsBackSlash = false;
1095 if (((currentCharacter = source[currentPosition++]) == '\\')
1096 && (source[currentPosition] == 'u')) {
1097 getNextUnicodeChar();
1099 if (withoutUnicodePtr != 0) {
1100 withoutUnicodeBuffer[++withoutUnicodePtr] =
1106 } catch (IndexOutOfBoundsException e) {
1107 throw new InvalidInputException(UNTERMINATED_STRING);
1108 } catch (InvalidInputException e) {
1109 if (e.getMessage().equals(INVALID_ESCAPE)) {
1110 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1111 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1112 if (currentPosition + lookAhead == source.length)
1114 if (source[currentPosition + lookAhead] == '\n')
1116 if (source[currentPosition + lookAhead] == '\'') {
1117 currentPosition += lookAhead + 1;
1125 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1126 if (currentLine == null) {
1127 currentLine = new NLSLine();
1128 lines.add(currentLine);
1132 getCurrentTokenSourceString(),
1134 currentPosition - 1));
1136 return TokenNameStringConstant;
1139 // consume next character
1140 unicodeAsBackSlash = false;
1141 if (((currentCharacter = source[currentPosition++]) == '\\')
1142 && (source[currentPosition] == 'u')) {
1143 getNextUnicodeChar();
1145 if (withoutUnicodePtr != 0) {
1146 withoutUnicodeBuffer[++withoutUnicodePtr] =
1151 while (currentCharacter != '"') {
1154 /**** in PHP \r and \n are valid in string literals ****/
1155 // if ((currentCharacter == '\n')
1156 // || (currentCharacter == '\r')) {
1157 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1158 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1159 // if (currentPosition + lookAhead == source.length)
1161 // if (source[currentPosition + lookAhead] == '\n')
1163 // if (source[currentPosition + lookAhead] == '\"') {
1164 // currentPosition += lookAhead + 1;
1168 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1170 if (currentCharacter == '\\') {
1171 int escapeSize = currentPosition;
1172 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1173 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1174 scanEscapeCharacter();
1175 escapeSize = currentPosition - escapeSize;
1176 if (withoutUnicodePtr == 0) {
1177 //buffer all the entries that have been left aside....
1179 currentPosition - escapeSize - 1 - startPosition;
1183 withoutUnicodeBuffer,
1186 withoutUnicodeBuffer[++withoutUnicodePtr] =
1188 } else { //overwrite the / in the buffer
1189 withoutUnicodeBuffer[withoutUnicodePtr] =
1191 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1192 withoutUnicodePtr--;
1196 // consume next character
1197 unicodeAsBackSlash = false;
1198 if (((currentCharacter = source[currentPosition++]) == '\\')
1199 && (source[currentPosition] == 'u')) {
1200 getNextUnicodeChar();
1202 if (withoutUnicodePtr != 0) {
1203 withoutUnicodeBuffer[++withoutUnicodePtr] =
1209 } catch (IndexOutOfBoundsException e) {
1210 throw new InvalidInputException(UNTERMINATED_STRING);
1211 } catch (InvalidInputException e) {
1212 if (e.getMessage().equals(INVALID_ESCAPE)) {
1213 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1214 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1215 if (currentPosition + lookAhead == source.length)
1217 if (source[currentPosition + lookAhead] == '\n')
1219 if (source[currentPosition + lookAhead] == '\"') {
1220 currentPosition += lookAhead + 1;
1228 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1229 if (currentLine == null) {
1230 currentLine = new NLSLine();
1231 lines.add(currentLine);
1235 getCurrentTokenSourceString(),
1237 currentPosition - 1));
1239 return TokenNameStringLiteral;
1242 // consume next character
1243 unicodeAsBackSlash = false;
1244 if (((currentCharacter = source[currentPosition++]) == '\\')
1245 && (source[currentPosition] == 'u')) {
1246 getNextUnicodeChar();
1248 if (withoutUnicodePtr != 0) {
1249 withoutUnicodeBuffer[++withoutUnicodePtr] =
1254 while (currentCharacter != '`') {
1257 /**** in PHP \r and \n are valid in string literals ****/
1258 // if ((currentCharacter == '\n')
1259 // || (currentCharacter == '\r')) {
1260 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1261 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1262 // if (currentPosition + lookAhead == source.length)
1264 // if (source[currentPosition + lookAhead] == '\n')
1266 // if (source[currentPosition + lookAhead] == '\"') {
1267 // currentPosition += lookAhead + 1;
1271 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1273 if (currentCharacter == '\\') {
1274 int escapeSize = currentPosition;
1275 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1276 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1277 scanEscapeCharacter();
1278 escapeSize = currentPosition - escapeSize;
1279 if (withoutUnicodePtr == 0) {
1280 //buffer all the entries that have been left aside....
1282 currentPosition - escapeSize - 1 - startPosition;
1286 withoutUnicodeBuffer,
1289 withoutUnicodeBuffer[++withoutUnicodePtr] =
1291 } else { //overwrite the / in the buffer
1292 withoutUnicodeBuffer[withoutUnicodePtr] =
1294 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1295 withoutUnicodePtr--;
1299 // consume next character
1300 unicodeAsBackSlash = false;
1301 if (((currentCharacter = source[currentPosition++]) == '\\')
1302 && (source[currentPosition] == 'u')) {
1303 getNextUnicodeChar();
1305 if (withoutUnicodePtr != 0) {
1306 withoutUnicodeBuffer[++withoutUnicodePtr] =
1312 } catch (IndexOutOfBoundsException e) {
1313 throw new InvalidInputException(UNTERMINATED_STRING);
1314 } catch (InvalidInputException e) {
1315 if (e.getMessage().equals(INVALID_ESCAPE)) {
1316 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1317 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1318 if (currentPosition + lookAhead == source.length)
1320 if (source[currentPosition + lookAhead] == '\n')
1322 if (source[currentPosition + lookAhead] == '`') {
1323 currentPosition += lookAhead + 1;
1331 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1332 if (currentLine == null) {
1333 currentLine = new NLSLine();
1334 lines.add(currentLine);
1338 getCurrentTokenSourceString(),
1340 currentPosition - 1));
1342 return TokenNameStringInterpolated;
1347 if ((currentCharacter == '#')
1348 || (test = getNextChar('/', '*')) == 0) {
1350 int endPositionForLineComment = 0;
1351 try { //get the next char
1352 if (((currentCharacter = source[currentPosition++])
1354 && (source[currentPosition] == 'u')) {
1355 //-------------unicode traitement ------------
1356 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1358 while (source[currentPosition] == 'u') {
1362 Character.getNumericValue(source[currentPosition++]))
1366 Character.getNumericValue(source[currentPosition++]))
1370 Character.getNumericValue(source[currentPosition++]))
1374 Character.getNumericValue(source[currentPosition++]))
1377 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1380 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1384 //handle the \\u case manually into comment
1385 if (currentCharacter == '\\') {
1386 if (source[currentPosition] == '\\')
1388 } //jump over the \\
1389 boolean isUnicode = false;
1390 while (currentCharacter != '\r'
1391 && currentCharacter != '\n') {
1392 if (currentCharacter == '?') {
1393 if (getNextChar('>')) {
1394 startPosition = currentPosition - 2;
1396 return TokenNameStopPHP;
1402 if (((currentCharacter = source[currentPosition++])
1404 && (source[currentPosition] == 'u')) {
1406 //-------------unicode traitement ------------
1407 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1409 while (source[currentPosition] == 'u') {
1413 Character.getNumericValue(source[currentPosition++]))
1417 Character.getNumericValue(
1418 source[currentPosition++]))
1422 Character.getNumericValue(
1423 source[currentPosition++]))
1427 Character.getNumericValue(
1428 source[currentPosition++]))
1431 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1434 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1437 //handle the \\u case manually into comment
1438 if (currentCharacter == '\\') {
1439 if (source[currentPosition] == '\\')
1441 } //jump over the \\
1444 endPositionForLineComment = currentPosition - 6;
1446 endPositionForLineComment = currentPosition - 1;
1448 recordComment(false);
1449 if ((currentCharacter == '\r')
1450 || (currentCharacter == '\n')) {
1451 checkNonExternalizeString();
1452 if (recordLineSeparator) {
1454 pushUnicodeLineSeparator();
1456 pushLineSeparator();
1462 if (tokenizeComments) {
1464 currentPosition = endPositionForLineComment;
1465 // reset one character behind
1467 return TokenNameCOMMENT_LINE;
1469 } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1470 if (tokenizeComments) {
1472 // reset one character behind
1473 return TokenNameCOMMENT_LINE;
1479 //traditional and annotation comment
1480 boolean isJavadoc = false, star = false;
1481 // consume next character
1482 unicodeAsBackSlash = false;
1483 if (((currentCharacter = source[currentPosition++]) == '\\')
1484 && (source[currentPosition] == 'u')) {
1485 getNextUnicodeChar();
1487 if (withoutUnicodePtr != 0) {
1488 withoutUnicodeBuffer[++withoutUnicodePtr] =
1493 if (currentCharacter == '*') {
1497 if ((currentCharacter == '\r')
1498 || (currentCharacter == '\n')) {
1499 checkNonExternalizeString();
1500 if (recordLineSeparator) {
1501 pushLineSeparator();
1506 try { //get the next char
1507 if (((currentCharacter = source[currentPosition++])
1509 && (source[currentPosition] == 'u')) {
1510 //-------------unicode traitement ------------
1511 getNextUnicodeChar();
1513 //handle the \\u case manually into comment
1514 if (currentCharacter == '\\') {
1515 if (source[currentPosition] == '\\')
1519 // empty comment is not a javadoc /**/
1520 if (currentCharacter == '/') {
1523 //loop until end of comment */
1524 while ((currentCharacter != '/') || (!star)) {
1525 if ((currentCharacter == '\r')
1526 || (currentCharacter == '\n')) {
1527 checkNonExternalizeString();
1528 if (recordLineSeparator) {
1529 pushLineSeparator();
1534 star = currentCharacter == '*';
1536 if (((currentCharacter = source[currentPosition++])
1538 && (source[currentPosition] == 'u')) {
1539 //-------------unicode traitement ------------
1540 getNextUnicodeChar();
1542 //handle the \\u case manually into comment
1543 if (currentCharacter == '\\') {
1544 if (source[currentPosition] == '\\')
1546 } //jump over the \\
1548 recordComment(isJavadoc);
1549 if (tokenizeComments) {
1551 return TokenNameCOMMENT_PHPDOC;
1552 return TokenNameCOMMENT_BLOCK;
1554 } catch (IndexOutOfBoundsException e) {
1555 throw new InvalidInputException(UNTERMINATED_COMMENT);
1559 if (getNextChar('='))
1560 return TokenNameDIVIDE_EQUAL;
1561 return TokenNameDIVIDE;
1565 return TokenNameEOF;
1566 //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1567 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1570 if (currentCharacter == '$') {
1571 if (getNextChar('{'))
1572 return TokenNameDOLLAR_LBRACE;
1573 return scanIdentifierOrKeyword(true);
1575 if (Character.isJavaIdentifierStart(currentCharacter))
1576 return scanIdentifierOrKeyword(false);
1577 if (Character.isDigit(currentCharacter))
1578 return scanNumber(false);
1579 return TokenNameERROR;
1582 } //-----------------end switch while try--------------------
1583 catch (IndexOutOfBoundsException e) {
1586 return TokenNameEOF;
/**
 * Decodes the unicode escape located at the current position and leaves the decoded
 * char in currentCharacter. Call sites in this class invoke it right after reading a
 * backslash, while source[currentPosition] is 'u'.
 * The decoded char is also appended to withoutUnicodeBuffer, and unicodeAsBackSlash is
 * set when the decoded char is itself a backslash.
 *
 * @throws IndexOutOfBoundsException declared by the signature; source[] is read here
 *         without any explicit bounds test
 * @throws InvalidInputException with INVALID_UNICODE_ESCAPE when a digit of the escape
 *         is rejected
 */
1589 public final void getNextUnicodeChar()
1590 throws IndexOutOfBoundsException, InvalidInputException {
1592 //handle the case of unicode.
1593 //when a unicode appears then we must use a buffer that holds char internal values
1594 //At the end of this method currentCharacter holds the new visited char
1595 //and currentPosition points right next after it
1597 //ALL getNextChar.... ARE OPTIMIZED COPIES
// unicodeSize starts at 6 and is subtracted from currentPosition further down to find
// where the escape began.
// NOTE(review): presumably 6 = backslash + 'u' + 4 digits, adjusted for repeated 'u' - confirm
1599 int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1601 while (source[currentPosition] == 'u') {
// Character.getNumericValue maps letters to 10..35, hence the test against 15 to
// reject anything above the hexadecimal range.
1606 if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1608 || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1610 || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1612 || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1614 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// combine the four hexadecimal digits into one 16-bit char value
1616 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1617 //need the unicode buffer
// withoutUnicodePtr == 0 means the buffer has not been used for the current token yet
1618 if (withoutUnicodePtr == 0) {
1619 //buffer all the entries that have been left aside....
// number of chars of the current token that precede this escape
1620 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1624 withoutUnicodeBuffer,
1628 //fill the buffer with the char
1629 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// remembered so that callers can tell a backslash that was written as an escape
1631 unicodeAsBackSlash = currentCharacter == '\\';
1633 /* Tokenize a method body, assuming that curly brackets are properly balanced.
/**
 * Advances the scanner over the source of a method body without returning tokens.
 * Each pass of the main loop skips white space (recording line separators when
 * recordLineSeparator is set) and then consumes one lexical element chosen by a switch
 * on currentCharacter: escaped chars, double-quoted strings, line comments, block
 * comments, identifiers or keywords, and numbers are all stepped over.
 * IndexOutOfBoundsException and InvalidInputException are caught inside this method.
 */
1635 public final void jumpOverMethodBody() {
1637 this.wasAcr = false;
1640 while (true) { //loop for jumping over comments
1641 // ---------Consume white space and handles startPosition---------
1642 boolean isWhiteSpace;
1644 startPosition = currentPosition;
// a backslash followed by 'u' starts a unicode escape, decoded by the helper
1645 if (((currentCharacter = source[currentPosition++]) == '\\')
1646 && (source[currentPosition] == 'u')) {
1647 isWhiteSpace = jumpOverUnicodeWhiteSpace();
1649 if (recordLineSeparator
1650 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1651 pushLineSeparator();
1652 isWhiteSpace = Character.isWhitespace(currentCharacter);
1654 } while (isWhiteSpace);
1656 // -------consume token until } is found---------
1657 switch (currentCharacter) {
// a backslash at this point introduces an escape sequence
1669 test = getNextChar('\\');
1672 scanEscapeCharacter();
1673 } catch (InvalidInputException ex) {
1676 try { // consume next character
1677 unicodeAsBackSlash = false;
1678 if (((currentCharacter = source[currentPosition++]) == '\\')
1679 && (source[currentPosition] == 'u')) {
1680 getNextUnicodeChar();
1682 if (withoutUnicodePtr != 0) {
1683 withoutUnicodeBuffer[++withoutUnicodePtr] =
1687 } catch (InvalidInputException ex) {
1695 try { // consume next character
1696 unicodeAsBackSlash = false;
1697 if (((currentCharacter = source[currentPosition++]) == '\\')
1698 && (source[currentPosition] == 'u')) {
1699 getNextUnicodeChar();
1701 if (withoutUnicodePtr != 0) {
1702 withoutUnicodeBuffer[++withoutUnicodePtr] =
1706 } catch (InvalidInputException ex) {
// string literal: keep consuming chars until the closing double quote
1708 while (currentCharacter != '"') {
1709 if (currentCharacter == '\r') {
1710 if (source[currentPosition] == '\n')
1713 // the string cannot go further than the line
1715 if (currentCharacter == '\n') {
1717 // the string cannot go further than the line
1719 if (currentCharacter == '\\') {
1721 scanEscapeCharacter();
1722 } catch (InvalidInputException ex) {
1725 try { // consume next character
1726 unicodeAsBackSlash = false;
1727 if (((currentCharacter = source[currentPosition++]) == '\\')
1728 && (source[currentPosition] == 'u')) {
1729 getNextUnicodeChar();
1731 if (withoutUnicodePtr != 0) {
1732 withoutUnicodeBuffer[++withoutUnicodePtr] =
1736 } catch (InvalidInputException ex) {
1739 } catch (IndexOutOfBoundsException e) {
// NOTE(review): a result of 0 presumably means the next char was the first candidate,
// i.e. a line comment starts here (the code below runs to the end of the line) - confirm
1746 if ((test = getNextChar('/', '*')) == 0) {
1750 if (((currentCharacter = source[currentPosition++]) == '\\')
1751 && (source[currentPosition] == 'u')) {
1752 //-------------unicode processing ------------
1753 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1755 while (source[currentPosition] == 'u') {
1759 Character.getNumericValue(source[currentPosition++]))
1763 Character.getNumericValue(source[currentPosition++]))
1767 Character.getNumericValue(source[currentPosition++]))
1771 Character.getNumericValue(source[currentPosition++]))
1774 //error don't care of the value
1775 currentCharacter = 'A';
1776 } //something different from \n and \r
1779 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// skip the rest of the line comment
1783 while (currentCharacter != '\r'
1784 && currentCharacter != '\n') {
1786 if (((currentCharacter = source[currentPosition++])
1788 && (source[currentPosition] == 'u')) {
1789 //-------------unicode processing ------------
1790 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1792 while (source[currentPosition] == 'u') {
1796 Character.getNumericValue(source[currentPosition++]))
1800 Character.getNumericValue(source[currentPosition++]))
1804 Character.getNumericValue(source[currentPosition++]))
1808 Character.getNumericValue(source[currentPosition++]))
1811 //error don't care of the value
1812 currentCharacter = 'A';
1813 } //something different from \n and \r
1816 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1820 if (recordLineSeparator
1821 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1822 pushLineSeparator();
1823 } catch (IndexOutOfBoundsException e) {
1824 } //an eof will then be generated
1828 //traditional and annotation comment
// star remembers whether the previously read char was an asterisk
1829 boolean star = false;
1830 try { // consume next character
1831 unicodeAsBackSlash = false;
1832 if (((currentCharacter = source[currentPosition++]) == '\\')
1833 && (source[currentPosition] == 'u')) {
1834 getNextUnicodeChar();
1836 if (withoutUnicodePtr != 0) {
1837 withoutUnicodeBuffer[++withoutUnicodePtr] =
1841 } catch (InvalidInputException ex) {
1843 if (currentCharacter == '*') {
1846 if (recordLineSeparator
1847 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1848 pushLineSeparator();
1849 try { //get the next char
1850 if (((currentCharacter = source[currentPosition++]) == '\\')
1851 && (source[currentPosition] == 'u')) {
1852 //-------------unicode processing ------------
1853 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1855 while (source[currentPosition] == 'u') {
1859 Character.getNumericValue(source[currentPosition++]))
1863 Character.getNumericValue(source[currentPosition++]))
1867 Character.getNumericValue(source[currentPosition++]))
1871 Character.getNumericValue(source[currentPosition++]))
1874 //error don't care of the value
1875 currentCharacter = 'A';
1876 } //something different from * and /
1879 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1882 //loop until end of comment */
// the comment ends on a slash that directly follows an asterisk
1883 while ((currentCharacter != '/') || (!star)) {
1884 if (recordLineSeparator
1885 && ((currentCharacter == '\r')
1886 || (currentCharacter == '\n')))
1887 pushLineSeparator();
1888 star = currentCharacter == '*';
1890 if (((currentCharacter = source[currentPosition++])
1892 && (source[currentPosition] == 'u')) {
1893 //-------------unicode processing ------------
1894 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1896 while (source[currentPosition] == 'u') {
1900 Character.getNumericValue(source[currentPosition++]))
1904 Character.getNumericValue(source[currentPosition++]))
1908 Character.getNumericValue(source[currentPosition++]))
1912 Character.getNumericValue(source[currentPosition++]))
1915 //error don't care of the value
1916 currentCharacter = 'A';
1917 } //something different from * and /
1920 (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1924 } catch (IndexOutOfBoundsException e) {
// identifiers, keywords and variables (leading '$') are consumed by the keyword scanner
1933 if (Character.isJavaIdentifierStart(currentCharacter)
1934 || currentCharacter == '$') {
1936 scanIdentifierOrKeyword((currentCharacter == '$'));
1937 } catch (InvalidInputException ex) {
1941 if (Character.isDigit(currentCharacter)) {
1944 } catch (InvalidInputException ex) {
1950 //-----------------end switch while try--------------------
// scanning problems (end of source, invalid input) are caught here
1951 } catch (IndexOutOfBoundsException e) {
1952 } catch (InvalidInputException e) {
/**
 * Decodes the unicode escape at the current position into currentCharacter and tests
 * whether the decoded char is white space. A decoded char that is not white space is
 * stored in withoutUnicodeBuffer. wasAcr is cleared on entry.
 *
 * @return NOTE(review): presumably true when the decoded char is white space (the caller
 *         assigns the result to its isWhiteSpace flag) - confirm
 * @throws InvalidInputException with INVALID_UNICODE_ESCAPE when a digit is rejected or
 *         when the source ends inside the escape
 */
1956 public final boolean jumpOverUnicodeWhiteSpace()
1957 throws InvalidInputException {
1959 //handle the case of unicode. Jump over the next whiteSpace
1960 //making startPosition pointing on the next available char
1961 //On false, the currentCharacter is filled up with a potential
1965 this.wasAcr = false;
1967 int unicodeSize = 6;
1969 while (source[currentPosition] == 'u') {
// Character.getNumericValue maps letters to 10..35, hence the test against 15
1974 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1976 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1978 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1980 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1982 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// combine the four hexadecimal digits into one 16-bit char value
1985 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// NOTE(review): the separator came from an escape, yet pushLineSeparator() is used
// rather than pushUnicodeLineSeparator() - confirm this is intended
1986 if (recordLineSeparator
1987 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1988 pushLineSeparator();
1989 if (Character.isWhitespace(currentCharacter))
1992 //buffer the new char which is not a white space
1993 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1994 //withoutUnicodePtr == 1 is true here
// running out of source inside the escape is reported as an invalid escape
1996 } catch (IndexOutOfBoundsException e) {
1997 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
/**
 * Copies the first linePtr + 1 entries of lineEnds into a freshly allocated array, so
 * callers do not receive the scanner's own array.
 *
 * @return NOTE(review): presumably the copy built below - confirm
 */
2000 public final int[] getLineEnds() {
2001 //return a bounded copy of this.lineEnds
// the destination array is allocated inline and sized to the entries actually in use
2004 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
/**
 * Accessor returning a char array.
 * NOTE(review): presumably the scanner's source buffer, and presumably not a copy - confirm
 */
2008 public char[] getSource() {
/**
 * Returns the source of a one-char token, i.e. the char found at startPosition.
 * The last statement allocates a new one-element array.
 * NOTE(review): per the comments below, common chars presumably map to shared arrays
 * before that fallback is reached - confirm
 *
 * @return a char array whose single element is source[startPosition]
 */
2011 final char[] optimizedCurrentTokenSource1() {
2012 //return always the same char[] build only once
2014 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2015 char charOne = source[startPosition];
// fallback: build a new array for this char
2070 return new char[] { charOne };
/**
 * Returns the source of a two-char token starting at startPosition, reusing an array
 * cached in charArray_length[0] when one with the same two chars is found, and storing a
 * newly allocated array in the cache otherwise.
 * The bucket is selected by a hash built from both chars. The bucket is scanned in two
 * passes, the second one bounded by newEntry2.
 *
 * @return NOTE(review): presumably the matching cached array, or the new array r - confirm
 */
2073 final char[] optimizedCurrentTokenSource2() {
2074 //try to return the same char[] build only once
// hash input: first char shifted left by 6 bits, plus the second char
2078 (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2080 char[][] table = charArray_length[0][hash];
2082 while (++i < InternalTableSize) {
2083 char[] charArray = table[i];
2084 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2087 //---------other side---------
2089 int max = newEntry2;
2090 while (++i <= max) {
2091 char[] charArray = table[i];
2092 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2095 //--------add the entry-------
// NOTE(review): presumably wraps the insertion index around when the bucket is full - confirm
2096 if (++max >= InternalTableSize)
2099 table[max] = (r = new char[] { c0, c1 });
/**
 * Returns the source of a three-char token starting at startPosition, reusing an array
 * cached in charArray_length[1] when one with the same three chars is found, and storing
 * a newly allocated array in the cache otherwise.
 * The bucket is selected by a hash built from the three chars. The bucket is scanned in
 * two passes, the second one bounded by newEntry3.
 *
 * @return NOTE(review): presumably the matching cached array, or the new array r - confirm
 */
2103 final char[] optimizedCurrentTokenSource3() {
2104 //try to return the same char[] build only once
// hash input: chars combined with shifts of 12 and 6 bits
2108 (((c0 = source[startPosition]) << 12)
2109 + ((c1 = source[startPosition + 1]) << 6)
2110 + (c2 = source[startPosition + 2]))
2112 char[][] table = charArray_length[1][hash];
2114 while (++i < InternalTableSize) {
2115 char[] charArray = table[i];
2116 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2119 //---------other side---------
2121 int max = newEntry3;
2122 while (++i <= max) {
2123 char[] charArray = table[i];
2124 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2127 //--------add the entry-------
// NOTE(review): presumably wraps the insertion index around when the bucket is full - confirm
2128 if (++max >= InternalTableSize)
2131 table[max] = (r = new char[] { c0, c1, c2 });
/**
 * Returns the source of a four-char token starting at startPosition, reusing an array
 * cached in charArray_length[2] when one with the same four chars is found, and storing
 * a newly allocated array in the cache otherwise.
 * The bucket is selected by a hash built from the four chars. The bucket is scanned in
 * two passes, the second one bounded by newEntry4.
 *
 * @return NOTE(review): presumably the matching cached array, or the new array r - confirm
 */
2135 final char[] optimizedCurrentTokenSource4() {
2136 //try to return the same char[] build only once
2138 char c0, c1, c2, c3;
// the first char is widened to long before shifting, so the sum is computed as a long
2140 ((((long) (c0 = source[startPosition])) << 18)
2141 + ((c1 = source[startPosition + 1]) << 12)
2142 + ((c2 = source[startPosition + 2]) << 6)
2143 + (c3 = source[startPosition + 3]))
// the long hash is narrowed to int to index the cache
2145 char[][] table = charArray_length[2][(int) hash];
2147 while (++i < InternalTableSize) {
2148 char[] charArray = table[i];
2149 if ((c0 == charArray[0])
2150 && (c1 == charArray[1])
2151 && (c2 == charArray[2])
2152 && (c3 == charArray[3]))
2155 //---------other side---------
2157 int max = newEntry4;
2158 while (++i <= max) {
2159 char[] charArray = table[i];
2160 if ((c0 == charArray[0])
2161 && (c1 == charArray[1])
2162 && (c2 == charArray[2])
2163 && (c3 == charArray[3]))
2166 //--------add the entry-------
// NOTE(review): presumably wraps the insertion index around when the bucket is full - confirm
2167 if (++max >= InternalTableSize)
2170 table[max] = (r = new char[] { c0, c1, c2, c3 });
/**
 * Returns the source of a five-char token starting at startPosition, reusing an array
 * cached in charArray_length[3] when one with the same five chars is found, and storing
 * a newly allocated array in the cache otherwise.
 * The bucket is selected by a hash built from the five chars. The bucket is scanned in
 * two passes, the second one bounded by newEntry5.
 *
 * @return NOTE(review): presumably the matching cached array, or the new array r - confirm
 */
2175 final char[] optimizedCurrentTokenSource5() {
2176 //try to return the same char[] build only once
2178 char c0, c1, c2, c3, c4;
// the two leading chars are widened to long before shifting by 24 and 18 bits
2180 ((((long) (c0 = source[startPosition])) << 24)
2181 + (((long) (c1 = source[startPosition + 1])) << 18)
2182 + ((c2 = source[startPosition + 2]) << 12)
2183 + ((c3 = source[startPosition + 3]) << 6)
2184 + (c4 = source[startPosition + 4]))
// the long hash is narrowed to int to index the cache
2186 char[][] table = charArray_length[3][(int) hash];
2188 while (++i < InternalTableSize) {
2189 char[] charArray = table[i];
2190 if ((c0 == charArray[0])
2191 && (c1 == charArray[1])
2192 && (c2 == charArray[2])
2193 && (c3 == charArray[3])
2194 && (c4 == charArray[4]))
2197 //---------other side---------
2199 int max = newEntry5;
2200 while (++i <= max) {
2201 char[] charArray = table[i];
2202 if ((c0 == charArray[0])
2203 && (c1 == charArray[1])
2204 && (c2 == charArray[2])
2205 && (c3 == charArray[3])
2206 && (c4 == charArray[4]))
2209 //--------add the entry-------
// NOTE(review): presumably wraps the insertion index around when the bucket is full - confirm
2210 if (++max >= InternalTableSize)
2213 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
/**
 * Returns the source of a six-char token starting at startPosition, reusing an array
 * cached in charArray_length[4] when one with the same six chars is found, and storing
 * a newly allocated array in the cache otherwise.
 * The bucket is selected by a hash built from the six chars. The bucket is scanned in
 * two passes, the second one bounded by newEntry6.
 *
 * @return NOTE(review): presumably the matching cached array, or the new array r - confirm
 */
2218 final char[] optimizedCurrentTokenSource6() {
2219 //try to return the same char[] build only once
2221 char c0, c1, c2, c3, c4, c5;
// the three leading chars are widened to long before shifting by 32, 24 and 18 bits
2223 ((((long) (c0 = source[startPosition])) << 32)
2224 + (((long) (c1 = source[startPosition + 1])) << 24)
2225 + (((long) (c2 = source[startPosition + 2])) << 18)
2226 + ((c3 = source[startPosition + 3]) << 12)
2227 + ((c4 = source[startPosition + 4]) << 6)
2228 + (c5 = source[startPosition + 5]))
// the long hash is narrowed to int to index the cache
2230 char[][] table = charArray_length[4][(int) hash];
2232 while (++i < InternalTableSize) {
2233 char[] charArray = table[i];
2234 if ((c0 == charArray[0])
2235 && (c1 == charArray[1])
2236 && (c2 == charArray[2])
2237 && (c3 == charArray[3])
2238 && (c4 == charArray[4])
2239 && (c5 == charArray[5]))
2242 //---------other side---------
2244 int max = newEntry6;
2245 while (++i <= max) {
2246 char[] charArray = table[i];
2247 if ((c0 == charArray[0])
2248 && (c1 == charArray[1])
2249 && (c2 == charArray[2])
2250 && (c3 == charArray[3])
2251 && (c4 == charArray[4])
2252 && (c5 == charArray[5]))
2255 //--------add the entry-------
// NOTE(review): presumably wraps the insertion index around when the bucket is full - confirm
2256 if (++max >= InternalTableSize)
2259 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
/**
 * Records in lineEnds the position of the line separator held by currentCharacter
 * (located at currentPosition - 1), enlarging lineEnds by INCREMENT entries when it is
 * full. A carriage return directly followed by a line feed ends up as a single entry
 * positioned on the line feed, whichever of the two chars triggers the call.
 *
 * @throws InvalidInputException declared by the signature.
 *         NOTE(review): which statement raises it is not evident from this body - confirm
 */
2263 public final void pushLineSeparator() throws InvalidInputException {
2264 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// number of entries added to lineEnds each time it has to grow
2265 final int INCREMENT = 250;
2267 if (this.checkNonExternalizedStringLiterals) {
2268 // reinitialize the current line for non externalize strings purpose
2271 //currentCharacter is at position currentPosition-1
2274 if (currentCharacter == '\r') {
2275 int separatorPos = currentPosition - 1;
// NOTE(review): a position at or before the last recorded line end is presumably
// ignored as already recorded - confirm
2276 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2278 //System.out.println("CR-" + separatorPos);
2280 lineEnds[++linePtr] = separatorPos;
// the array was full: linePtr is already incremented, so grow and store at linePtr
2281 } catch (IndexOutOfBoundsException e) {
2282 //linePtr value is correct
2283 int oldLength = lineEnds.length;
2284 int[] old = lineEnds;
2285 lineEnds = new int[oldLength + INCREMENT];
2286 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2287 lineEnds[linePtr] = separatorPos;
2289 // look-ahead for merged cr+lf
2291 if (source[currentPosition] == '\n') {
2292 //System.out.println("look-ahead LF-" + currentPosition);
// move the entry just recorded for the CR onto the LF that follows it
2293 lineEnds[linePtr] = currentPosition;
2299 } catch (IndexOutOfBoundsException e) {
2304 if (currentCharacter == '\n') {
2305 //must merge eventual cr followed by lf
// the previous entry sits on the char right before this LF, i.e. on the CR
2306 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2307 //System.out.println("merge LF-" + (currentPosition - 1));
2308 lineEnds[linePtr] = currentPosition - 1;
2310 int separatorPos = currentPosition - 1;
2311 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2313 // System.out.println("LF-" + separatorPos);
2315 lineEnds[++linePtr] = separatorPos;
// the array was full: linePtr is already incremented, so grow and store at linePtr
2316 } catch (IndexOutOfBoundsException e) {
2317 //linePtr value is correct
2318 int oldLength = lineEnds.length;
2319 int[] old = lineEnds;
2320 lineEnds = new int[oldLength + INCREMENT];
2321 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2322 lineEnds[linePtr] = separatorPos;
/**
 * Variant of pushLineSeparator() for a line separator that was read as a unicode escape:
 * the recorded position is currentPosition - 6 instead of currentPosition - 1.
 * lineEnds is enlarged by INCREMENT entries when it is full, and a carriage return
 * followed by a line feed is merged into a single entry.
 */
2329 public final void pushUnicodeLineSeparator() {
2330 // isUnicode means that the \r or \n has been read as a unicode character
2332 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// number of entries added to lineEnds each time it has to grow
2334 final int INCREMENT = 250;
2335 //currentCharacter is at position currentPosition-1
2337 if (this.checkNonExternalizedStringLiterals) {
2338 // reinitialize the current line for non externalize strings purpose
2343 if (currentCharacter == '\r') {
2344 int separatorPos = currentPosition - 6;
// NOTE(review): a position at or before the last recorded line end is presumably
// ignored as already recorded - confirm
2345 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2347 //System.out.println("CR-" + separatorPos);
2349 lineEnds[++linePtr] = separatorPos;
// the array was full: linePtr is already incremented, so grow and store at linePtr
2350 } catch (IndexOutOfBoundsException e) {
2351 //linePtr value is correct
2352 int oldLength = lineEnds.length;
2353 int[] old = lineEnds;
2354 lineEnds = new int[oldLength + INCREMENT];
2355 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2356 lineEnds[linePtr] = separatorPos;
2358 // look-ahead for merged cr+lf
// NOTE(review): unlike pushLineSeparator(), no IndexOutOfBoundsException handler is
// evident around this read of source[currentPosition] - confirm end-of-source behaviour
2359 if (source[currentPosition] == '\n') {
2360 //System.out.println("look-ahead LF-" + currentPosition);
2361 lineEnds[linePtr] = currentPosition;
2369 if (currentCharacter == '\n') {
2370 //must merge eventual cr followed by lf
// the previous entry sits on the char right before this escaped LF, i.e. on the CR
2371 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2372 //System.out.println("merge LF-" + (currentPosition - 1));
2373 lineEnds[linePtr] = currentPosition - 6;
2375 int separatorPos = currentPosition - 6;
2376 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2378 // System.out.println("LF-" + separatorPos);
2380 lineEnds[++linePtr] = separatorPos;
// the array was full: linePtr is already incremented, so grow and store at linePtr
2381 } catch (IndexOutOfBoundsException e) {
2382 //linePtr value is correct
2383 int oldLength = lineEnds.length;
2384 int[] old = lineEnds;
2385 lineEnds = new int[oldLength + INCREMENT];
2386 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2387 lineEnds[linePtr] = separatorPos;
/**
 * Records the comment that has just been scanned: commentPtr is incremented, the end
 * position goes to commentStops and the start position (startPosition) to commentStarts.
 * The end is stored as currentPosition for a javadoc comment and as -currentPosition
 * otherwise, so the sign tells the two kinds apart.
 * Both arrays are enlarged by 30 entries when commentStops is full.
 *
 * @param isJavadoc true to store a positive stop position, false to store a negative one
 */
2394 public final void recordComment(boolean isJavadoc) {
2396 // a new annotation comment is recorded
2398 commentStops[++commentPtr] =
2399 isJavadoc ? currentPosition : -currentPosition;
// commentStops was full: commentPtr is already incremented, so grow and store again
2400 } catch (IndexOutOfBoundsException e) {
2401 int oldStackLength = commentStops.length;
2402 int[] oldStack = commentStops;
2403 commentStops = new int[oldStackLength + 30];
2404 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2405 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2406 //grows the positions buffers too
// NOTE(review): commentStarts is resized from the length of commentStops, which assumes
// both arrays always have the same length - confirm
2407 int[] old = commentStarts;
2408 commentStarts = new int[oldStackLength + 30];
2409 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2412 //the buffer is of a correct size here
2413 commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning restarts at begin, and empties the recorded
 * comments.
 *
 * @param begin position assigned to initialPosition, startPosition and currentPosition
 * @param end last position to scan; eofPosition becomes end + 1, except when end is
 *        Integer.MAX_VALUE, where it is used as is
 */
2415 public void resetTo(int begin, int end) {
2416 //reset the scanner to a given position where it may rescan again
2419 initialPosition = startPosition = currentPosition = begin;
// adding 1 to Integer.MAX_VALUE would overflow, hence the guard
2420 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2421 commentPtr = -1; // reset comment stack
/**
 * Reads what follows a backslash inside a literal and replaces currentCharacter with
 * the value of the escape sequence. Single-char escapes are mapped to their control or
 * quote char; octal escapes of one to three digits are converted to the corresponding
 * char value.
 *
 * @throws InvalidInputException with INVALID_ESCAPE.
 *         NOTE(review): the exact condition guarding that throw in the octal branch
 *         should be confirmed, and note that the final throw at the end of the method is
 *         commented out
 */
2424 public final void scanEscapeCharacter() throws InvalidInputException {
2425 // the string with "\\u" is a legal string of two chars \ and u
2426 //thus we use a direct access to the source (for regular cases).
// when the backslash itself came from a unicode escape, the next char may be escaped too
2428 if (unicodeAsBackSlash) {
2429 // consume next character
2430 unicodeAsBackSlash = false;
2431 if (((currentCharacter = source[currentPosition++]) == '\\')
2432 && (source[currentPosition] == 'u')) {
2433 getNextUnicodeChar();
2435 if (withoutUnicodePtr != 0) {
2436 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2440 currentCharacter = source[currentPosition++];
// map the char that follows the backslash to the char it stands for
2441 switch (currentCharacter) {
2443 currentCharacter = '\b';
2446 currentCharacter = '\t';
2449 currentCharacter = '\n';
2452 currentCharacter = '\f';
2455 currentCharacter = '\r';
2458 currentCharacter = '\"';
2461 currentCharacter = '\'';
2464 currentCharacter = '\\';
2467 // -----------octal escape--------------
2469 // OctalDigit OctalDigit
2470 // ZeroToThree OctalDigit OctalDigit
2472 int number = Character.getNumericValue(currentCharacter);
2473 if (number >= 0 && number <= 7) {
// a first digit above 3 rules out a third octal digit
2474 boolean zeroToThreeNot = number > 3;
2476 .isDigit(currentCharacter = source[currentPosition++])) {
2477 int digit = Character.getNumericValue(currentCharacter);
2478 if (digit >= 0 && digit <= 7) {
2479 number = (number * 8) + digit;
2481 .isDigit(currentCharacter = source[currentPosition++])) {
2482 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2485 digit = Character.getNumericValue(currentCharacter);
2486 if (digit >= 0 && digit <= 7) {
2487 // has read \ZeroToThree OctalDigit OctalDigit
2488 number = (number * 8) + digit;
2489 } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2493 } else { // has read \OctalDigit NonDigit--> ignore last character
2496 } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2499 } else { // has read \OctalDigit --> ignore last character
2503 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the resulting char
2504 currentCharacter = (char) number;
2507 // throw new InvalidInputException(INVALID_ESCAPE);
2511 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2512 // return scanIdentifierOrKeyword( false );
2515 public int scanIdentifierOrKeyword(boolean isVariable)
2516 throws InvalidInputException {
2519 //first dispatch on the first char.
2520 //then the length. If there are several
2521 //keywors with the same length AND the same first char, then do another
2522 //disptach on the second char :-)...cool....but fast !
2524 useAssertAsAnIndentifier = false;
2525 while (getNextCharAsJavaIdentifierPart()) {
2529 return TokenNameVariable;
2534 if (withoutUnicodePtr == 0)
2536 //quick test on length == 1 but not on length > 12 while most identifier
2537 //have a length which is <= 12...but there are lots of identifier with
2541 if ((length = currentPosition - startPosition) == 1)
2542 return TokenNameIdentifier;
2544 data = new char[length];
2545 index = startPosition;
2546 for (int i = 0; i < length; i++) {
2547 data[i] = Character.toLowerCase(source[index + i]);
2551 if ((length = withoutUnicodePtr) == 1)
2552 return TokenNameIdentifier;
2553 // data = withoutUnicodeBuffer;
2554 data = new char[withoutUnicodeBuffer.length];
2555 for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2556 data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2561 firstLetter = data[index];
2562 switch (firstLetter) {
2564 case 'a' : // as and array
2567 if ((data[++index] == 's')) {
2570 return TokenNameIdentifier;
2573 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2574 return TokenNameAND;
2576 return TokenNameIdentifier;
2579 // if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2580 // return TokenNamearray;
2582 // return TokenNameIdentifier;
2584 return TokenNameIdentifier;
2589 if ((data[++index] == 'r')
2590 && (data[++index] == 'e')
2591 && (data[++index] == 'a')
2592 && (data[++index] == 'k'))
2593 return TokenNamebreak;
2595 return TokenNameIdentifier;
2597 return TokenNameIdentifier;
2600 case 'c' : //case class continue
2603 if ((data[++index] == 'a')
2604 && (data[++index] == 's')
2605 && (data[++index] == 'e'))
2606 return TokenNamecase;
2608 return TokenNameIdentifier;
2610 if ((data[++index] == 'l')
2611 && (data[++index] == 'a')
2612 && (data[++index] == 's')
2613 && (data[++index] == 's'))
2614 return TokenNameclass;
2616 return TokenNameIdentifier;
2618 if ((data[++index] == 'o')
2619 && (data[++index] == 'n')
2620 && (data[++index] == 't')
2621 && (data[++index] == 'i')
2622 && (data[++index] == 'n')
2623 && (data[++index] == 'u')
2624 && (data[++index] == 'e'))
2625 return TokenNamecontinue;
2627 return TokenNameIdentifier;
2629 return TokenNameIdentifier;
2632 case 'd' : //define default do
2635 if ((data[++index] == 'o'))
2638 return TokenNameIdentifier;
2640 if ((data[++index] == 'e')
2641 && (data[++index] == 'f')
2642 && (data[++index] == 'i')
2643 && (data[++index] == 'n')
2644 && (data[++index] == 'e'))
2645 return TokenNamedefine;
2647 return TokenNameIdentifier;
2649 if ((data[++index] == 'e')
2650 && (data[++index] == 'f')
2651 && (data[++index] == 'a')
2652 && (data[++index] == 'u')
2653 && (data[++index] == 'l')
2654 && (data[++index] == 't'))
2655 return TokenNamedefault;
2657 return TokenNameIdentifier;
2659 return TokenNameIdentifier;
2661 case 'e' : //echo else elseif extends
2664 if ((data[++index] == 'c')
2665 && (data[++index] == 'h')
2666 && (data[++index] == 'o'))
2667 return TokenNameecho;
2669 (data[index] == 'l')
2670 && (data[++index] == 's')
2671 && (data[++index] == 'e'))
2672 return TokenNameelse;
2674 return TokenNameIdentifier;
2676 if ((data[++index] == 'n')
2677 && (data[++index] == 'd')
2678 && (data[++index] == 'i')
2679 && (data[++index] == 'f'))
2680 return TokenNameendif;
2682 return TokenNameIdentifier;
2684 if ((data[++index] == 'n')
2685 && (data[++index] == 'd')
2686 && (data[++index] == 'f')
2687 && (data[++index] == 'o')
2688 && (data[++index] == 'r'))
2689 return TokenNameendfor;
2691 (data[index] == 'l')
2692 && (data[++index] == 's')
2693 && (data[++index] == 'e')
2694 && (data[++index] == 'i')
2695 && (data[++index] == 'f'))
2696 return TokenNameelseif;
2698 return TokenNameIdentifier;
2700 if ((data[++index] == 'x')
2701 && (data[++index] == 't')
2702 && (data[++index] == 'e')
2703 && (data[++index] == 'n')
2704 && (data[++index] == 'd')
2705 && (data[++index] == 's'))
2706 return TokenNameextends;
2708 return TokenNameIdentifier;
2709 case 8 : // endwhile
2710 if ((data[++index] == 'n')
2711 && (data[++index] == 'd')
2712 && (data[++index] == 'w')
2713 && (data[++index] == 'h')
2714 && (data[++index] == 'i')
2715 && (data[++index] == 'l')
2716 && (data[++index] == 'e'))
2717 return TokenNameendwhile;
2719 return TokenNameIdentifier;
2720 case 9 : // endswitch
2721 if ((data[++index] == 'n')
2722 && (data[++index] == 'd')
2723 && (data[++index] == 's')
2724 && (data[++index] == 'w')
2725 && (data[++index] == 'i')
2726 && (data[++index] == 't')
2727 && (data[++index] == 'c')
2728 && (data[++index] == 'h'))
2729 return TokenNameendswitch;
2731 return TokenNameIdentifier;
2732 case 10 : // endforeach
2733 if ((data[++index] == 'n')
2734 && (data[++index] == 'd')
2735 && (data[++index] == 'f')
2736 && (data[++index] == 'o')
2737 && (data[++index] == 'r')
2738 && (data[++index] == 'e')
2739 && (data[++index] == 'a')
2740 && (data[++index] == 'c')
2741 && (data[++index] == 'h'))
2742 return TokenNameendforeach;
2744 return TokenNameIdentifier;
2747 return TokenNameIdentifier;
2750 case 'f' : //for false function
2753 if ((data[++index] == 'o') && (data[++index] == 'r'))
2754 return TokenNamefor;
2756 return TokenNameIdentifier;
2758 if ((data[++index] == 'a')
2759 && (data[++index] == 'l')
2760 && (data[++index] == 's')
2761 && (data[++index] == 'e'))
2762 return TokenNamefalse;
2764 return TokenNameIdentifier;
2765 case 7 : // function
2766 if ((data[++index] == 'o')
2767 && (data[++index] == 'r')
2768 && (data[++index] == 'e')
2769 && (data[++index] == 'a')
2770 && (data[++index] == 'c')
2771 && (data[++index] == 'h'))
2772 return TokenNameforeach;
2774 return TokenNameIdentifier;
2775 case 8 : // function
2776 if ((data[++index] == 'u')
2777 && (data[++index] == 'n')
2778 && (data[++index] == 'c')
2779 && (data[++index] == 't')
2780 && (data[++index] == 'i')
2781 && (data[++index] == 'o')
2782 && (data[++index] == 'n'))
2783 return TokenNamefunction;
2785 return TokenNameIdentifier;
2787 return TokenNameIdentifier;
2791 if ((data[++index] == 'l')
2792 && (data[++index] == 'o')
2793 && (data[++index] == 'b')
2794 && (data[++index] == 'a')
2795 && (data[++index] == 'l')) {
2796 return TokenNameglobal;
2799 return TokenNameIdentifier;
2804 if (data[++index] == 'f')
2807 return TokenNameIdentifier;
2809 // if ((data[++index] == 'n') && (data[++index] == 't'))
2810 // return TokenNameint;
2812 // return TokenNameIdentifier;
2814 if ((data[++index] == 'n')
2815 && (data[++index] == 'c')
2816 && (data[++index] == 'l')
2817 && (data[++index] == 'u')
2818 && (data[++index] == 'd')
2819 && (data[++index] == 'e'))
2820 return TokenNameinclude;
2822 return TokenNameIdentifier;
2824 if ((data[++index] == 'n')
2825 && (data[++index] == 'c')
2826 && (data[++index] == 'l')
2827 && (data[++index] == 'u')
2828 && (data[++index] == 'd')
2829 && (data[++index] == 'e')
2830 && (data[++index] == '_')
2831 && (data[++index] == 'o')
2832 && (data[++index] == 'n')
2833 && (data[++index] == 'c')
2834 && (data[++index] == 'e'))
2835 return TokenNameinclude_once;
2837 return TokenNameIdentifier;
2839 return TokenNameIdentifier;
2844 if ((data[++index] == 'i')
2845 && (data[++index] == 's')
2846 && (data[++index] == 't')) {
2847 return TokenNamelist;
2850 return TokenNameIdentifier;
2852 case 'n' : // new null
2855 if ((data[++index] == 'e') && (data[++index] == 'w'))
2856 return TokenNamenew;
2858 return TokenNameIdentifier;
2860 if ((data[++index] == 'u')
2861 && (data[++index] == 'l')
2862 && (data[++index] == 'l'))
2863 return TokenNamenull;
2865 return TokenNameIdentifier;
2868 return TokenNameIdentifier;
2870 case 'o' : // or old_function
2872 if (data[++index] == 'r') {
2876 // if (length == 12) {
2877 // if ((data[++index] == 'l')
2878 // && (data[++index] == 'd')
2879 // && (data[++index] == '_')
2880 // && (data[++index] == 'f')
2881 // && (data[++index] == 'u')
2882 // && (data[++index] == 'n')
2883 // && (data[++index] == 'c')
2884 // && (data[++index] == 't')
2885 // && (data[++index] == 'i')
2886 // && (data[++index] == 'o')
2887 // && (data[++index] == 'n')) {
2888 // return TokenNameold_function;
2891 return TokenNameIdentifier;
2895 if ((data[++index] == 'r')
2896 && (data[++index] == 'i')
2897 && (data[++index] == 'n')
2898 && (data[++index] == 't')) {
2899 return TokenNameprint;
2902 return TokenNameIdentifier;
2903 case 'r' : //return require require_once
2905 if ((data[++index] == 'e')
2906 && (data[++index] == 't')
2907 && (data[++index] == 'u')
2908 && (data[++index] == 'r')
2909 && (data[++index] == 'n')) {
2910 return TokenNamereturn;
2912 } else if (length == 7) {
2913 if ((data[++index] == 'e')
2914 && (data[++index] == 'q')
2915 && (data[++index] == 'u')
2916 && (data[++index] == 'i')
2917 && (data[++index] == 'r')
2918 && (data[++index] == 'e')) {
2919 return TokenNamerequire;
2921 } else if (length == 12) {
2922 if ((data[++index] == 'e')
2923 && (data[++index] == 'q')
2924 && (data[++index] == 'u')
2925 && (data[++index] == 'i')
2926 && (data[++index] == 'r')
2927 && (data[++index] == 'e')
2928 && (data[++index] == '_')
2929 && (data[++index] == 'o')
2930 && (data[++index] == 'n')
2931 && (data[++index] == 'c')
2932 && (data[++index] == 'e')) {
2933 return TokenNamerequire_once;
2936 return TokenNameIdentifier;
2938 case 's' : //static switch
2941 if (data[++index] == 't')
2942 if ((data[++index] == 'a')
2943 && (data[++index] == 't')
2944 && (data[++index] == 'i')
2945 && (data[++index] == 'c')) {
2946 return TokenNamestatic;
2948 return TokenNameIdentifier;
2950 (data[index] == 'w')
2951 && (data[++index] == 'i')
2952 && (data[++index] == 't')
2953 && (data[++index] == 'c')
2954 && (data[++index] == 'h'))
2955 return TokenNameswitch;
2957 return TokenNameIdentifier;
2959 return TokenNameIdentifier;
2966 if ((data[++index] == 'r')
2967 && (data[++index] == 'u')
2968 && (data[++index] == 'e'))
2969 return TokenNametrue;
2971 return TokenNameIdentifier;
2972 // if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
2973 // return TokenNamethis;
2976 return TokenNameIdentifier;
2982 if ((data[++index] == 'a') && (data[++index] == 'r'))
2983 return TokenNamevar;
2985 return TokenNameIdentifier;
2988 return TokenNameIdentifier;
2994 if ((data[++index] == 'h')
2995 && (data[++index] == 'i')
2996 && (data[++index] == 'l')
2997 && (data[++index] == 'e'))
2998 return TokenNamewhile;
3000 return TokenNameIdentifier;
3001 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
3002 //return TokenNamewidefp ;
3004 //return TokenNameIdentifier;
3006 return TokenNameIdentifier;
3012 if ((data[++index] == 'o') && (data[++index] == 'r'))
3013 return TokenNameXOR;
3015 return TokenNameIdentifier;
3018 return TokenNameIdentifier;
3021 return TokenNameIdentifier;
// Scans a numeric literal whose first character is already in currentCharacter and
// returns its token kind (TokenNameIntegerLiteral or TokenNameDoubleLiteral).
// dotPrefix: true when the caller already consumed a leading '.', so the literal starts out floating.
// Throws InvalidInputException(INVALID_HEXA) when "0x"/"0X" is not followed by a hex digit and
// InvalidInputException(INVALID_FLOAT) when an exponent marker is not followed by a digit.
// NOTE(review): several structural lines (closing braces, else branches) are not visible in this listing.
3024 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3026 //when entering this method the currentCharacter is the first
3027 //digit of the number, i.e. it may be preceded by a '.' when dotPrefix is true
3030 boolean floating = dotPrefix;
// a leading '0' (not after a dot) may introduce a hex literal, an octal literal or a float such as 000099.0
3031 if ((!dotPrefix) && (currentCharacter == '0')) {
3032 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3033 //force the first char of the hexa number to exist...
3034 // consume next character
3035 unicodeAsBackSlash = false;
// read the next raw char; a backslash followed by 'u' is decoded through getNextUnicodeChar()
3036 if (((currentCharacter = source[currentPosition++]) == '\\')
3037 && (source[currentPosition] == 'u')) {
3038 getNextUnicodeChar();
// withoutUnicodePtr != 0 means a unicode escape was already seen in this token, so the char is mirrored into withoutUnicodeBuffer
3040 if (withoutUnicodePtr != 0) {
3041 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3044 if (Character.digit(currentCharacter, 16) == -1)
3045 throw new InvalidInputException(INVALID_HEXA);
// swallow the remaining hex digits
3047 while (getNextCharAsDigit(16)) {
3049 // if (getNextChar('l', 'L') >= 0)
3050 // return TokenNameLongLiteral;
3052 return TokenNameIntegerLiteral;
3055 //there is no x or X in the number
3056 //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3057 if (getNextCharAsDigit()) {
3058 //-------------potential octal-----------------
3059 while (getNextCharAsDigit()) {
3062 // if (getNextChar('l', 'L') >= 0) {
3063 // return TokenNameLongLiteral;
3066 // if (getNextChar('f', 'F') >= 0) {
3067 // return TokenNameFloatingPointLiteral;
// a 'd'/'D' suffix directly after the digits makes the literal a double
3070 if (getNextChar('d', 'D') >= 0) {
3071 return TokenNameDoubleLiteral;
3072 } else { //make the distinction between octal and float ....
3073 if (getNextChar('.')) { //bingo ! ....
3074 while (getNextCharAsDigit()) {
// optional exponent: 'e'/'E', an optional sign, then at least one digit
3076 if (getNextChar('e', 'E') >= 0) {
3077 // consume next character
3078 unicodeAsBackSlash = false;
3079 if (((currentCharacter = source[currentPosition++]) == '\\')
3080 && (source[currentPosition] == 'u')) {
3081 getNextUnicodeChar();
3083 if (withoutUnicodePtr != 0) {
3084 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3088 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3089 // consume next character
3090 unicodeAsBackSlash = false;
3091 if (((currentCharacter = source[currentPosition++]) == '\\')
3092 && (source[currentPosition] == 'u')) {
3093 getNextUnicodeChar();
3095 if (withoutUnicodePtr != 0) {
3096 withoutUnicodeBuffer[++withoutUnicodePtr] =
// the exponent must contain at least one digit
3101 if (!Character.isDigit(currentCharacter))
3102 throw new InvalidInputException(INVALID_FLOAT);
3103 while (getNextCharAsDigit()) {
3106 // if (getNextChar('f', 'F') >= 0)
3107 // return TokenNameFloatingPointLiteral;
3108 getNextChar('d', 'D'); //jump over potential d or D
3109 return TokenNameDoubleLiteral;
3111 return TokenNameIntegerLiteral;
// general case: consume the run of decimal digits
3119 while (getNextCharAsDigit()) {
3122 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3123 // return TokenNameLongLiteral;
3125 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3126 while (getNextCharAsDigit()) {
3131 //if floating is true both exponent and suffix may be optional
3133 if (getNextChar('e', 'E') >= 0) {
3135 // consume next character
3136 unicodeAsBackSlash = false;
3137 if (((currentCharacter = source[currentPosition++]) == '\\')
3138 && (source[currentPosition] == 'u')) {
3139 getNextUnicodeChar();
3141 if (withoutUnicodePtr != 0) {
3142 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3146 if ((currentCharacter == '-')
3147 || (currentCharacter == '+')) { // consume next character
3148 unicodeAsBackSlash = false;
3149 if (((currentCharacter = source[currentPosition++]) == '\\')
3150 && (source[currentPosition] == 'u')) {
3151 getNextUnicodeChar();
3153 if (withoutUnicodePtr != 0) {
3154 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// the exponent must contain at least one digit
3158 if (!Character.isDigit(currentCharacter))
3159 throw new InvalidInputException(INVALID_FLOAT);
3160 while (getNextCharAsDigit()) {
// an explicit 'd'/'D' suffix always yields a double
3164 if (getNextChar('d', 'D') >= 0)
3165 return TokenNameDoubleLiteral;
3166 // if (getNextChar('f', 'F') >= 0)
3167 // return TokenNameFloatingPointLiteral;
3169 //the long flag has been tested before
// without a suffix the kind depends on whether the literal was marked as floating
3171 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3174 * Search the line number corresponding to a specific position
// Looks up the line number for a source position using the recorded line-end offsets
// in lineEnds, of which linePtr + 1 entries are considered.
// NOTE(review): the search loop and every return statement are not visible in this listing;
// the g/d bounds and the </> comparisons against lineEnds[m] look like a binary search — confirm.
3177 public final int getLineNumber(int position) {
3179 if (lineEnds == null)
3181 int length = linePtr + 1;
// g and d are the lower and upper bounds of the index range still under consideration
3184 int g = 0, d = length - 1;
3188 if (position < lineEnds[m]) {
3190 } else if (position > lineEnds[m]) {
3196 if (position < lineEnds[m]) {
// Setter taking the PHP-mode flag.
// NOTE(review): the body is not visible in this listing; presumably it assigns the
// public phpMode field — confirm.
3202 public void setPHPMode(boolean mode) {
// Installs a new source buffer and resets the scanning state that depends on it.
// source: the characters to scan; the array is stored as-is (not copied) and a null
// argument is replaced by an empty array.
3206 public final void setSource(char[] source) {
3207 //the source buffer is replaced by the given array
3209 if (source == null) {
3210 this.source = new char[0];
3212 this.source = source;
// scanning restarts at offset 0 of the new buffer
3215 initialPosition = currentPosition = 0;
3216 containsAssertKeyword = false;
// fresh scratch buffer with as many slots as the new source has characters
3217 withoutUnicodeBuffer = new char[this.source.length];
3221 public String toString() {
3222 if (startPosition == source.length)
3223 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3224 if (currentPosition > source.length)
3225 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3227 char front[] = new char[startPosition];
3228 System.arraycopy(source, 0, front, 0, startPosition);
3230 int middleLength = (currentPosition - 1) - startPosition + 1;
3232 if (middleLength > -1) {
3233 middle = new char[middleLength];
3234 System.arraycopy(source, startPosition, middle, 0, middleLength);
3236 middle = new char[0];
3239 char end[] = new char[source.length - (currentPosition - 1)];
3242 (currentPosition - 1) + 1,
3245 source.length - (currentPosition - 1) - 1);
3247 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3248 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
// Returns a printable form of the token kind passed in act.
// Tokens that carry text are rendered as Kind(<current token source>); keywords,
// operators and separators are rendered as their literal spelling; the last return
// (presumably the default branch — its label is not visible in this listing) yields
// "not-a-token(<act>) <current token source>".
// NOTE(review): the switch header and several short case labels are not visible in this listing.
3251 public final String toStringAction(int act) {
// tokens rendered together with the current token source
3253 case TokenNameERROR :
3254 return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3255 case TokenNameStopPHP :
3256 return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3257 case TokenNameIdentifier :
3258 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3259 case TokenNameVariable :
3260 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3262 return "as"; //$NON-NLS-1$
3263 case TokenNamebreak :
3264 return "break"; //$NON-NLS-1$
3265 case TokenNamecase :
3266 return "case"; //$NON-NLS-1$
3267 case TokenNameclass :
3268 return "class"; //$NON-NLS-1$
3269 case TokenNamecontinue :
3270 return "continue"; //$NON-NLS-1$
3271 case TokenNamedefault :
3272 return "default"; //$NON-NLS-1$
3273 case TokenNamedefine :
3274 return "define"; //$NON-NLS-1$
3276 return "do"; //$NON-NLS-1$
3277 case TokenNameecho :
3278 return "echo"; //$NON-NLS-1$
3279 case TokenNameelse :
3280 return "else"; //$NON-NLS-1$
3281 case TokenNameelseif :
3282 return "elseif"; //$NON-NLS-1$
3283 case TokenNameendfor :
3284 return "endfor"; //$NON-NLS-1$
3285 case TokenNameendforeach :
3286 return "endforeach"; //$NON-NLS-1$
3287 case TokenNameendif :
3288 return "endif"; //$NON-NLS-1$
3289 case TokenNameendswitch :
3290 return "endswitch"; //$NON-NLS-1$
3291 case TokenNameendwhile :
3292 return "endwhile"; //$NON-NLS-1$
3293 case TokenNameextends :
3294 return "extends"; //$NON-NLS-1$
3295 case TokenNamefalse :
3296 return "false"; //$NON-NLS-1$
3298 return "for"; //$NON-NLS-1$
3299 case TokenNameforeach :
3300 return "foreach"; //$NON-NLS-1$
3301 case TokenNamefunction :
3302 return "function"; //$NON-NLS-1$
3303 case TokenNameglobal :
3304 return "global"; //$NON-NLS-1$
3306 return "if"; //$NON-NLS-1$
3307 case TokenNameinclude :
3308 return "include"; //$NON-NLS-1$
3309 case TokenNameinclude_once :
3310 return "include_once"; //$NON-NLS-1$
3311 case TokenNamelist :
3312 return "list"; //$NON-NLS-1$
3314 return "new"; //$NON-NLS-1$
3315 case TokenNamenull :
3316 return "null"; //$NON-NLS-1$
3317 case TokenNameprint :
3318 return "print"; //$NON-NLS-1$
3319 case TokenNamerequire :
3320 return "require"; //$NON-NLS-1$
3321 case TokenNamerequire_once :
3322 return "require_once"; //$NON-NLS-1$
3323 case TokenNamereturn :
3324 return "return"; //$NON-NLS-1$
3325 case TokenNamestatic :
3326 return "static"; //$NON-NLS-1$
3327 case TokenNameswitch :
3328 return "switch"; //$NON-NLS-1$
3329 case TokenNametrue :
3330 return "true"; //$NON-NLS-1$
3332 return "var"; //$NON-NLS-1$
3333 case TokenNamewhile :
3334 return "while"; //$NON-NLS-1$
// literal tokens, rendered together with the current token source
3335 case TokenNameIntegerLiteral :
3336 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3337 case TokenNameDoubleLiteral :
3338 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3339 case TokenNameStringLiteral :
3340 return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3341 case TokenNameStringConstant :
3342 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3343 case TokenNameStringInterpolated :
3344 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3345 case TokenNameHEREDOC :
3346 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// operators, rendered as their spelling
3348 case TokenNamePLUS_PLUS :
3349 return "++"; //$NON-NLS-1$
3350 case TokenNameMINUS_MINUS :
3351 return "--"; //$NON-NLS-1$
3352 case TokenNameEQUAL_EQUAL :
3353 return "=="; //$NON-NLS-1$
3354 case TokenNameEQUAL_GREATER :
3355 return "=>"; //$NON-NLS-1$
3356 case TokenNameLESS_EQUAL :
3357 return "<="; //$NON-NLS-1$
3358 case TokenNameGREATER_EQUAL :
3359 return ">="; //$NON-NLS-1$
3360 case TokenNameNOT_EQUAL :
3361 return "!="; //$NON-NLS-1$
3362 case TokenNameLEFT_SHIFT :
3363 return "<<"; //$NON-NLS-1$
3364 case TokenNameRIGHT_SHIFT :
3365 return ">>"; //$NON-NLS-1$
3366 case TokenNamePLUS_EQUAL :
3367 return "+="; //$NON-NLS-1$
3368 case TokenNameMINUS_EQUAL :
3369 return "-="; //$NON-NLS-1$
3370 case TokenNameMULTIPLY_EQUAL :
3371 return "*="; //$NON-NLS-1$
3372 case TokenNameDIVIDE_EQUAL :
3373 return "/="; //$NON-NLS-1$
3374 case TokenNameAND_EQUAL :
3375 return "&="; //$NON-NLS-1$
3376 case TokenNameOR_EQUAL :
3377 return "|="; //$NON-NLS-1$
3378 case TokenNameXOR_EQUAL :
3379 return "^="; //$NON-NLS-1$
3380 case TokenNameREMAINDER_EQUAL :
3381 return "%="; //$NON-NLS-1$
3382 case TokenNameLEFT_SHIFT_EQUAL :
3383 return "<<="; //$NON-NLS-1$
3384 case TokenNameRIGHT_SHIFT_EQUAL :
3385 return ">>="; //$NON-NLS-1$
3386 case TokenNameOR_OR :
3387 return "||"; //$NON-NLS-1$
3388 case TokenNameAND_AND :
3389 return "&&"; //$NON-NLS-1$
3390 case TokenNamePLUS :
3391 return "+"; //$NON-NLS-1$
3392 case TokenNameMINUS :
3393 return "-"; //$NON-NLS-1$
3394 case TokenNameMINUS_GREATER :
3397 return "!"; //$NON-NLS-1$
3398 case TokenNameREMAINDER :
3399 return "%"; //$NON-NLS-1$
3401 return "^"; //$NON-NLS-1$
3403 return "&"; //$NON-NLS-1$
3404 case TokenNameMULTIPLY :
3405 return "*"; //$NON-NLS-1$
3407 return "|"; //$NON-NLS-1$
3408 case TokenNameTWIDDLE :
3409 return "~"; //$NON-NLS-1$
3410 case TokenNameTWIDDLE_EQUAL :
3411 return "~="; //$NON-NLS-1$
3412 case TokenNameDIVIDE :
3413 return "/"; //$NON-NLS-1$
3414 case TokenNameGREATER :
3415 return ">"; //$NON-NLS-1$
3416 case TokenNameLESS :
3417 return "<"; //$NON-NLS-1$
// brackets and separators, rendered as their spelling
3418 case TokenNameLPAREN :
3419 return "("; //$NON-NLS-1$
3420 case TokenNameRPAREN :
3421 return ")"; //$NON-NLS-1$
3422 case TokenNameLBRACE :
3423 return "{"; //$NON-NLS-1$
3424 case TokenNameRBRACE :
3425 return "}"; //$NON-NLS-1$
3426 case TokenNameLBRACKET :
3427 return "["; //$NON-NLS-1$
3428 case TokenNameRBRACKET :
3429 return "]"; //$NON-NLS-1$
3430 case TokenNameSEMICOLON :
3431 return ";"; //$NON-NLS-1$
3432 case TokenNameQUESTION :
3433 return "?"; //$NON-NLS-1$
3434 case TokenNameCOLON :
3435 return ":"; //$NON-NLS-1$
3436 case TokenNameCOMMA :
3437 return ","; //$NON-NLS-1$
3439 return "."; //$NON-NLS-1$
3440 case TokenNameEQUAL :
3441 return "="; //$NON-NLS-1$
3444 case TokenNameDOLLAR_LBRACE :
3447 return "EOF"; //$NON-NLS-1$
// fallback for a value with no dedicated rendering
// NOTE(review): concatenating act directly would give the same text without the Integer wrapper
3449 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
// Three-flag parameter list; the body passes checkNonExternalizedStringLiterals on as a call argument.
// NOTE(review): the declaration header and the rest of that call are not visible in this
// listing; presumably this delegates to the four-flag overload — confirm.
3454 boolean tokenizeComments,
3455 boolean tokenizeWhiteSpace,
3456 boolean checkNonExternalizedStringLiterals) {
3460 checkNonExternalizedStringLiterals,
// Four-flag parameter list; the body stores each flag in the field of the same name.
// NOTE(review): the declaration header is not visible in this listing.
3465 boolean tokenizeComments,
3466 boolean tokenizeWhiteSpace,
3467 boolean checkNonExternalizedStringLiterals,
3468 boolean assertMode) {
// eofPosition is the position after which EOF is reported instead of real tokens; start with the largest possible value
3469 this.eofPosition = Integer.MAX_VALUE;
3470 this.tokenizeComments = tokenizeComments;
3471 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3472 this.checkNonExternalizedStringLiterals =
3473 checkNonExternalizedStringLiterals;
3474 this.assertMode = assertMode;
3477 private void checkNonExternalizeString() throws InvalidInputException {
3478 if (currentLine == null)
3480 parseTags(currentLine);
3483 private void parseTags(NLSLine line) throws InvalidInputException {
3484 String s = new String(getCurrentTokenSource());
3485 int pos = s.indexOf(TAG_PREFIX);
3486 int lineLength = line.size();
3488 int start = pos + TAG_PREFIX_LENGTH;
3489 int end = s.indexOf(TAG_POSTFIX, start);
3490 String index = s.substring(start, end);
3493 i = Integer.parseInt(index) - 1;
3494 // Tags are one based not zero based.
3495 } catch (NumberFormatException e) {
3496 i = -1; // we don't want to consider this as a valid NLS tag
3498 if (line.exists(i)) {
3501 pos = s.indexOf(TAG_PREFIX, start);
3504 this.nonNLSStrings = new StringLiteral[lineLength];
3505 int nonNLSCounter = 0;
3506 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3507 StringLiteral literal = (StringLiteral) iterator.next();
3508 if (literal != null) {
3509 this.nonNLSStrings[nonNLSCounter++] = literal;
3512 if (nonNLSCounter == 0) {
3513 this.nonNLSStrings = null;
3517 this.wasNonExternalizedStringLiteral = true;
3518 if (nonNLSCounter != lineLength) {
3522 (this.nonNLSStrings = new StringLiteral[nonNLSCounter]),