1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
17 import net.sourceforge.phpdt.core.compiler.*;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
20 public class Scanner implements IScanner, ITerminalSymbols {
23 - getNextToken() which returns the type of the current token
24 (this value is not memorized by the scanner)
25 - getCurrentTokenSource() which provides the token "REAL" source
26 (i.e. all unicode escapes have been transformed into the corresponding chars)
27 - sourceStart gives the position into the stream
28 - currentPosition-1 gives the sourceEnd position into the stream
// Scanner state. Positions are indexes into the source char array.
// NOTE(review): this listing omits some original lines (e.g. the declaration of source itself).
32 private boolean assertMode;
33 public boolean useAssertAsAnIndentifier = false;
34 // flag indicating whether the processed source contains occurrences of the keyword assert
35 public boolean containsAssertKeyword = false;
37 public boolean recordLineSeparator;
38 public boolean phpMode = false;
// Most recently read character; the getNextChar... methods assign it from source[currentPosition++].
40 public char currentCharacter;
// Start of the current token, as returned by getCurrentTokenStartPosition().
41 public int startPosition;
// Index of the next character to read; currentPosition - 1 is the end of the current token.
42 public int currentPosition;
43 public int initialPosition, eofPosition;
44 // after this position (presumably eofPosition) EOF is generated instead of real tokens from the source
46 public boolean tokenizeComments;
47 public boolean tokenizeWhiteSpace;
49 // source should be viewed as a window (i.e. a part)
50 // of an entire, very large stream
// Buffer for token characters once an escape has been translated; slot 0 is unused
// because a pointer value of 0 doubles as the fast test for nothing buffered.
54 public char[] withoutUnicodeBuffer;
55 public int withoutUnicodePtr;
56 // withoutUnicodePtr == 0 ==> no unicode in the current token
57 public boolean unicodeAsBackSlash = false;
59 public boolean scanningFloatLiteral = false;
61 //support for /** comments
62 //public char[][] comments = new char[10][];
63 public int[] commentStops = new int[10];
64 public int[] commentStarts = new int[10];
65 public int commentPtr = -1; // no comment test with commentPtr value -1
67 // diet parsing support - jump over some method bodies when requested
68 public boolean diet = false;
70 // support for line-based debuggers:
71 // remember the positions of the cr/lf line separators
72 public int[] lineEnds = new int[250];
73 public int linePtr = -1;
74 public boolean wasAcr = false;
// Message strings identifying scanner failures. In the visible code they are passed to
// InvalidInputException (e.g. UNTERMINATED_STRING) and compared against getMessage()
// (INVALID_ESCAPE), so their exact text is significant.
76 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
78 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
79 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
80 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
81 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
82 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
83 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
84 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
86 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
87 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
88 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
89 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
91 //----------------optimized identifier management------------------
// Shared one-character arrays, one per lowercase letter a..z.
// NOTE(review): presumably returned for single-letter tokens to avoid allocating; the
// optimizedCurrentTokenSource helpers that would use them are not visible here.
92 static final char[] charArray_a = new char[] { 'a' },
93 charArray_b = new char[] { 'b' },
94 charArray_c = new char[] { 'c' },
95 charArray_d = new char[] { 'd' },
96 charArray_e = new char[] { 'e' },
97 charArray_f = new char[] { 'f' },
98 charArray_g = new char[] { 'g' },
99 charArray_h = new char[] { 'h' },
100 charArray_i = new char[] { 'i' },
101 charArray_j = new char[] { 'j' },
102 charArray_k = new char[] { 'k' },
103 charArray_l = new char[] { 'l' },
104 charArray_m = new char[] { 'm' },
105 charArray_n = new char[] { 'n' },
106 charArray_o = new char[] { 'o' },
107 charArray_p = new char[] { 'p' },
108 charArray_q = new char[] { 'q' },
109 charArray_r = new char[] { 'r' },
110 charArray_s = new char[] { 's' },
111 charArray_t = new char[] { 't' },
112 charArray_u = new char[] { 'u' },
113 charArray_v = new char[] { 'v' },
114 charArray_w = new char[] { 'w' },
115 charArray_x = new char[] { 'x' },
116 charArray_y = new char[] { 'y' },
117 charArray_z = new char[] { 'z' };
// Placeholder entry made of six NUL characters; every slot of charArray_length is
// initialised to this same array instance.
119 static final char[] initCharArray =
120 new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
// Dimensions of the per-length lookup table below.
121 static final int TableSize = 30, InternalTableSize = 6;
// Token lengths 1..OptimizedLength are dispatched to dedicated helpers
// (see the switch in getCurrentIdentifierSource).
123 public static final int OptimizedLength = 6;
// Per-instance table indexed [length slot][TableSize slot][InternalTableSize slot] -> char[].
125 final char[][][][] charArray_length =
126 new char[OptimizedLength][TableSize][InternalTableSize][];
127 // support for detecting non-externalized string literals
// Line bookkeeping; -1 and null are the initial values before any line has been recorded.
128 int currentLineNr = -1;
129 int previousLineNr = -1;
130 NLSLine currentLine = null;
// NLSLine instances are appended to this list when string constants are scanned.
131 List lines = new ArrayList();
// Marker text that opens and closes a NON-NLS tag comment, with the cached lengths of each.
132 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
133 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
134 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
135 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
136 public StringLiteral[] nonNLSStrings = null;
// When true, getNextToken records scanned string constants on the current NLSLine.
137 public boolean checkNonExternalizedStringLiterals = true;
138 public boolean wasNonExternalizedStringLiteral = false;
// Fills every slot of charArray_length with the shared initCharArray placeholder.
// NOTE(review): the outer bound is the literal 6 while the array is dimensioned with
// OptimizedLength (also 6); the two must be kept in sync by hand.
// NOTE(review): the enclosing block opener and the closing braces are absent from this listing.
141 for (int i = 0; i < 6; i++) {
142 for (int j = 0; j < TableSize; j++) {
143 for (int k = 0; k < InternalTableSize; k++) {
144 charArray_length[i][j][k] = initCharArray;
// First declarator of a static int counter list.
// NOTE(review): the declaration ends with a comma, so further declarators follow on
// original lines that are absent from this listing.
149 static int newEntry2 = 0,
// Bracket kinds; BracketKinds is the number of kinds defined above it.
155 public static final int RoundBracket = 0;
156 public static final int SquareBracket = 1;
157 public static final int CurlyBracket = 2;
158 public static final int BracketKinds = 3;
// Compile-time debug switch, off by default.
160 public static final boolean DEBUG = false;
// Convenience constructor: delegates to the three-argument constructor with false as the
// third argument.
// NOTE(review): the three-argument constructor is not visible here, so the meaning of that
// third flag cannot be confirmed from this listing.
165 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
166 this(tokenizeComments, tokenizeWhiteSpace, false);
170 * Determines if the specified character is
171 * permissible as the first character in a PHP identifier
// Returns true when ch is accepted as the first character of a PHP identifier: any
// character for which Character.isLetter(ch) holds, or any character in 0x7F..0xFF.
// NOTE(review): original line 175 is absent from this listing; it sits between the two
// visible alternatives and presumably adds one more (likely the underscore) - confirm
// against the complete file.
173 public static boolean isPHPIdentifierStart(char ch) {
174 return Character.isLetter(ch)
176 || (0x7F <= ch && ch <= 0xFF);
180 * Determines if the specified character may be part of a PHP identifier as
181 * other than the first character
// Returns true when ch is accepted after the first character of a PHP identifier: any
// character for which Character.isLetterOrDigit(ch) holds, or any character in 0x7F..0xFF.
// NOTE(review): original line 185 is absent from this listing; it sits between the two
// visible alternatives and presumably adds one more (likely the underscore) - confirm
// against the complete file.
183 public static boolean isPHPIdentifierPart(char ch) {
184 return Character.isLetterOrDigit(ch)
186 || (0x7F <= ch && ch <= 0xFF);
// Reports whether currentPosition has reached the end of the source array
// (currentPosition equals source.length).
189 public final boolean atEnd() {
190 // This check is not meaningful when source is
191 // only a part of the real input stream
193 return source.length == currentPosition;
// Returns the characters of the current token, whose length is currentPosition - startPosition.
// The switch dispatches on that length to optimizedCurrentTokenSource1..6 (see
// OptimizedLength); otherwise a fresh char[length] is allocated.
// NOTE(review): the case labels, the arraycopy call around the allocation and the final
// return are absent from this listing; presumably cases 1..6 map to helpers 1..6 in order.
195 public char[] getCurrentIdentifierSource() {
196 // return the token REAL source (i.e. unicode escapes are already translated)
// The withoutUnicodeBuffer path below is disabled (commented out).
199 // if (withoutUnicodePtr != 0)
200 // //0 is used as a fast test flag so the real first char is in position 1
202 // withoutUnicodeBuffer,
204 // result = new char[withoutUnicodePtr],
206 // withoutUnicodePtr);
208 int length = currentPosition - startPosition;
209 switch (length) { // see OptimizedLength
211 return optimizedCurrentTokenSource1();
213 return optimizedCurrentTokenSource2();
215 return optimizedCurrentTokenSource3();
217 return optimizedCurrentTokenSource4();
219 return optimizedCurrentTokenSource5();
221 return optimizedCurrentTokenSource6();
227 result = new char[length],
// Returns the position of the last character of the current token, i.e. currentPosition - 1
// (currentPosition always points just past the last character read).
233 public int getCurrentTokenEndPosition() {
234 return this.currentPosition - 1;
// Returns the characters of the current token in a newly allocated array of length
// currentPosition - startPosition.
// NOTE(review): the arraycopy call around the allocation and the return statement are absent
// from this listing; presumably the range is copied out of source starting at startPosition.
236 public final char[] getCurrentTokenSource() {
237 // Return the token REAL source (i.e. unicode escapes are already translated)
// The withoutUnicodeBuffer path below is disabled (commented out).
240 // if (withoutUnicodePtr != 0)
241 // // 0 is used as a fast test flag so the real first char is in position 1
243 // withoutUnicodeBuffer,
245 // result = new char[withoutUnicodePtr],
247 // withoutUnicodePtr);
253 result = new char[length = currentPosition - startPosition],
// Variant of getCurrentTokenSource() that measures from the caller-supplied startPos instead
// of startPosition: the result array has length currentPosition - startPos.
// NOTE(review): the arraycopy call around the allocation and the return statement are absent
// from this listing; presumably the range is copied out of source starting at startPos.
260 public final char[] getCurrentTokenSource(int startPos) {
261 // Return the token REAL source (i.e. unicode escapes are already translated)
// The withoutUnicodeBuffer path below is disabled (commented out).
264 // if (withoutUnicodePtr != 0)
265 // // 0 is used as a fast test flag so the real first char is in position 1
267 // withoutUnicodeBuffer,
269 // result = new char[withoutUnicodePtr],
271 // withoutUnicodePtr);
277 result = new char[length = currentPosition - startPos],
// Returns the content of the current string token without its two delimiter characters.
// When withoutUnicodePtr is non-zero the content is copied from withoutUnicodeBuffer starting
// at index 2 (index 0 is unused, index 1 holds the opening delimiter), length withoutUnicodePtr - 2.
// Otherwise a new array of length currentPosition - startPosition - 2 is allocated.
// NOTE(review): the else keyword, the second arraycopy call and the return statement are
// absent from this listing.
284 public final char[] getCurrentTokenSourceString() {
285 //return the token REAL source (aka unicodes are precomputed).
286 //REMOVE the two " that are at the beginning and the end.
289 if (withoutUnicodePtr != 0)
290 //0 is used as a fast test flag so the real first char is in position 1
291 System.arraycopy(withoutUnicodeBuffer, 2,
292 //2 is 1 (real start) + 1 (to jump over the ")
293 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
299 result = new char[length = currentPosition - startPosition - 2],
// Returns the position of the first character of the current token (startPosition).
305 public int getCurrentTokenStartPosition() {
306 return this.startPosition;
309 * Search the source position corresponding to the end of a given line number
311 * Line numbers are 1-based, and relative to the scanner initialPosition.
312 * Character positions are 0-based.
314 * In case the given line number is inconsistent, answers -1.
// Looks up the end position of the given 1-based line in lineEnds (entry lineNumber - 1).
// NOTE(review): the statements guarded by the three visible conditions are absent from this
// listing; per the method documentation, inconsistent line numbers presumably yield -1.
// NOTE(review): the bounds checks use lineEnds.length, which is the capacity of the array
// (250 initially), not the number of line ends actually recorded - verify whether linePtr
// should be used instead.
316 public final int getLineEnd(int lineNumber) {
318 if (lineEnds == null)
320 if (lineNumber >= lineEnds.length)
325 if (lineNumber == lineEnds.length - 1)
327 return lineEnds[lineNumber - 1];
328 // the next line starts one character after the lineEnd of the previous line
331 * Search the source position corresponding to the beginning of a given line number
333 * Line numbers are 1-based, and relative to the scanner initialPosition.
334 * Character positions are 0-based.
336 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
338 * In case the given line number is inconsistent, answers -1.
// Computes the start position of the given 1-based line: initialPosition in one branch,
// otherwise the end of the previous line (lineEnds[lineNumber - 2]) plus one.
// NOTE(review): the condition selecting the initialPosition branch (presumably the first
// line) and the statements guarded by the two visible conditions are absent from this listing.
// NOTE(review): the bounds check uses lineEnds.length, which is the capacity of the array,
// not the number of line ends actually recorded.
340 public final int getLineStart(int lineNumber) {
342 if (lineEnds == null)
344 if (lineNumber >= lineEnds.length)
350 return initialPosition;
351 return lineEnds[lineNumber - 2] + 1;
352 // the next line starts one character after the lineEnd of the previous line
// Conditionally consumes one character: reads source[currentPosition++] into currentCharacter
// and, when it differs from testedChar, restores currentPosition to its value on entry.
// Reading past the end of source raises IndexOutOfBoundsException, which is caught here and
// also restores currentPosition.
// NOTE(review): the try opener and the return statements are absent from this listing;
// presumably true is returned on a match and false otherwise.
354 public final boolean getNextChar(char testedChar) {
356 // The unicode-escape handling below is disabled (commented out); when active it
357 // used a buffer holding the translated char values.
358 // At the end of this method currentCharacter holds the newly visited char
359 // and currentPosition points right after it.
360 // Both statements hold if currentCharacter equals testedChar.
361 // On a mismatch currentPosition is restored; currentCharacter has still been overwritten.
363 //ALL getNextChar.... ARE OPTIMIZED COPIES
365 int temp = currentPosition;
367 currentCharacter = source[currentPosition++];
368 // if (((currentCharacter = source[currentPosition++]) == '\\')
369 // && (source[currentPosition] == 'u')) {
370 // //-------------unicode handling ------------
371 // int c1, c2, c3, c4;
372 // int unicodeSize = 6;
373 // currentPosition++;
374 // while (source[currentPosition] == 'u') {
375 // currentPosition++;
379 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
381 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
383 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
385 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
387 // currentPosition = temp;
391 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
392 // if (currentCharacter != testedChar) {
393 // currentPosition = temp;
396 // unicodeAsBackSlash = currentCharacter == '\\';
398 // //need the unicode buffer
399 // if (withoutUnicodePtr == 0) {
400 // //buffer all the entries that have been left aside....
401 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
405 // withoutUnicodeBuffer,
407 // withoutUnicodePtr);
409 // //fill the buffer with the char
410 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
413 // } //-------------end unicode handling--------------
415 if (currentCharacter != testedChar) {
416 currentPosition = temp;
419 unicodeAsBackSlash = false;
420 // if (withoutUnicodePtr != 0)
421 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
424 } catch (IndexOutOfBoundsException e) {
425 unicodeAsBackSlash = false;
426 currentPosition = temp;
// Conditionally consumes one character that must equal testedChar1 or testedChar2: reads
// source[currentPosition++] into currentCharacter and, when it matches neither, restores
// currentPosition to its value on entry. Reading past the end of source raises
// IndexOutOfBoundsException, which is caught here and also restores currentPosition.
// NOTE(review): the try opener and the return statements are absent from this listing; the
// result codes described below come from the original comment.
430 public final int getNextChar(char testedChar1, char testedChar2) {
431 // result: 0 for testedChar1, 1 for testedChar2, -1 for anything else
432 // callers can test (x==0) for the first and (x>0) for the second
433 // The unicode-escape handling below is disabled (commented out); when active it
434 // used a buffer holding the translated char values.
435 // At the end of this method currentCharacter holds the newly visited char
436 // and currentPosition points right after it.
437 // Both statements hold if currentCharacter equals testedChar1 or testedChar2.
438 // On a mismatch currentPosition is restored; currentCharacter has still been overwritten.
440 //ALL getNextChar.... ARE OPTIMIZED COPIES
442 int temp = currentPosition;
445 currentCharacter = source[currentPosition++];
446 // if (((currentCharacter = source[currentPosition++]) == '\\')
447 // && (source[currentPosition] == 'u')) {
448 // //-------------unicode handling ------------
449 // int c1, c2, c3, c4;
450 // int unicodeSize = 6;
451 // currentPosition++;
452 // while (source[currentPosition] == 'u') {
453 // currentPosition++;
457 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
459 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
461 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
463 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
465 // currentPosition = temp;
469 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
470 // if (currentCharacter == testedChar1)
472 // else if (currentCharacter == testedChar2)
475 // currentPosition = temp;
479 // //need the unicode buffer
480 // if (withoutUnicodePtr == 0) {
481 // //buffer all the entries that have been left aside....
482 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
486 // withoutUnicodeBuffer,
488 // withoutUnicodePtr);
490 // //fill the buffer with the char
491 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
493 // } //-------------end unicode handling--------------
495 if (currentCharacter == testedChar1)
497 else if (currentCharacter == testedChar2)
500 currentPosition = temp;
504 // if (withoutUnicodePtr != 0)
505 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
508 } catch (IndexOutOfBoundsException e) {
509 currentPosition = temp;
// Conditionally consumes one decimal digit: reads source[currentPosition++] into
// currentCharacter and, when Character.isDigit rejects it, restores currentPosition to its
// value on entry. Reading past the end of source raises IndexOutOfBoundsException, which is
// caught here and also restores currentPosition.
// NOTE(review): the try opener and the return statements are absent from this listing;
// presumably true is returned for a digit and false otherwise.
513 public final boolean getNextCharAsDigit() {
515 // The unicode-escape handling below is disabled (commented out); when active it
516 // used a buffer holding the translated char values.
517 // At the end of this method currentCharacter holds the newly visited char
518 // and currentPosition points right after it.
519 // Both statements hold if currentCharacter is a digit.
520 // On a mismatch currentPosition is restored; currentCharacter has still been overwritten.
522 //ALL getNextChar.... ARE OPTIMIZED COPIES
524 int temp = currentPosition;
526 currentCharacter = source[currentPosition++];
527 // if (((currentCharacter = source[currentPosition++]) == '\\')
528 // && (source[currentPosition] == 'u')) {
529 // //-------------unicode handling ------------
530 // int c1, c2, c3, c4;
531 // int unicodeSize = 6;
532 // currentPosition++;
533 // while (source[currentPosition] == 'u') {
534 // currentPosition++;
538 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
540 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
542 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
544 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
546 // currentPosition = temp;
550 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
551 // if (!Character.isDigit(currentCharacter)) {
552 // currentPosition = temp;
556 // //need the unicode buffer
557 // if (withoutUnicodePtr == 0) {
558 // //buffer all the entries that have been left aside....
559 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
563 // withoutUnicodeBuffer,
565 // withoutUnicodePtr);
567 // //fill the buffer with the char
568 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
570 // } //-------------end unicode handling--------------
572 if (!Character.isDigit(currentCharacter)) {
573 currentPosition = temp;
576 // if (withoutUnicodePtr != 0)
577 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
580 } catch (IndexOutOfBoundsException e) {
581 currentPosition = temp;
// Conditionally consumes one digit of the given radix: reads source[currentPosition++] into
// currentCharacter and, when Character.digit(currentCharacter, radix) is -1, restores
// currentPosition to its value on entry. Reading past the end of source raises
// IndexOutOfBoundsException, which is caught here and also restores currentPosition.
// NOTE(review): the try opener and the return statements are absent from this listing;
// presumably true is returned for a valid digit and false otherwise.
585 public final boolean getNextCharAsDigit(int radix) {
587 // The unicode-escape handling below is disabled (commented out); when active it
588 // used a buffer holding the translated char values.
589 // At the end of this method currentCharacter holds the newly visited char
590 // and currentPosition points right after it.
591 // Both statements hold if currentCharacter is a digit in the given radix.
592 // On a mismatch currentPosition is restored; currentCharacter has still been overwritten.
594 //ALL getNextChar.... ARE OPTIMIZED COPIES
596 int temp = currentPosition;
598 currentCharacter = source[currentPosition++];
599 // if (((currentCharacter = source[currentPosition++]) == '\\')
600 // && (source[currentPosition] == 'u')) {
601 // //-------------unicode handling ------------
602 // int c1, c2, c3, c4;
603 // int unicodeSize = 6;
604 // currentPosition++;
605 // while (source[currentPosition] == 'u') {
606 // currentPosition++;
610 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
612 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
614 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
616 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
618 // currentPosition = temp;
622 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
623 // if (Character.digit(currentCharacter, radix) == -1) {
624 // currentPosition = temp;
628 // //need the unicode buffer
629 // if (withoutUnicodePtr == 0) {
630 // //buffer all the entries that have been left aside....
631 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
635 // withoutUnicodeBuffer,
637 // withoutUnicodePtr);
639 // //fill the buffer with the char
640 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
642 // } //-------------end unicode handling--------------
644 if (Character.digit(currentCharacter, radix) == -1) {
645 currentPosition = temp;
648 // if (withoutUnicodePtr != 0)
649 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
652 } catch (IndexOutOfBoundsException e) {
653 currentPosition = temp;
// Conditionally consumes one identifier character: reads source[currentPosition++] into
// currentCharacter and, when isPHPIdentifierPart rejects it, restores currentPosition to its
// value on entry. Despite the Java in the method name, the test applied is the PHP one.
// Reading past the end of source raises IndexOutOfBoundsException, which is caught here and
// also restores currentPosition.
// NOTE(review): the try opener and the return statements are absent from this listing;
// presumably true is returned for an identifier part and false otherwise.
657 public boolean getNextCharAsJavaIdentifierPart() {
659 // The unicode-escape handling below is disabled (commented out); when active it
660 // used a buffer holding the translated char values.
661 // At the end of this method currentCharacter holds the newly visited char
662 // and currentPosition points right after it.
663 // Both statements hold if currentCharacter passes isPHPIdentifierPart.
664 // On a mismatch currentPosition is restored; currentCharacter has still been overwritten.
666 //ALL getNextChar.... ARE OPTIMIZED COPIES
668 int temp = currentPosition;
670 currentCharacter = source[currentPosition++];
671 // if (((currentCharacter = source[currentPosition++]) == '\\')
672 // && (source[currentPosition] == 'u')) {
673 // //-------------unicode handling ------------
674 // int c1, c2, c3, c4;
675 // int unicodeSize = 6;
676 // currentPosition++;
677 // while (source[currentPosition] == 'u') {
678 // currentPosition++;
682 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
684 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
686 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
688 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
690 // currentPosition = temp;
694 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
695 // if (!isPHPIdentifierPart(currentCharacter)) {
696 // currentPosition = temp;
700 // //need the unicode buffer
701 // if (withoutUnicodePtr == 0) {
702 // //buffer all the entries that have been left aside....
703 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
707 // withoutUnicodeBuffer,
709 // withoutUnicodePtr);
711 // //fill the buffer with the char
712 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
714 // } //-------------end unicode handling--------------
716 if (!isPHPIdentifierPart(currentCharacter)) {
717 currentPosition = temp;
721 // if (withoutUnicodePtr != 0)
722 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
725 } catch (IndexOutOfBoundsException e) {
726 currentPosition = temp;
731 public int getNextToken() throws InvalidInputException {
732 int htmlPosition = currentPosition;
735 currentCharacter = source[currentPosition++];
736 if (currentCharacter == '<') {
737 if (getNextChar('?')) {
738 currentCharacter = source[currentPosition++];
739 if ((currentCharacter == ' ')
740 || Character.isWhitespace(currentCharacter)) {
742 startPosition = currentPosition;
744 if (tokenizeWhiteSpace) {
745 // && (whiteStart != currentPosition - 1)) {
746 // reposition scanner in case we are interested by spaces as tokens
747 startPosition = htmlPosition;
748 return TokenNameHTML;
752 (currentCharacter == 'P') || (currentCharacter == 'p');
754 int test = getNextChar('H', 'h');
756 test = getNextChar('P', 'p');
759 startPosition = currentPosition;
762 if (tokenizeWhiteSpace) {
763 // && (whiteStart != currentPosition - 1)) {
764 // reposition scanner in case we are interested by spaces as tokens
765 startPosition = htmlPosition;
766 return TokenNameHTML;
775 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
776 if (recordLineSeparator) {
783 } //-----------------end switch while try--------------------
784 catch (IndexOutOfBoundsException e) {
785 if (tokenizeWhiteSpace) {
786 // && (whiteStart != currentPosition - 1)) {
787 // reposition scanner in case we are interested by spaces as tokens
788 startPosition = htmlPosition;
796 jumpOverMethodBody();
798 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
801 while (true) { //loop for jumping over comments
802 withoutUnicodePtr = 0;
803 //start with a new token (even comment written with unicode )
805 // ---------Consume white space and handles startPosition---------
806 int whiteStart = currentPosition;
807 boolean isWhiteSpace;
809 startPosition = currentPosition;
810 currentCharacter = source[currentPosition++];
811 // if (((currentCharacter = source[currentPosition++]) == '\\')
812 // && (source[currentPosition] == 'u')) {
813 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
815 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
816 checkNonExternalizeString();
817 if (recordLineSeparator) {
824 (currentCharacter == ' ')
825 || Character.isWhitespace(currentCharacter);
827 } while (isWhiteSpace);
828 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
829 // reposition scanner in case we are interested by spaces as tokens
831 startPosition = whiteStart;
832 return TokenNameWHITESPACE;
834 //little trick to get out in the middle of a source compuation
835 if (currentPosition > eofPosition)
838 // ---------Identify the next token-------------
840 switch (currentCharacter) {
842 return TokenNameLPAREN;
844 return TokenNameRPAREN;
846 return TokenNameLBRACE;
848 return TokenNameRBRACE;
850 return TokenNameLBRACKET;
852 return TokenNameRBRACKET;
854 return TokenNameSEMICOLON;
856 return TokenNameCOMMA;
859 if (getNextCharAsDigit())
860 return scanNumber(true);
865 if ((test = getNextChar('+', '=')) == 0)
866 return TokenNamePLUS_PLUS;
868 return TokenNamePLUS_EQUAL;
869 return TokenNamePLUS;
874 if ((test = getNextChar('-', '=')) == 0)
875 return TokenNameMINUS_MINUS;
877 return TokenNameMINUS_EQUAL;
878 if (getNextChar('>'))
879 return TokenNameMINUS_GREATER;
881 return TokenNameMINUS;
884 if (getNextChar('='))
885 return TokenNameTWIDDLE_EQUAL;
886 return TokenNameTWIDDLE;
888 if (getNextChar('='))
889 return TokenNameNOT_EQUAL;
892 if (getNextChar('='))
893 return TokenNameMULTIPLY_EQUAL;
894 return TokenNameMULTIPLY;
896 if (getNextChar('='))
897 return TokenNameREMAINDER_EQUAL;
898 return TokenNameREMAINDER;
902 if ((test = getNextChar('=', '<')) == 0)
903 return TokenNameLESS_EQUAL;
905 if (getNextChar('='))
906 return TokenNameLEFT_SHIFT_EQUAL;
907 if (getNextChar('<')) {
908 int heredocStart = currentPosition;
909 int heredocLength = 0;
910 currentCharacter = source[currentPosition++];
911 if (isPHPIdentifierStart(currentCharacter)) {
912 currentCharacter = source[currentPosition++];
914 return TokenNameERROR;
916 while (isPHPIdentifierPart(currentCharacter)) {
917 currentCharacter = source[currentPosition++];
920 heredocLength = currentPosition - heredocStart - 1;
922 // heredoc end-tag determination
923 boolean endTag = true;
926 ch = source[currentPosition++];
927 if (ch == '\r' || ch == '\n') {
928 if (recordLineSeparator) {
933 for (int i = 0; i < heredocLength; i++) {
934 if (source[currentPosition + i]
935 != source[heredocStart + i]) {
941 currentPosition += heredocLength - 1;
942 currentCharacter = source[currentPosition++];
943 break; // do...while loop
951 return TokenNameHEREDOC;
953 return TokenNameLEFT_SHIFT;
955 return TokenNameLESS;
960 if ((test = getNextChar('=', '>')) == 0)
961 return TokenNameGREATER_EQUAL;
963 if ((test = getNextChar('=', '>')) == 0)
964 return TokenNameRIGHT_SHIFT_EQUAL;
965 return TokenNameRIGHT_SHIFT;
967 return TokenNameGREATER;
970 if (getNextChar('='))
971 return TokenNameEQUAL_EQUAL;
972 if (getNextChar('>'))
973 return TokenNameEQUAL_GREATER;
974 return TokenNameEQUAL;
978 if ((test = getNextChar('&', '=')) == 0)
979 return TokenNameAND_AND;
981 return TokenNameAND_EQUAL;
987 if ((test = getNextChar('|', '=')) == 0)
988 return TokenNameOR_OR;
990 return TokenNameOR_EQUAL;
994 if (getNextChar('='))
995 return TokenNameXOR_EQUAL;
998 if (getNextChar('>')) {
1000 return TokenNameStopPHP;
1002 return TokenNameQUESTION;
1004 if (getNextChar(':'))
1005 return TokenNameCOLON_COLON;
1006 return TokenNameCOLON;
1012 // if ((test = getNextChar('\n', '\r')) == 0) {
1013 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1016 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1017 // for (int lookAhead = 0;
1020 // if (currentPosition + lookAhead
1021 // == source.length)
1023 // if (source[currentPosition + lookAhead]
1026 // if (source[currentPosition + lookAhead]
1028 // currentPosition += lookAhead + 1;
1032 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1035 // if (getNextChar('\'')) {
1036 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1037 // for (int lookAhead = 0;
1040 // if (currentPosition + lookAhead
1041 // == source.length)
1043 // if (source[currentPosition + lookAhead]
1046 // if (source[currentPosition + lookAhead]
1048 // currentPosition += lookAhead + 1;
1052 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1054 // if (getNextChar('\\'))
1055 // scanEscapeCharacter();
1056 // else { // consume next character
1057 // unicodeAsBackSlash = false;
1058 // if (((currentCharacter = source[currentPosition++])
1060 // && (source[currentPosition] == 'u')) {
1061 // getNextUnicodeChar();
1063 // if (withoutUnicodePtr != 0) {
1064 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1065 // currentCharacter;
1069 // // if (getNextChar('\''))
1070 // // return TokenNameCharacterLiteral;
1071 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1072 // for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1073 // if (currentPosition + lookAhead == source.length)
1075 // if (source[currentPosition + lookAhead] == '\n')
1077 // if (source[currentPosition + lookAhead] == '\'') {
1078 // currentPosition += lookAhead + 1;
1082 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1085 // consume next character
1086 unicodeAsBackSlash = false;
1087 currentCharacter = source[currentPosition++];
1088 // if (((currentCharacter = source[currentPosition++]) == '\\')
1089 // && (source[currentPosition] == 'u')) {
1090 // getNextUnicodeChar();
1092 // if (withoutUnicodePtr != 0) {
1093 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1094 // currentCharacter;
1098 while (currentCharacter != '\'') {
1100 /**** in PHP \r and \n are valid in string literals ****/
1101 // if ((currentCharacter == '\n')
1102 // || (currentCharacter == '\r')) {
1103 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1104 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1105 // if (currentPosition + lookAhead == source.length)
1107 // if (source[currentPosition + lookAhead] == '\n')
1109 // if (source[currentPosition + lookAhead] == '\"') {
1110 // currentPosition += lookAhead + 1;
1114 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1116 if (currentCharacter == '\\') {
1117 int escapeSize = currentPosition;
1118 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1119 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1120 scanSingleQuotedEscapeCharacter();
1121 escapeSize = currentPosition - escapeSize;
1122 if (withoutUnicodePtr == 0) {
1123 //buffer all the entries that have been left aside....
1125 currentPosition - escapeSize - 1 - startPosition;
1129 withoutUnicodeBuffer,
1132 withoutUnicodeBuffer[++withoutUnicodePtr] =
1134 } else { //overwrite the / in the buffer
1135 withoutUnicodeBuffer[withoutUnicodePtr] =
1137 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1138 withoutUnicodePtr--;
1142 // consume next character
1143 unicodeAsBackSlash = false;
1144 currentCharacter = source[currentPosition++];
1145 // if (((currentCharacter = source[currentPosition++]) == '\\')
1146 // && (source[currentPosition] == 'u')) {
1147 // getNextUnicodeChar();
1149 if (withoutUnicodePtr != 0) {
1150 withoutUnicodeBuffer[++withoutUnicodePtr] =
1156 } catch (IndexOutOfBoundsException e) {
1157 throw new InvalidInputException(UNTERMINATED_STRING);
1158 } catch (InvalidInputException e) {
1159 if (e.getMessage().equals(INVALID_ESCAPE)) {
1160 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1161 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1162 if (currentPosition + lookAhead == source.length)
1164 if (source[currentPosition + lookAhead] == '\n')
1166 if (source[currentPosition + lookAhead] == '\'') {
1167 currentPosition += lookAhead + 1;
1175 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1176 if (currentLine == null) {
1177 currentLine = new NLSLine();
1178 lines.add(currentLine);
1182 getCurrentTokenSourceString(),
1184 currentPosition - 1));
1186 return TokenNameStringConstant;
1189 // consume next character
1190 unicodeAsBackSlash = false;
1191 currentCharacter = source[currentPosition++];
1192 // if (((currentCharacter = source[currentPosition++]) == '\\')
1193 // && (source[currentPosition] == 'u')) {
1194 // getNextUnicodeChar();
1196 // if (withoutUnicodePtr != 0) {
1197 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1198 // currentCharacter;
1202 while (currentCharacter != '"') {
1204 /**** in PHP \r and \n are valid in string literals ****/
1205 // if ((currentCharacter == '\n')
1206 // || (currentCharacter == '\r')) {
1207 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1208 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1209 // if (currentPosition + lookAhead == source.length)
1211 // if (source[currentPosition + lookAhead] == '\n')
1213 // if (source[currentPosition + lookAhead] == '\"') {
1214 // currentPosition += lookAhead + 1;
1218 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1220 if (currentCharacter == '\\') {
1221 int escapeSize = currentPosition;
1222 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1223 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1224 scanDoubleQuotedEscapeCharacter();
1225 escapeSize = currentPosition - escapeSize;
1226 if (withoutUnicodePtr == 0) {
1227 //buffer all the entries that have been left aside....
1229 currentPosition - escapeSize - 1 - startPosition;
1233 withoutUnicodeBuffer,
1236 withoutUnicodeBuffer[++withoutUnicodePtr] =
1238 } else { //overwrite the / in the buffer
1239 withoutUnicodeBuffer[withoutUnicodePtr] =
1241 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1242 withoutUnicodePtr--;
1246 // consume next character
1247 unicodeAsBackSlash = false;
1248 currentCharacter = source[currentPosition++];
1249 // if (((currentCharacter = source[currentPosition++]) == '\\')
1250 // && (source[currentPosition] == 'u')) {
1251 // getNextUnicodeChar();
1253 if (withoutUnicodePtr != 0) {
1254 withoutUnicodeBuffer[++withoutUnicodePtr] =
1260 } catch (IndexOutOfBoundsException e) {
1261 throw new InvalidInputException(UNTERMINATED_STRING);
1262 } catch (InvalidInputException e) {
1263 if (e.getMessage().equals(INVALID_ESCAPE)) {
1264 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1265 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1266 if (currentPosition + lookAhead == source.length)
1268 if (source[currentPosition + lookAhead] == '\n')
1270 if (source[currentPosition + lookAhead] == '\"') {
1271 currentPosition += lookAhead + 1;
1279 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1280 if (currentLine == null) {
1281 currentLine = new NLSLine();
1282 lines.add(currentLine);
1286 getCurrentTokenSourceString(),
1288 currentPosition - 1));
1290 return TokenNameStringLiteral;
1293 // consume next character
1294 unicodeAsBackSlash = false;
1295 currentCharacter = source[currentPosition++];
1296 // if (((currentCharacter = source[currentPosition++]) == '\\')
1297 // && (source[currentPosition] == 'u')) {
1298 // getNextUnicodeChar();
1300 // if (withoutUnicodePtr != 0) {
1301 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1302 // currentCharacter;
1306 while (currentCharacter != '`') {
1308 /**** in PHP \r and \n are valid in string literals ****/
1309 // if ((currentCharacter == '\n')
1310 // || (currentCharacter == '\r')) {
1311 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1312 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1313 // if (currentPosition + lookAhead == source.length)
1315 // if (source[currentPosition + lookAhead] == '\n')
1317 // if (source[currentPosition + lookAhead] == '\"') {
1318 // currentPosition += lookAhead + 1;
1322 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1324 if (currentCharacter == '\\') {
1325 int escapeSize = currentPosition;
1326 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1327 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1328 scanDoubleQuotedEscapeCharacter();
1329 escapeSize = currentPosition - escapeSize;
1330 if (withoutUnicodePtr == 0) {
1331 //buffer all the entries that have been left aside....
1333 currentPosition - escapeSize - 1 - startPosition;
1337 withoutUnicodeBuffer,
1340 withoutUnicodeBuffer[++withoutUnicodePtr] =
1342 } else { //overwrite the / in the buffer
1343 withoutUnicodeBuffer[withoutUnicodePtr] =
1345 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1346 withoutUnicodePtr--;
1350 // consume next character
1351 unicodeAsBackSlash = false;
1352 currentCharacter = source[currentPosition++];
1353 // if (((currentCharacter = source[currentPosition++]) == '\\')
1354 // && (source[currentPosition] == 'u')) {
1355 // getNextUnicodeChar();
1357 if (withoutUnicodePtr != 0) {
1358 withoutUnicodeBuffer[++withoutUnicodePtr] =
1364 } catch (IndexOutOfBoundsException e) {
1365 throw new InvalidInputException(UNTERMINATED_STRING);
1366 } catch (InvalidInputException e) {
1367 if (e.getMessage().equals(INVALID_ESCAPE)) {
1368 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1369 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1370 if (currentPosition + lookAhead == source.length)
1372 if (source[currentPosition + lookAhead] == '\n')
1374 if (source[currentPosition + lookAhead] == '`') {
1375 currentPosition += lookAhead + 1;
1383 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1384 if (currentLine == null) {
1385 currentLine = new NLSLine();
1386 lines.add(currentLine);
1390 getCurrentTokenSourceString(),
1392 currentPosition - 1));
1394 return TokenNameStringInterpolated;
1399 if ((currentCharacter == '#')
1400 || (test = getNextChar('/', '*')) == 0) {
1402 int endPositionForLineComment = 0;
1403 try { //get the next char
1404 currentCharacter = source[currentPosition++];
1405 // if (((currentCharacter = source[currentPosition++])
1407 // && (source[currentPosition] == 'u')) {
1408 // //-------------unicode traitement ------------
1409 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1410 // currentPosition++;
1411 // while (source[currentPosition] == 'u') {
1412 // currentPosition++;
1415 // Character.getNumericValue(source[currentPosition++]))
1419 // Character.getNumericValue(source[currentPosition++]))
1423 // Character.getNumericValue(source[currentPosition++]))
1427 // Character.getNumericValue(source[currentPosition++]))
1430 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1432 // currentCharacter =
1433 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1437 //handle the \\u case manually into comment
1438 // if (currentCharacter == '\\') {
1439 // if (source[currentPosition] == '\\')
1440 // currentPosition++;
1441 // } //jump over the \\
1442 boolean isUnicode = false;
1443 while (currentCharacter != '\r'
1444 && currentCharacter != '\n') {
1445 if (currentCharacter == '?') {
1446 if (getNextChar('>')) {
1447 startPosition = currentPosition - 2;
1449 return TokenNameStopPHP;
1455 currentCharacter = source[currentPosition++];
1456 // if (((currentCharacter = source[currentPosition++])
1458 // && (source[currentPosition] == 'u')) {
1459 // isUnicode = true;
1460 // //-------------unicode traitement ------------
1461 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1462 // currentPosition++;
1463 // while (source[currentPosition] == 'u') {
1464 // currentPosition++;
1467 // Character.getNumericValue(source[currentPosition++]))
1471 // Character.getNumericValue(
1472 // source[currentPosition++]))
1476 // Character.getNumericValue(
1477 // source[currentPosition++]))
1481 // Character.getNumericValue(
1482 // source[currentPosition++]))
1485 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1487 // currentCharacter =
1488 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1491 //handle the \\u case manually into comment
1492 // if (currentCharacter == '\\') {
1493 // if (source[currentPosition] == '\\')
1494 // currentPosition++;
1495 // } //jump over the \\
1498 endPositionForLineComment = currentPosition - 6;
1500 endPositionForLineComment = currentPosition - 1;
1502 recordComment(false);
1503 if ((currentCharacter == '\r')
1504 || (currentCharacter == '\n')) {
1505 checkNonExternalizeString();
1506 if (recordLineSeparator) {
1508 pushUnicodeLineSeparator();
1510 pushLineSeparator();
1516 if (tokenizeComments) {
1518 currentPosition = endPositionForLineComment;
1519 // reset one character behind
1521 return TokenNameCOMMENT_LINE;
1523 } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1524 if (tokenizeComments) {
1526 // reset one character behind
1527 return TokenNameCOMMENT_LINE;
1533 //traditional and annotation comment
1534 boolean isJavadoc = false, star = false;
1535 // consume next character
1536 unicodeAsBackSlash = false;
1537 currentCharacter = source[currentPosition++];
1538 // if (((currentCharacter = source[currentPosition++]) == '\\')
1539 // && (source[currentPosition] == 'u')) {
1540 // getNextUnicodeChar();
1542 // if (withoutUnicodePtr != 0) {
1543 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1544 // currentCharacter;
1548 if (currentCharacter == '*') {
1552 if ((currentCharacter == '\r')
1553 || (currentCharacter == '\n')) {
1554 checkNonExternalizeString();
1555 if (recordLineSeparator) {
1556 pushLineSeparator();
1561 try { //get the next char
1562 currentCharacter = source[currentPosition++];
1563 // if (((currentCharacter = source[currentPosition++])
1565 // && (source[currentPosition] == 'u')) {
1566 // //-------------unicode traitement ------------
1567 // getNextUnicodeChar();
1569 //handle the \\u case manually into comment
1570 // if (currentCharacter == '\\') {
1571 // if (source[currentPosition] == '\\')
1572 // currentPosition++;
1573 // //jump over the \\
1575 // empty comment is not a javadoc /**/
1576 if (currentCharacter == '/') {
1579 //loop until end of comment */
1580 while ((currentCharacter != '/') || (!star)) {
1581 if ((currentCharacter == '\r')
1582 || (currentCharacter == '\n')) {
1583 checkNonExternalizeString();
1584 if (recordLineSeparator) {
1585 pushLineSeparator();
1590 star = currentCharacter == '*';
1592 currentCharacter = source[currentPosition++];
1593 // if (((currentCharacter = source[currentPosition++])
1595 // && (source[currentPosition] == 'u')) {
1596 // //-------------unicode traitement ------------
1597 // getNextUnicodeChar();
1599 //handle the \\u case manually into comment
1600 // if (currentCharacter == '\\') {
1601 // if (source[currentPosition] == '\\')
1602 // currentPosition++;
1603 // } //jump over the \\
1605 recordComment(isJavadoc);
1606 if (tokenizeComments) {
1608 return TokenNameCOMMENT_PHPDOC;
1609 return TokenNameCOMMENT_BLOCK;
1611 } catch (IndexOutOfBoundsException e) {
1612 throw new InvalidInputException(UNTERMINATED_COMMENT);
1616 if (getNextChar('='))
1617 return TokenNameDIVIDE_EQUAL;
1618 return TokenNameDIVIDE;
1622 return TokenNameEOF;
1623 //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1624 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1627 if (currentCharacter == '$') {
1628 while ((currentCharacter = source[currentPosition++]) == '$') {
1630 if (currentCharacter == '{')
1631 return TokenNameDOLLAR_LBRACE;
1632 if (isPHPIdentifierStart(currentCharacter))
1633 return scanIdentifierOrKeyword(true);
1634 return TokenNameERROR;
1636 if (isPHPIdentifierStart(currentCharacter))
1637 return scanIdentifierOrKeyword(false);
1638 if (Character.isDigit(currentCharacter))
1639 return scanNumber(false);
1640 return TokenNameERROR;
1643 } //-----------------end switch while try--------------------
1644 catch (IndexOutOfBoundsException e) {
1647 return TokenNameEOF;
1650 // public final void getNextUnicodeChar()
1651 // throws IndexOutOfBoundsException, InvalidInputException {
1653 // //handle the case of unicode.
1654 // //when a unicode appears then we must use a buffer that holds char internal values
1655 // //At the end of this method currentCharacter holds the new visited char
1656 // //and currentPosition points right next after it
1658 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1660 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1661 // currentPosition++;
1662 // while (source[currentPosition] == 'u') {
1663 // currentPosition++;
1667 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1669 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1671 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1673 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1675 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1677 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1678 // //need the unicode buffer
1679 // if (withoutUnicodePtr == 0) {
1680 // //buffer all the entries that have been left aside....
1681 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1682 // System.arraycopy(
1685 // withoutUnicodeBuffer,
1687 // withoutUnicodePtr);
1689 // //fill the buffer with the char
1690 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1692 // unicodeAsBackSlash = currentCharacter == '\\';
// Tokenize a method body, assuming that curly brackets are properly balanced.
// Steps over a method body: repeatedly consumes white space, then dispatches on the
// current character to move past literals, comments, identifiers and numbers.
// The method returns nothing; it only advances the scanner position (and records line
// ends through pushLineSeparator when recordLineSeparator is set).
// NOTE(review): this listing carries source line-number prefixes and skips some source
// lines (the numbering jumps), so case labels and closing braces are not all visible
// here - confirm details against the complete file.
1696 public final void jumpOverMethodBody() {
1698 this.wasAcr = false;
1701 while (true) { //loop for jumping over comments
1702 // ---------Consume white space and handles startPosition---------
1703 boolean isWhiteSpace;
1705 startPosition = currentPosition;
1706 currentCharacter = source[currentPosition++];
1707 // if (((currentCharacter = source[currentPosition++]) == '\\')
1708 // && (source[currentPosition] == 'u')) {
1709 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
// line terminators met while skipping white space are still recorded
1711 if (recordLineSeparator
1712 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1713 pushLineSeparator();
1714 isWhiteSpace = Character.isWhitespace(currentCharacter);
1716 } while (isWhiteSpace);
1718 // -------consume token until } is found---------
1719 switch (currentCharacter) {
// NOTE(review): the case label of this branch is not visible in this listing; it tests
// for a backslash and scans an escape sequence - presumably the single-quote case, confirm
1731 test = getNextChar('\\');
1734 scanDoubleQuotedEscapeCharacter();
1735 } catch (InvalidInputException ex) {
1738 // try { // consume next character
1739 unicodeAsBackSlash = false;
1740 currentCharacter = source[currentPosition++];
1741 // if (((currentCharacter = source[currentPosition++]) == '\\')
1742 // && (source[currentPosition] == 'u')) {
1743 // getNextUnicodeChar();
1745 if (withoutUnicodePtr != 0) {
1746 withoutUnicodeBuffer[++withoutUnicodePtr] =
1750 // } catch (InvalidInputException ex) {
1758 // try { // consume next character
1759 unicodeAsBackSlash = false;
1760 currentCharacter = source[currentPosition++];
1761 // if (((currentCharacter = source[currentPosition++]) == '\\')
1762 // && (source[currentPosition] == 'u')) {
1763 // getNextUnicodeChar();
1765 if (withoutUnicodePtr != 0) {
1766 withoutUnicodeBuffer[++withoutUnicodePtr] =
1770 // } catch (InvalidInputException ex) {
// step through the characters up to the closing double quote
1772 while (currentCharacter != '"') {
1773 if (currentCharacter == '\r') {
1774 if (source[currentPosition] == '\n')
1777 // the string cannot go further that the line
1779 if (currentCharacter == '\n') {
1781 // the string cannot go further that the line
// a backslash inside the string starts an escape sequence, scanned by the helper below
1783 if (currentCharacter == '\\') {
1785 scanDoubleQuotedEscapeCharacter();
1786 } catch (InvalidInputException ex) {
1789 // try { // consume next character
1790 unicodeAsBackSlash = false;
1791 currentCharacter = source[currentPosition++];
1792 // if (((currentCharacter = source[currentPosition++]) == '\\')
1793 // && (source[currentPosition] == 'u')) {
1794 // getNextUnicodeChar();
1796 if (withoutUnicodePtr != 0) {
1797 withoutUnicodeBuffer[++withoutUnicodePtr] =
1801 // } catch (InvalidInputException ex) {
1804 } catch (IndexOutOfBoundsException e) {
// a result of 0 appears to select the line-comment path: the loop further down reads
// characters until the next \r or \n
1811 if ((test = getNextChar('/', '*')) == 0) {
1815 currentCharacter = source[currentPosition++];
1816 // if (((currentCharacter = source[currentPosition++]) == '\\')
1817 // && (source[currentPosition] == 'u')) {
1818 // //-------------unicode traitement ------------
1819 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1820 // currentPosition++;
1821 // while (source[currentPosition] == 'u') {
1822 // currentPosition++;
1825 // Character.getNumericValue(source[currentPosition++]))
1829 // Character.getNumericValue(source[currentPosition++]))
1833 // Character.getNumericValue(source[currentPosition++]))
1837 // Character.getNumericValue(source[currentPosition++]))
1840 // //error don't care of the value
1841 // currentCharacter = 'A';
1842 // } //something different from \n and \r
1844 // currentCharacter =
1845 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1849 while (currentCharacter != '\r'
1850 && currentCharacter != '\n') {
1852 currentCharacter = source[currentPosition++];
1853 // if (((currentCharacter = source[currentPosition++])
1855 // && (source[currentPosition] == 'u')) {
1856 // //-------------unicode traitement ------------
1857 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1858 // currentPosition++;
1859 // while (source[currentPosition] == 'u') {
1860 // currentPosition++;
1863 // Character.getNumericValue(source[currentPosition++]))
1867 // Character.getNumericValue(source[currentPosition++]))
1871 // Character.getNumericValue(source[currentPosition++]))
1875 // Character.getNumericValue(source[currentPosition++]))
1878 // //error don't care of the value
1879 // currentCharacter = 'A';
1880 // } //something different from \n and \r
1882 // currentCharacter =
1883 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// the terminator that ended the line comment is recorded as a line end
1887 if (recordLineSeparator
1888 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1889 pushLineSeparator();
1890 } catch (IndexOutOfBoundsException e) {
1891 } //an eof will them be generated
1895 //traditional and annotation comment
// star remembers whether the previously read character was an asterisk
1896 boolean star = false;
1897 // try { // consume next character
1898 unicodeAsBackSlash = false;
1899 currentCharacter = source[currentPosition++];
1900 // if (((currentCharacter = source[currentPosition++]) == '\\')
1901 // && (source[currentPosition] == 'u')) {
1902 // getNextUnicodeChar();
1904 if (withoutUnicodePtr != 0) {
1905 withoutUnicodeBuffer[++withoutUnicodePtr] =
1909 // } catch (InvalidInputException ex) {
1911 if (currentCharacter == '*') {
1914 if (recordLineSeparator
1915 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1916 pushLineSeparator();
1917 try { //get the next char
1918 currentCharacter = source[currentPosition++];
1919 // if (((currentCharacter = source[currentPosition++]) == '\\')
1920 // && (source[currentPosition] == 'u')) {
1921 // //-------------unicode traitement ------------
1922 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1923 // currentPosition++;
1924 // while (source[currentPosition] == 'u') {
1925 // currentPosition++;
1928 // Character.getNumericValue(source[currentPosition++]))
1932 // Character.getNumericValue(source[currentPosition++]))
1936 // Character.getNumericValue(source[currentPosition++]))
1940 // Character.getNumericValue(source[currentPosition++]))
1943 // //error don't care of the value
1944 // currentCharacter = 'A';
1945 // } //something different from * and /
1947 // currentCharacter =
1948 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1951 //loop until end of comment */
1952 while ((currentCharacter != '/') || (!star)) {
1953 if (recordLineSeparator
1954 && ((currentCharacter == '\r')
1955 || (currentCharacter == '\n')))
1956 pushLineSeparator();
1957 star = currentCharacter == '*';
1959 currentCharacter = source[currentPosition++];
1960 // if (((currentCharacter = source[currentPosition++])
1962 // && (source[currentPosition] == 'u')) {
1963 // //-------------unicode traitement ------------
1964 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1965 // currentPosition++;
1966 // while (source[currentPosition] == 'u') {
1967 // currentPosition++;
1970 // Character.getNumericValue(source[currentPosition++]))
1974 // Character.getNumericValue(source[currentPosition++]))
1978 // Character.getNumericValue(source[currentPosition++]))
1982 // Character.getNumericValue(source[currentPosition++]))
1985 // //error don't care of the value
1986 // currentCharacter = 'A';
1987 // } //something different from * and /
1989 // currentCharacter =
1990 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1994 } catch (IndexOutOfBoundsException e) {
// identifiers (including names introduced by a dollar sign) are scanned only to move past
// them; the value returned by scanIdentifierOrKeyword is not used
2003 if (isPHPIdentifierStart(currentCharacter)
2004 || currentCharacter == '$') {
2006 scanIdentifierOrKeyword((currentCharacter == '$'));
2007 } catch (InvalidInputException ex) {
2011 if (Character.isDigit(currentCharacter)) {
2014 } catch (InvalidInputException ex) {
2020 //-----------------end switch while try--------------------
// running off the end of the source lands here; this first handler has an empty body
2021 } catch (IndexOutOfBoundsException e) {
2022 } catch (InvalidInputException e) {
2026 // public final boolean jumpOverUnicodeWhiteSpace()
2027 // throws InvalidInputException {
2029 // //handle the case of unicode. Jump over the next whiteSpace
2030 // //making startPosition pointing on the next available char
2031 // //On false, the currentCharacter is filled up with a potential
2035 // this.wasAcr = false;
2036 // int c1, c2, c3, c4;
2037 // int unicodeSize = 6;
2038 // currentPosition++;
2039 // while (source[currentPosition] == 'u') {
2040 // currentPosition++;
2044 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2046 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2048 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2050 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2052 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2055 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2056 // if (recordLineSeparator
2057 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2058 // pushLineSeparator();
2059 // if (Character.isWhitespace(currentCharacter))
2062 // //buffer the new char which is not a white space
2063 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2064 // //withoutUnicodePtr == 1 is true here
2066 // } catch (IndexOutOfBoundsException e) {
2067 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2070 public final int[] getLineEnds() {
2071 //return a bounded copy of this.lineEnds
2074 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2078 public char[] getSource() {
// Returns the source of the one-character token located at startPosition.
// NOTE(review): this listing jumps from source line 2085 to 2140, so the code between
// reading charOne and the fallback return is not visible - presumably it hands back
// shared, pre-built arrays for the common characters; confirm against the complete file.
2081 final char[] optimizedCurrentTokenSource1() {
2082 //return always the same char[] build only once
2084 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2085 char charOne = source[startPosition];
// fallback: allocate a fresh one-element array holding this character
2140 return new char[] { charOne };
2144 final char[] optimizedCurrentTokenSource2() {
2145 //try to return the same char[] build only once
2149 (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2151 char[][] table = charArray_length[0][hash];
2153 while (++i < InternalTableSize) {
2154 char[] charArray = table[i];
2155 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2158 //---------other side---------
2160 int max = newEntry2;
2161 while (++i <= max) {
2162 char[] charArray = table[i];
2163 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2166 //--------add the entry-------
2167 if (++max >= InternalTableSize)
2170 table[max] = (r = new char[] { c0, c1 });
2175 final char[] optimizedCurrentTokenSource3() {
2176 //try to return the same char[] build only once
2180 (((c0 = source[startPosition]) << 12)
2181 + ((c1 = source[startPosition + 1]) << 6)
2182 + (c2 = source[startPosition + 2]))
2184 char[][] table = charArray_length[1][hash];
2186 while (++i < InternalTableSize) {
2187 char[] charArray = table[i];
2188 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2191 //---------other side---------
2193 int max = newEntry3;
2194 while (++i <= max) {
2195 char[] charArray = table[i];
2196 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2199 //--------add the entry-------
2200 if (++max >= InternalTableSize)
2203 table[max] = (r = new char[] { c0, c1, c2 });
2208 final char[] optimizedCurrentTokenSource4() {
2209 //try to return the same char[] build only once
2211 char c0, c1, c2, c3;
2213 ((((long) (c0 = source[startPosition])) << 18)
2214 + ((c1 = source[startPosition + 1]) << 12)
2215 + ((c2 = source[startPosition + 2]) << 6)
2216 + (c3 = source[startPosition + 3]))
2218 char[][] table = charArray_length[2][(int) hash];
2220 while (++i < InternalTableSize) {
2221 char[] charArray = table[i];
2222 if ((c0 == charArray[0])
2223 && (c1 == charArray[1])
2224 && (c2 == charArray[2])
2225 && (c3 == charArray[3]))
2228 //---------other side---------
2230 int max = newEntry4;
2231 while (++i <= max) {
2232 char[] charArray = table[i];
2233 if ((c0 == charArray[0])
2234 && (c1 == charArray[1])
2235 && (c2 == charArray[2])
2236 && (c3 == charArray[3]))
2239 //--------add the entry-------
2240 if (++max >= InternalTableSize)
2243 table[max] = (r = new char[] { c0, c1, c2, c3 });
2249 final char[] optimizedCurrentTokenSource5() {
2250 //try to return the same char[] build only once
2252 char c0, c1, c2, c3, c4;
2254 ((((long) (c0 = source[startPosition])) << 24)
2255 + (((long) (c1 = source[startPosition + 1])) << 18)
2256 + ((c2 = source[startPosition + 2]) << 12)
2257 + ((c3 = source[startPosition + 3]) << 6)
2258 + (c4 = source[startPosition + 4]))
2260 char[][] table = charArray_length[3][(int) hash];
2262 while (++i < InternalTableSize) {
2263 char[] charArray = table[i];
2264 if ((c0 == charArray[0])
2265 && (c1 == charArray[1])
2266 && (c2 == charArray[2])
2267 && (c3 == charArray[3])
2268 && (c4 == charArray[4]))
2271 //---------other side---------
2273 int max = newEntry5;
2274 while (++i <= max) {
2275 char[] charArray = table[i];
2276 if ((c0 == charArray[0])
2277 && (c1 == charArray[1])
2278 && (c2 == charArray[2])
2279 && (c3 == charArray[3])
2280 && (c4 == charArray[4]))
2283 //--------add the entry-------
2284 if (++max >= InternalTableSize)
2287 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2293 final char[] optimizedCurrentTokenSource6() {
2294 //try to return the same char[] build only once
2296 char c0, c1, c2, c3, c4, c5;
2298 ((((long) (c0 = source[startPosition])) << 32)
2299 + (((long) (c1 = source[startPosition + 1])) << 24)
2300 + (((long) (c2 = source[startPosition + 2])) << 18)
2301 + ((c3 = source[startPosition + 3]) << 12)
2302 + ((c4 = source[startPosition + 4]) << 6)
2303 + (c5 = source[startPosition + 5]))
2305 char[][] table = charArray_length[4][(int) hash];
2307 while (++i < InternalTableSize) {
2308 char[] charArray = table[i];
2309 if ((c0 == charArray[0])
2310 && (c1 == charArray[1])
2311 && (c2 == charArray[2])
2312 && (c3 == charArray[3])
2313 && (c4 == charArray[4])
2314 && (c5 == charArray[5]))
2317 //---------other side---------
2319 int max = newEntry6;
2320 while (++i <= max) {
2321 char[] charArray = table[i];
2322 if ((c0 == charArray[0])
2323 && (c1 == charArray[1])
2324 && (c2 == charArray[2])
2325 && (c3 == charArray[3])
2326 && (c4 == charArray[4])
2327 && (c5 == charArray[5]))
2330 //--------add the entry-------
2331 if (++max >= InternalTableSize)
2334 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2339 public final void pushLineSeparator() throws InvalidInputException {
2340 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2341 final int INCREMENT = 250;
2343 if (this.checkNonExternalizedStringLiterals) {
2344 // reinitialize the current line for non externalize strings purpose
2347 //currentCharacter is at position currentPosition-1
2350 if (currentCharacter == '\r') {
2351 int separatorPos = currentPosition - 1;
2352 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2354 //System.out.println("CR-" + separatorPos);
2356 lineEnds[++linePtr] = separatorPos;
2357 } catch (IndexOutOfBoundsException e) {
2358 //linePtr value is correct
2359 int oldLength = lineEnds.length;
2360 int[] old = lineEnds;
2361 lineEnds = new int[oldLength + INCREMENT];
2362 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2363 lineEnds[linePtr] = separatorPos;
2365 // look-ahead for merged cr+lf
2367 if (source[currentPosition] == '\n') {
2368 //System.out.println("look-ahead LF-" + currentPosition);
2369 lineEnds[linePtr] = currentPosition;
2375 } catch (IndexOutOfBoundsException e) {
2380 if (currentCharacter == '\n') {
2381 //must merge eventual cr followed by lf
2382 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2383 //System.out.println("merge LF-" + (currentPosition - 1));
2384 lineEnds[linePtr] = currentPosition - 1;
2386 int separatorPos = currentPosition - 1;
2387 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2389 // System.out.println("LF-" + separatorPos);
2391 lineEnds[++linePtr] = separatorPos;
2392 } catch (IndexOutOfBoundsException e) {
2393 //linePtr value is correct
2394 int oldLength = lineEnds.length;
2395 int[] old = lineEnds;
2396 lineEnds = new int[oldLength + INCREMENT];
2397 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2398 lineEnds[linePtr] = separatorPos;
2405 public final void pushUnicodeLineSeparator() {
2406 // isUnicode means that the \r or \n has been read as a unicode character
2408 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2410 final int INCREMENT = 250;
2411 //currentCharacter is at position currentPosition-1
2413 if (this.checkNonExternalizedStringLiterals) {
2414 // reinitialize the current line for non externalize strings purpose
2419 if (currentCharacter == '\r') {
2420 int separatorPos = currentPosition - 6;
2421 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2423 //System.out.println("CR-" + separatorPos);
2425 lineEnds[++linePtr] = separatorPos;
2426 } catch (IndexOutOfBoundsException e) {
2427 //linePtr value is correct
2428 int oldLength = lineEnds.length;
2429 int[] old = lineEnds;
2430 lineEnds = new int[oldLength + INCREMENT];
2431 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2432 lineEnds[linePtr] = separatorPos;
2434 // look-ahead for merged cr+lf
2435 if (source[currentPosition] == '\n') {
2436 //System.out.println("look-ahead LF-" + currentPosition);
2437 lineEnds[linePtr] = currentPosition;
2445 if (currentCharacter == '\n') {
2446 //must merge eventual cr followed by lf
2447 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2448 //System.out.println("merge LF-" + (currentPosition - 1));
2449 lineEnds[linePtr] = currentPosition - 6;
2451 int separatorPos = currentPosition - 6;
2452 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2454 // System.out.println("LF-" + separatorPos);
2456 lineEnds[++linePtr] = separatorPos;
2457 } catch (IndexOutOfBoundsException e) {
2458 //linePtr value is correct
2459 int oldLength = lineEnds.length;
2460 int[] old = lineEnds;
2461 lineEnds = new int[oldLength + INCREMENT];
2462 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2463 lineEnds[linePtr] = separatorPos;
2470 public final void recordComment(boolean isJavadoc) {
2472 // a new annotation comment is recorded
2474 commentStops[++commentPtr] =
2475 isJavadoc ? currentPosition : -currentPosition;
2476 } catch (IndexOutOfBoundsException e) {
2477 int oldStackLength = commentStops.length;
2478 int[] oldStack = commentStops;
2479 commentStops = new int[oldStackLength + 30];
2480 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2481 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2482 //grows the positions buffers too
2483 int[] old = commentStarts;
2484 commentStarts = new int[oldStackLength + 30];
2485 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2488 //the buffer is of a correct size here
2489 commentStarts[commentPtr] = startPosition;
// Repositions the scanner on the range begin..end so that scanning can start again from
// begin, and empties the comment stack.
// NOTE(review): this listing carries source line-number prefixes and skips some lines
// (the closing brace among them) - confirm against the complete file.
2491 public void resetTo(int begin, int end) {
2492 //reset the scanner to a given position where it may rescan again
// NOTE(review): source lines 2493-2494 are absent from this listing - confirm that no
// statement was lost between the comment above and the assignment below
// the initial, start and current positions all move to begin
2495 initialPosition = startPosition = currentPosition = begin;
// eofPosition is one past end, except when end is Integer.MAX_VALUE, where end + 1 would overflow
2496 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2497 commentPtr = -1; // reset comment stack
// Handles the character that follows a backslash: reads the next source character,
// advances currentPosition past it, and leaves the resulting character in currentCharacter.
// The name indicates this is the variant used for single-quoted strings.
// NOTE(review): this listing carries source line-number prefixes and skips some lines;
// the case labels of the switch and the end of the method are not visible here -
// confirm against the complete file.
2500 public final void scanSingleQuotedEscapeCharacter()
2501 throws InvalidInputException {
2502 // the string with "\\u" is a legal string of two chars \ and u
2503 //thus we use a direct access to the source (for regular cases).
2505 // if (unicodeAsBackSlash) {
2506 // // consume next character
2507 // unicodeAsBackSlash = false;
2508 // if (((currentCharacter = source[currentPosition++]) == '\\')
2509 // && (source[currentPosition] == 'u')) {
2510 // getNextUnicodeChar();
2512 // if (withoutUnicodePtr != 0) {
2513 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// read the character after the backslash straight from the source
2517 currentCharacter = source[currentPosition++];
2518 switch (currentCharacter) {
// three branches are visible: one yields a single quote, two yield a backslash
// NOTE(review): presumably the labels are the quote, the backslash and a default branch - confirm
2520 currentCharacter = '\'';
2523 currentCharacter = '\\';
2526 currentCharacter = '\\';
2531 public final void scanDoubleQuotedEscapeCharacter()
2532 throws InvalidInputException {
2533 // the string with "\\u" is a legal string of two chars \ and u
2534 //thus we use a direct access to the source (for regular cases).
2536 // if (unicodeAsBackSlash) {
2537 // // consume next character
2538 // unicodeAsBackSlash = false;
2539 // if (((currentCharacter = source[currentPosition++]) == '\\')
2540 // && (source[currentPosition] == 'u')) {
2541 // getNextUnicodeChar();
2543 // if (withoutUnicodePtr != 0) {
2544 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2548 currentCharacter = source[currentPosition++];
2549 switch (currentCharacter) {
2551 // currentCharacter = '\b';
2554 currentCharacter = '\t';
2557 currentCharacter = '\n';
2560 // currentCharacter = '\f';
2563 currentCharacter = '\r';
2566 currentCharacter = '\"';
2569 currentCharacter = '\'';
2572 currentCharacter = '\\';
2575 currentCharacter = '$';
2578 // -----------octal escape--------------
2580 // OctalDigit OctalDigit
2581 // ZeroToThree OctalDigit OctalDigit
2583 int number = Character.getNumericValue(currentCharacter);
2584 if (number >= 0 && number <= 7) {
2585 boolean zeroToThreeNot = number > 3;
2587 .isDigit(currentCharacter = source[currentPosition++])) {
2588 int digit = Character.getNumericValue(currentCharacter);
2589 if (digit >= 0 && digit <= 7) {
2590 number = (number * 8) + digit;
2592 .isDigit(currentCharacter = source[currentPosition++])) {
2593 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2596 digit = Character.getNumericValue(currentCharacter);
2597 if (digit >= 0 && digit <= 7) {
2598 // has read \ZeroToThree OctalDigit OctalDigit
2599 number = (number * 8) + digit;
2600 } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2604 } else { // has read \OctalDigit NonDigit--> ignore last character
2607 } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2610 } else { // has read \OctalDigit --> ignore last character
2614 throw new InvalidInputException(INVALID_ESCAPE);
2615 currentCharacter = (char) number;
2618 // throw new InvalidInputException(INVALID_ESCAPE);
2622 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2623 // return scanIdentifierOrKeyword( false );
2626 public int scanIdentifierOrKeyword(boolean isVariable)
2627 throws InvalidInputException {
2630 //first dispatch on the first char.
2631 //then the length. If there are several
2632 //keywors with the same length AND the same first char, then do another
2633 //disptach on the second char :-)...cool....but fast !
2635 useAssertAsAnIndentifier = false;
2637 while (getNextCharAsJavaIdentifierPart()) {
2641 return TokenNameVariable;
2646 // if (withoutUnicodePtr == 0)
2648 //quick test on length == 1 but not on length > 12 while most identifier
2649 //have a length which is <= 12...but there are lots of identifier with
2653 if ((length = currentPosition - startPosition) == 1)
2654 return TokenNameIdentifier;
2656 data = new char[length];
2657 index = startPosition;
2658 for (int i = 0; i < length; i++) {
2659 data[i] = Character.toLowerCase(source[index + i]);
2663 // if ((length = withoutUnicodePtr) == 1)
2664 // return TokenNameIdentifier;
2665 // // data = withoutUnicodeBuffer;
2666 // data = new char[withoutUnicodeBuffer.length];
2667 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2668 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2673 firstLetter = data[index];
2674 switch (firstLetter) {
2676 case 'a' : // as and array
2679 if ((data[++index] == 's')) {
2682 return TokenNameIdentifier;
2685 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2686 return TokenNameAND;
2688 return TokenNameIdentifier;
2691 // if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2692 // return TokenNamearray;
2694 // return TokenNameIdentifier;
2696 return TokenNameIdentifier;
2701 if ((data[++index] == 'r')
2702 && (data[++index] == 'e')
2703 && (data[++index] == 'a')
2704 && (data[++index] == 'k'))
2705 return TokenNamebreak;
2707 return TokenNameIdentifier;
2709 return TokenNameIdentifier;
2712 case 'c' : //case class continue
2715 if ((data[++index] == 'a')
2716 && (data[++index] == 's')
2717 && (data[++index] == 'e'))
2718 return TokenNamecase;
2720 return TokenNameIdentifier;
2722 if ((data[++index] == 'l')
2723 && (data[++index] == 'a')
2724 && (data[++index] == 's')
2725 && (data[++index] == 's'))
2726 return TokenNameclass;
2728 return TokenNameIdentifier;
2730 if ((data[++index] == 'o')
2731 && (data[++index] == 'n')
2732 && (data[++index] == 't')
2733 && (data[++index] == 'i')
2734 && (data[++index] == 'n')
2735 && (data[++index] == 'u')
2736 && (data[++index] == 'e'))
2737 return TokenNamecontinue;
2739 return TokenNameIdentifier;
2741 return TokenNameIdentifier;
2744 case 'd' : //define default do
2747 if ((data[++index] == 'o'))
2750 return TokenNameIdentifier;
2752 if ((data[++index] == 'e')
2753 && (data[++index] == 'f')
2754 && (data[++index] == 'i')
2755 && (data[++index] == 'n')
2756 && (data[++index] == 'e'))
2757 return TokenNamedefine;
2759 return TokenNameIdentifier;
2761 if ((data[++index] == 'e')
2762 && (data[++index] == 'f')
2763 && (data[++index] == 'a')
2764 && (data[++index] == 'u')
2765 && (data[++index] == 'l')
2766 && (data[++index] == 't'))
2767 return TokenNamedefault;
2769 return TokenNameIdentifier;
2771 return TokenNameIdentifier;
2773 case 'e' : //echo else elseif extends
2776 if ((data[++index] == 'c')
2777 && (data[++index] == 'h')
2778 && (data[++index] == 'o'))
2779 return TokenNameecho;
2781 (data[index] == 'l')
2782 && (data[++index] == 's')
2783 && (data[++index] == 'e'))
2784 return TokenNameelse;
2786 return TokenNameIdentifier;
2788 if ((data[++index] == 'n')
2789 && (data[++index] == 'd')
2790 && (data[++index] == 'i')
2791 && (data[++index] == 'f'))
2792 return TokenNameendif;
2794 return TokenNameIdentifier;
2796 if ((data[++index] == 'n')
2797 && (data[++index] == 'd')
2798 && (data[++index] == 'f')
2799 && (data[++index] == 'o')
2800 && (data[++index] == 'r'))
2801 return TokenNameendfor;
2803 (data[index] == 'l')
2804 && (data[++index] == 's')
2805 && (data[++index] == 'e')
2806 && (data[++index] == 'i')
2807 && (data[++index] == 'f'))
2808 return TokenNameelseif;
2810 return TokenNameIdentifier;
2812 if ((data[++index] == 'x')
2813 && (data[++index] == 't')
2814 && (data[++index] == 'e')
2815 && (data[++index] == 'n')
2816 && (data[++index] == 'd')
2817 && (data[++index] == 's'))
2818 return TokenNameextends;
2820 return TokenNameIdentifier;
2821 case 8 : // endwhile
2822 if ((data[++index] == 'n')
2823 && (data[++index] == 'd')
2824 && (data[++index] == 'w')
2825 && (data[++index] == 'h')
2826 && (data[++index] == 'i')
2827 && (data[++index] == 'l')
2828 && (data[++index] == 'e'))
2829 return TokenNameendwhile;
2831 return TokenNameIdentifier;
2832 case 9 : // endswitch
2833 if ((data[++index] == 'n')
2834 && (data[++index] == 'd')
2835 && (data[++index] == 's')
2836 && (data[++index] == 'w')
2837 && (data[++index] == 'i')
2838 && (data[++index] == 't')
2839 && (data[++index] == 'c')
2840 && (data[++index] == 'h'))
2841 return TokenNameendswitch;
2843 return TokenNameIdentifier;
2844 case 10 : // endforeach
2845 if ((data[++index] == 'n')
2846 && (data[++index] == 'd')
2847 && (data[++index] == 'f')
2848 && (data[++index] == 'o')
2849 && (data[++index] == 'r')
2850 && (data[++index] == 'e')
2851 && (data[++index] == 'a')
2852 && (data[++index] == 'c')
2853 && (data[++index] == 'h'))
2854 return TokenNameendforeach;
2856 return TokenNameIdentifier;
2859 return TokenNameIdentifier;
2862 case 'f' : //for false function
2865 if ((data[++index] == 'o') && (data[++index] == 'r'))
2866 return TokenNamefor;
2868 return TokenNameIdentifier;
2870 if ((data[++index] == 'a')
2871 && (data[++index] == 'l')
2872 && (data[++index] == 's')
2873 && (data[++index] == 'e'))
2874 return TokenNamefalse;
2876 return TokenNameIdentifier;
2877 case 7 : // function
2878 if ((data[++index] == 'o')
2879 && (data[++index] == 'r')
2880 && (data[++index] == 'e')
2881 && (data[++index] == 'a')
2882 && (data[++index] == 'c')
2883 && (data[++index] == 'h'))
2884 return TokenNameforeach;
2886 return TokenNameIdentifier;
2887 case 8 : // function
2888 if ((data[++index] == 'u')
2889 && (data[++index] == 'n')
2890 && (data[++index] == 'c')
2891 && (data[++index] == 't')
2892 && (data[++index] == 'i')
2893 && (data[++index] == 'o')
2894 && (data[++index] == 'n'))
2895 return TokenNamefunction;
2897 return TokenNameIdentifier;
2899 return TokenNameIdentifier;
2903 if ((data[++index] == 'l')
2904 && (data[++index] == 'o')
2905 && (data[++index] == 'b')
2906 && (data[++index] == 'a')
2907 && (data[++index] == 'l')) {
2908 return TokenNameglobal;
2911 return TokenNameIdentifier;
2916 if (data[++index] == 'f')
2919 return TokenNameIdentifier;
2921 // if ((data[++index] == 'n') && (data[++index] == 't'))
2922 // return TokenNameint;
2924 // return TokenNameIdentifier;
2926 if ((data[++index] == 'n')
2927 && (data[++index] == 'c')
2928 && (data[++index] == 'l')
2929 && (data[++index] == 'u')
2930 && (data[++index] == 'd')
2931 && (data[++index] == 'e'))
2932 return TokenNameinclude;
2934 return TokenNameIdentifier;
2936 if ((data[++index] == 'n')
2937 && (data[++index] == 'c')
2938 && (data[++index] == 'l')
2939 && (data[++index] == 'u')
2940 && (data[++index] == 'd')
2941 && (data[++index] == 'e')
2942 && (data[++index] == '_')
2943 && (data[++index] == 'o')
2944 && (data[++index] == 'n')
2945 && (data[++index] == 'c')
2946 && (data[++index] == 'e'))
2947 return TokenNameinclude_once;
2949 return TokenNameIdentifier;
2951 return TokenNameIdentifier;
2956 if ((data[++index] == 'i')
2957 && (data[++index] == 's')
2958 && (data[++index] == 't')) {
2959 return TokenNamelist;
2962 return TokenNameIdentifier;
2964 case 'n' : // new null
2967 if ((data[++index] == 'e') && (data[++index] == 'w'))
2968 return TokenNamenew;
2970 return TokenNameIdentifier;
2972 if ((data[++index] == 'u')
2973 && (data[++index] == 'l')
2974 && (data[++index] == 'l'))
2975 return TokenNamenull;
2977 return TokenNameIdentifier;
2980 return TokenNameIdentifier;
2982 case 'o' : // or old_function
2984 if (data[++index] == 'r') {
2988 // if (length == 12) {
2989 // if ((data[++index] == 'l')
2990 // && (data[++index] == 'd')
2991 // && (data[++index] == '_')
2992 // && (data[++index] == 'f')
2993 // && (data[++index] == 'u')
2994 // && (data[++index] == 'n')
2995 // && (data[++index] == 'c')
2996 // && (data[++index] == 't')
2997 // && (data[++index] == 'i')
2998 // && (data[++index] == 'o')
2999 // && (data[++index] == 'n')) {
3000 // return TokenNameold_function;
3003 return TokenNameIdentifier;
3007 if ((data[++index] == 'r')
3008 && (data[++index] == 'i')
3009 && (data[++index] == 'n')
3010 && (data[++index] == 't')) {
3011 return TokenNameprint;
3014 return TokenNameIdentifier;
3015 case 'r' : //return require require_once
3017 if ((data[++index] == 'e')
3018 && (data[++index] == 't')
3019 && (data[++index] == 'u')
3020 && (data[++index] == 'r')
3021 && (data[++index] == 'n')) {
3022 return TokenNamereturn;
3024 } else if (length == 7) {
3025 if ((data[++index] == 'e')
3026 && (data[++index] == 'q')
3027 && (data[++index] == 'u')
3028 && (data[++index] == 'i')
3029 && (data[++index] == 'r')
3030 && (data[++index] == 'e')) {
3031 return TokenNamerequire;
3033 } else if (length == 12) {
3034 if ((data[++index] == 'e')
3035 && (data[++index] == 'q')
3036 && (data[++index] == 'u')
3037 && (data[++index] == 'i')
3038 && (data[++index] == 'r')
3039 && (data[++index] == 'e')
3040 && (data[++index] == '_')
3041 && (data[++index] == 'o')
3042 && (data[++index] == 'n')
3043 && (data[++index] == 'c')
3044 && (data[++index] == 'e')) {
3045 return TokenNamerequire_once;
3048 return TokenNameIdentifier;
3050 case 's' : //static switch
3053 if (data[++index] == 't')
3054 if ((data[++index] == 'a')
3055 && (data[++index] == 't')
3056 && (data[++index] == 'i')
3057 && (data[++index] == 'c')) {
3058 return TokenNamestatic;
3060 return TokenNameIdentifier;
3062 (data[index] == 'w')
3063 && (data[++index] == 'i')
3064 && (data[++index] == 't')
3065 && (data[++index] == 'c')
3066 && (data[++index] == 'h'))
3067 return TokenNameswitch;
3069 return TokenNameIdentifier;
3071 return TokenNameIdentifier;
3078 if ((data[++index] == 'r')
3079 && (data[++index] == 'u')
3080 && (data[++index] == 'e'))
3081 return TokenNametrue;
3083 return TokenNameIdentifier;
3084 // if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
3085 // return TokenNamethis;
3088 return TokenNameIdentifier;
3094 if ((data[++index] == 'a') && (data[++index] == 'r'))
3095 return TokenNamevar;
3097 return TokenNameIdentifier;
3100 return TokenNameIdentifier;
3106 if ((data[++index] == 'h')
3107 && (data[++index] == 'i')
3108 && (data[++index] == 'l')
3109 && (data[++index] == 'e'))
3110 return TokenNamewhile;
3112 return TokenNameIdentifier;
3113 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
3114 //return TokenNamewidefp ;
3116 //return TokenNameIdentifier;
3118 return TokenNameIdentifier;
3124 if ((data[++index] == 'o') && (data[++index] == 'r'))
3125 return TokenNameXOR;
3127 return TokenNameIdentifier;
3130 return TokenNameIdentifier;
3133 return TokenNameIdentifier;
3136 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3138 //when entering this method the currentCharacter is the firt
3139 //digit of the number , i.e. it may be preceeded by a . when
3142 boolean floating = dotPrefix;
3143 if ((!dotPrefix) && (currentCharacter == '0')) {
3144 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3145 //force the first char of the hexa number do exist...
3146 // consume next character
3147 unicodeAsBackSlash = false;
3148 currentCharacter = source[currentPosition++];
3149 // if (((currentCharacter = source[currentPosition++]) == '\\')
3150 // && (source[currentPosition] == 'u')) {
3151 // getNextUnicodeChar();
3153 // if (withoutUnicodePtr != 0) {
3154 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3157 if (Character.digit(currentCharacter, 16) == -1)
3158 throw new InvalidInputException(INVALID_HEXA);
3160 while (getNextCharAsDigit(16)) {
3162 // if (getNextChar('l', 'L') >= 0)
3163 // return TokenNameLongLiteral;
3165 return TokenNameIntegerLiteral;
3168 //there is x or X in the number
3169 //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3170 if (getNextCharAsDigit()) {
3171 //-------------potential octal-----------------
3172 while (getNextCharAsDigit()) {
3175 // if (getNextChar('l', 'L') >= 0) {
3176 // return TokenNameLongLiteral;
3179 // if (getNextChar('f', 'F') >= 0) {
3180 // return TokenNameFloatingPointLiteral;
3183 if (getNextChar('d', 'D') >= 0) {
3184 return TokenNameDoubleLiteral;
3185 } else { //make the distinction between octal and float ....
3186 if (getNextChar('.')) { //bingo ! ....
3187 while (getNextCharAsDigit()) {
3189 if (getNextChar('e', 'E') >= 0) {
3190 // consume next character
3191 unicodeAsBackSlash = false;
3192 currentCharacter = source[currentPosition++];
3193 // if (((currentCharacter = source[currentPosition++]) == '\\')
3194 // && (source[currentPosition] == 'u')) {
3195 // getNextUnicodeChar();
3197 // if (withoutUnicodePtr != 0) {
3198 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3202 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3203 // consume next character
3204 unicodeAsBackSlash = false;
3205 currentCharacter = source[currentPosition++];
3206 // if (((currentCharacter = source[currentPosition++]) == '\\')
3207 // && (source[currentPosition] == 'u')) {
3208 // getNextUnicodeChar();
3210 // if (withoutUnicodePtr != 0) {
3211 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3212 // currentCharacter;
3216 if (!Character.isDigit(currentCharacter))
3217 throw new InvalidInputException(INVALID_FLOAT);
3218 while (getNextCharAsDigit()) {
3221 // if (getNextChar('f', 'F') >= 0)
3222 // return TokenNameFloatingPointLiteral;
3223 getNextChar('d', 'D'); //jump over potential d or D
3224 return TokenNameDoubleLiteral;
3226 return TokenNameIntegerLiteral;
3234 while (getNextCharAsDigit()) {
3237 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3238 // return TokenNameLongLiteral;
3240 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3241 while (getNextCharAsDigit()) {
3246 //if floating is true both exponant and suffix may be optional
3248 if (getNextChar('e', 'E') >= 0) {
3250 // consume next character
3251 unicodeAsBackSlash = false;
3252 currentCharacter = source[currentPosition++];
3253 // if (((currentCharacter = source[currentPosition++]) == '\\')
3254 // && (source[currentPosition] == 'u')) {
3255 // getNextUnicodeChar();
3257 // if (withoutUnicodePtr != 0) {
3258 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3262 if ((currentCharacter == '-')
3263 || (currentCharacter == '+')) { // consume next character
3264 unicodeAsBackSlash = false;
3265 currentCharacter = source[currentPosition++];
3266 // if (((currentCharacter = source[currentPosition++]) == '\\')
3267 // && (source[currentPosition] == 'u')) {
3268 // getNextUnicodeChar();
3270 // if (withoutUnicodePtr != 0) {
3271 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3275 if (!Character.isDigit(currentCharacter))
3276 throw new InvalidInputException(INVALID_FLOAT);
3277 while (getNextCharAsDigit()) {
3281 if (getNextChar('d', 'D') >= 0)
3282 return TokenNameDoubleLiteral;
3283 // if (getNextChar('f', 'F') >= 0)
3284 // return TokenNameFloatingPointLiteral;
3286 //the long flag has been tested before
3288 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3291 * Search the line number corresponding to a specific position
3294 public final int getLineNumber(int position) {
3296 if (lineEnds == null)
3298 int length = linePtr + 1;
3301 int g = 0, d = length - 1;
3305 if (position < lineEnds[m]) {
3307 } else if (position > lineEnds[m]) {
3313 if (position < lineEnds[m]) {
3319 public void setPHPMode(boolean mode) {
/**
 * Installs the character buffer to scan.
 * A null argument is replaced by an empty buffer; initialPosition and
 * currentPosition are set to 0, the containsAssertKeyword flag is cleared and
 * withoutUnicodeBuffer is reallocated with the length of the new source.
 *
 * @param source the characters to scan, may be null
 */
3323 public final void setSource(char[] source) {
3324 //the source-buffer is set to sourceString
// never keep a null buffer: later accesses read this.source.length
3326 if (source == null) {
3327 this.source = new char[0];
3329 this.source = source;
3332 initialPosition = currentPosition = 0;
3333 containsAssertKeyword = false;
3334 withoutUnicodeBuffer = new char[this.source.length];
3338 public String toString() {
3339 if (startPosition == source.length)
3340 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3341 if (currentPosition > source.length)
3342 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3344 char front[] = new char[startPosition];
3345 System.arraycopy(source, 0, front, 0, startPosition);
3347 int middleLength = (currentPosition - 1) - startPosition + 1;
3349 if (middleLength > -1) {
3350 middle = new char[middleLength];
3351 System.arraycopy(source, startPosition, middle, 0, middleLength);
3353 middle = new char[0];
3356 char end[] = new char[source.length - (currentPosition - 1)];
3359 (currentPosition - 1) + 1,
3362 source.length - (currentPosition - 1) - 1);
3364 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3365 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable form of a token, meant for debugging output.
 * Tokens whose text varies (error, identifier, variable, literals, heredoc, ...)
 * are rendered as the kind name wrapped around the current token source;
 * keywords, operators and punctuation are rendered as their fixed spelling;
 * the last visible return renders the numeric value followed by the current
 * token source.
 *
 * @param act the token to describe
 * @return the printable form of the token
 */
3368 public final String toStringAction(int act) {
// tokens rendered together with the current token source
3370 case TokenNameERROR :
3371 return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3372 case TokenNameStopPHP :
3373 return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3374 case TokenNameIdentifier :
3375 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3376 case TokenNameVariable :
3377 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// keywords, rendered as their spelling
3379 return "as"; //$NON-NLS-1$
3380 case TokenNamebreak :
3381 return "break"; //$NON-NLS-1$
3382 case TokenNamecase :
3383 return "case"; //$NON-NLS-1$
3384 case TokenNameclass :
3385 return "class"; //$NON-NLS-1$
3386 case TokenNamecontinue :
3387 return "continue"; //$NON-NLS-1$
3388 case TokenNamedefault :
3389 return "default"; //$NON-NLS-1$
3390 case TokenNamedefine :
3391 return "define"; //$NON-NLS-1$
3393 return "do"; //$NON-NLS-1$
3394 case TokenNameecho :
3395 return "echo"; //$NON-NLS-1$
3396 case TokenNameelse :
3397 return "else"; //$NON-NLS-1$
3398 case TokenNameelseif :
3399 return "elseif"; //$NON-NLS-1$
3400 case TokenNameendfor :
3401 return "endfor"; //$NON-NLS-1$
3402 case TokenNameendforeach :
3403 return "endforeach"; //$NON-NLS-1$
3404 case TokenNameendif :
3405 return "endif"; //$NON-NLS-1$
3406 case TokenNameendswitch :
3407 return "endswitch"; //$NON-NLS-1$
3408 case TokenNameendwhile :
3409 return "endwhile"; //$NON-NLS-1$
3410 case TokenNameextends :
3411 return "extends"; //$NON-NLS-1$
3412 case TokenNamefalse :
3413 return "false"; //$NON-NLS-1$
3415 return "for"; //$NON-NLS-1$
3416 case TokenNameforeach :
3417 return "foreach"; //$NON-NLS-1$
3418 case TokenNamefunction :
3419 return "function"; //$NON-NLS-1$
3420 case TokenNameglobal :
3421 return "global"; //$NON-NLS-1$
3423 return "if"; //$NON-NLS-1$
3424 case TokenNameinclude :
3425 return "include"; //$NON-NLS-1$
3426 case TokenNameinclude_once :
3427 return "include_once"; //$NON-NLS-1$
3428 case TokenNamelist :
3429 return "list"; //$NON-NLS-1$
3431 return "new"; //$NON-NLS-1$
3432 case TokenNamenull :
3433 return "null"; //$NON-NLS-1$
3434 case TokenNameprint :
3435 return "print"; //$NON-NLS-1$
3436 case TokenNamerequire :
3437 return "require"; //$NON-NLS-1$
3438 case TokenNamerequire_once :
3439 return "require_once"; //$NON-NLS-1$
3440 case TokenNamereturn :
3441 return "return"; //$NON-NLS-1$
3442 case TokenNamestatic :
3443 return "static"; //$NON-NLS-1$
3444 case TokenNameswitch :
3445 return "switch"; //$NON-NLS-1$
3446 case TokenNametrue :
3447 return "true"; //$NON-NLS-1$
3449 return "var"; //$NON-NLS-1$
3450 case TokenNamewhile :
3451 return "while"; //$NON-NLS-1$
// literals, rendered together with the current token source
3452 case TokenNameIntegerLiteral :
3453 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3454 case TokenNameDoubleLiteral :
3455 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3456 case TokenNameStringLiteral :
3457 return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3458 case TokenNameStringConstant :
3459 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3460 case TokenNameStringInterpolated :
3461 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3462 case TokenNameHEREDOC :
3463 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// operators and punctuation, rendered as their spelling
3465 case TokenNamePLUS_PLUS :
3466 return "++"; //$NON-NLS-1$
3467 case TokenNameMINUS_MINUS :
3468 return "--"; //$NON-NLS-1$
3469 case TokenNameEQUAL_EQUAL :
3470 return "=="; //$NON-NLS-1$
3471 case TokenNameEQUAL_GREATER :
3472 return "=>"; //$NON-NLS-1$
3473 case TokenNameLESS_EQUAL :
3474 return "<="; //$NON-NLS-1$
3475 case TokenNameGREATER_EQUAL :
3476 return ">="; //$NON-NLS-1$
3477 case TokenNameNOT_EQUAL :
3478 return "!="; //$NON-NLS-1$
3479 case TokenNameLEFT_SHIFT :
3480 return "<<"; //$NON-NLS-1$
3481 case TokenNameRIGHT_SHIFT :
3482 return ">>"; //$NON-NLS-1$
3483 case TokenNamePLUS_EQUAL :
3484 return "+="; //$NON-NLS-1$
3485 case TokenNameMINUS_EQUAL :
3486 return "-="; //$NON-NLS-1$
3487 case TokenNameMULTIPLY_EQUAL :
3488 return "*="; //$NON-NLS-1$
3489 case TokenNameDIVIDE_EQUAL :
3490 return "/="; //$NON-NLS-1$
3491 case TokenNameAND_EQUAL :
3492 return "&="; //$NON-NLS-1$
3493 case TokenNameOR_EQUAL :
3494 return "|="; //$NON-NLS-1$
3495 case TokenNameXOR_EQUAL :
3496 return "^="; //$NON-NLS-1$
3497 case TokenNameREMAINDER_EQUAL :
3498 return "%="; //$NON-NLS-1$
3499 case TokenNameLEFT_SHIFT_EQUAL :
3500 return "<<="; //$NON-NLS-1$
3501 case TokenNameRIGHT_SHIFT_EQUAL :
3502 return ">>="; //$NON-NLS-1$
3503 case TokenNameOR_OR :
3504 return "||"; //$NON-NLS-1$
3505 case TokenNameAND_AND :
3506 return "&&"; //$NON-NLS-1$
3507 case TokenNamePLUS :
3508 return "+"; //$NON-NLS-1$
3509 case TokenNameMINUS :
3510 return "-"; //$NON-NLS-1$
3511 case TokenNameMINUS_GREATER :
3514 return "!"; //$NON-NLS-1$
3515 case TokenNameREMAINDER :
3516 return "%"; //$NON-NLS-1$
3518 return "^"; //$NON-NLS-1$
3520 return "&"; //$NON-NLS-1$
3521 case TokenNameMULTIPLY :
3522 return "*"; //$NON-NLS-1$
3524 return "|"; //$NON-NLS-1$
3525 case TokenNameTWIDDLE :
3526 return "~"; //$NON-NLS-1$
3527 case TokenNameTWIDDLE_EQUAL :
3528 return "~="; //$NON-NLS-1$
3529 case TokenNameDIVIDE :
3530 return "/"; //$NON-NLS-1$
3531 case TokenNameGREATER :
3532 return ">"; //$NON-NLS-1$
3533 case TokenNameLESS :
3534 return "<"; //$NON-NLS-1$
3535 case TokenNameLPAREN :
3536 return "("; //$NON-NLS-1$
3537 case TokenNameRPAREN :
3538 return ")"; //$NON-NLS-1$
3539 case TokenNameLBRACE :
3540 return "{"; //$NON-NLS-1$
3541 case TokenNameRBRACE :
3542 return "}"; //$NON-NLS-1$
3543 case TokenNameLBRACKET :
3544 return "["; //$NON-NLS-1$
3545 case TokenNameRBRACKET :
3546 return "]"; //$NON-NLS-1$
3547 case TokenNameSEMICOLON :
3548 return ";"; //$NON-NLS-1$
3549 case TokenNameQUESTION :
3550 return "?"; //$NON-NLS-1$
3551 case TokenNameCOLON :
3552 return ":"; //$NON-NLS-1$
3553 case TokenNameCOMMA :
3554 return ","; //$NON-NLS-1$
3556 return "."; //$NON-NLS-1$
3557 case TokenNameEQUAL :
3558 return "="; //$NON-NLS-1$
3561 case TokenNameDOLLAR_LBRACE :
3564 return "EOF"; //$NON-NLS-1$
// fallback: show the numeric value together with the current token source
3566 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner with the assert mode switched off.
 *
 * @param tokenizeComments value of the <code>tokenizeComments</code> flag
 * @param tokenizeWhiteSpace value of the <code>tokenizeWhiteSpace</code> flag
 * @param checkNonExternalizedStringLiterals value of the
 *        <code>checkNonExternalizedStringLiterals</code> flag
 */
public Scanner(
  boolean tokenizeComments,
  boolean tokenizeWhiteSpace,
  boolean checkNonExternalizedStringLiterals) {
  // delegate to the complete constructor, without assert support
  this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
}
/**
 * Creates a scanner. The end of file position starts at
 * <code>Integer.MAX_VALUE</code>, i.e. the scanned window is not bounded yet.
 *
 * @param tokenizeComments value of the <code>tokenizeComments</code> flag
 * @param tokenizeWhiteSpace value of the <code>tokenizeWhiteSpace</code> flag
 * @param checkNonExternalizedStringLiterals value of the
 *        <code>checkNonExternalizedStringLiterals</code> flag
 * @param assertMode value of the <code>assertMode</code> flag
 */
public Scanner(
  boolean tokenizeComments,
  boolean tokenizeWhiteSpace,
  boolean checkNonExternalizedStringLiterals,
  boolean assertMode) {
  // scanning options
  this.assertMode = assertMode;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeComments = tokenizeComments;

  // no upper bound until a source window is chosen
  this.eofPosition = Integer.MAX_VALUE;
}
3594 private void checkNonExternalizeString() throws InvalidInputException {
3595 if (currentLine == null)
3597 parseTags(currentLine);
3600 private void parseTags(NLSLine line) throws InvalidInputException {
3601 String s = new String(getCurrentTokenSource());
3602 int pos = s.indexOf(TAG_PREFIX);
3603 int lineLength = line.size();
3605 int start = pos + TAG_PREFIX_LENGTH;
3606 int end = s.indexOf(TAG_POSTFIX, start);
3607 String index = s.substring(start, end);
3610 i = Integer.parseInt(index) - 1;
3611 // Tags are one based not zero based.
3612 } catch (NumberFormatException e) {
3613 i = -1; // we don't want to consider this as a valid NLS tag
3615 if (line.exists(i)) {
3618 pos = s.indexOf(TAG_PREFIX, start);
3621 this.nonNLSStrings = new StringLiteral[lineLength];
3622 int nonNLSCounter = 0;
3623 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3624 StringLiteral literal = (StringLiteral) iterator.next();
3625 if (literal != null) {
3626 this.nonNLSStrings[nonNLSCounter++] = literal;
3629 if (nonNLSCounter == 0) {
3630 this.nonNLSStrings = null;
3634 this.wasNonExternalizedStringLiteral = true;
3635 if (nonNLSCounter != lineLength) {
3639 (this.nonNLSStrings = new StringLiteral[nonNLSCounter]),