1 /***********************************************************************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3 * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4 * and is available at http://www.eclipse.org/legal/cpl-v05.html
6 * Contributors: IBM Corporation - initial API and implementation
7 **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
14 import net.sourceforge.phpdt.core.compiler.CharOperation;
15 import net.sourceforge.phpdt.core.compiler.IScanner;
16 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
17 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
21 public class Scanner implements IScanner, ITerminalSymbols {
23 * APIs are - getNextToken() which returns the current type of the token
24 * (this value is not memorized by the scanner) - getCurrentTokenSource()
25 * which provides with the token "REAL" source (aka all unicode have been
26 * transformed into a correct char) - sourceStart gives the position into
27 * the stream - currentPosition-1 gives the sourceEnd position into the
31 // private boolean assertMode;
32 public boolean useAssertAsAnIndentifier = false;
34 // flag indicating if processed source contains occurrences of keyword
36 public boolean containsAssertKeyword = false;
38 public boolean recordLineSeparator;
40 public boolean ignorePHPOneLiner = false;
42 public boolean phpMode = false;
45 * This token is set to TokenNameecho if a short tag block begins (i.e.
46 * <?= ... ?>). Directly after the "=" character the
47 * getNextToken() method returns TokenNameINLINE_HTML In the next call to
48 * the getNextToken() method the value of fFillerToken (==TokenNameecho) is
52 int fFillerToken = TokenNameEOF;
54 public char currentCharacter;
56 public int startPosition;
58 public int currentPosition;
60 public int initialPosition, eofPosition;
62 // after this position eof are generated instead of real token from the
64 public boolean tokenizeComments;
66 public boolean tokenizeWhiteSpace;
68 public boolean tokenizeStrings;
70 // source should be viewed as a window (aka a part)
71 // of a entire very large stream
75 public char[] withoutUnicodeBuffer;
77 public int withoutUnicodePtr;
79 // when == 0 ==> no unicode in the current token
80 public boolean unicodeAsBackSlash = false;
82 public boolean scanningFloatLiteral = false;
84 // support for /** comments
85 public int[] commentStops = new int[10];
87 public int[] commentStarts = new int[10];
89 public int commentPtr = -1; // no comment test with commentPtr value -1
91 protected int lastCommentLinePosition = -1;
93 // diet parsing support - jump over some method body when requested
94 public boolean diet = false;
96 // support for the poor-line-debuggers ....
97 // remember the position of the cr/lf
98 public int[] lineEnds = new int[250];
100 public int linePtr = -1;
102 public boolean wasAcr = false;
104 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
106 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
108 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
110 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
112 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
114 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
116 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
118 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
120 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
122 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
124 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
126 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
128 // ----------------optimized identifier management------------------
129 static final char[] charArray_a = new char[] { 'a' },
130 charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
131 charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' },
132 charArray_f = new char[] { 'f' }, charArray_g = new char[] { 'g' },
133 charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
134 charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' },
135 charArray_l = new char[] { 'l' }, charArray_m = new char[] { 'm' },
136 charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
137 charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' },
138 charArray_r = new char[] { 'r' }, charArray_s = new char[] { 's' },
139 charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
140 charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' },
141 charArray_x = new char[] { 'x' }, charArray_y = new char[] { 'y' },
142 charArray_z = new char[] { 'z' };
144 static final char[] charArray_va = new char[] { '$', 'a' },
145 charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
146 '$', 'c' }, charArray_vd = new char[] { '$', 'd' },
147 charArray_ve = new char[] { '$', 'e' }, charArray_vf = new char[] {
148 '$', 'f' }, charArray_vg = new char[] { '$', 'g' },
149 charArray_vh = new char[] { '$', 'h' }, charArray_vi = new char[] {
150 '$', 'i' }, charArray_vj = new char[] { '$', 'j' },
151 charArray_vk = new char[] { '$', 'k' }, charArray_vl = new char[] {
152 '$', 'l' }, charArray_vm = new char[] { '$', 'm' },
153 charArray_vn = new char[] { '$', 'n' }, charArray_vo = new char[] {
154 '$', 'o' }, charArray_vp = new char[] { '$', 'p' },
155 charArray_vq = new char[] { '$', 'q' }, charArray_vr = new char[] {
156 '$', 'r' }, charArray_vs = new char[] { '$', 's' },
157 charArray_vt = new char[] { '$', 't' }, charArray_vu = new char[] {
158 '$', 'u' }, charArray_vv = new char[] { '$', 'v' },
159 charArray_vw = new char[] { '$', 'w' }, charArray_vx = new char[] {
160 '$', 'x' }, charArray_vy = new char[] { '$', 'y' },
161 charArray_vz = new char[] { '$', 'z' };
163 public final static int MAX_OBVIOUS = 256;
165 static final int[] ObviousIdentCharNatures = new int[MAX_OBVIOUS];
167 public final static int C_DOLLAR = 8;
169 public final static int C_LETTER = 4;
171 public final static int C_DIGIT = 3;
173 public final static int C_SEPARATOR = 2;
175 public final static int C_SPACE = 1;
177 for (int i = '0'; i <= '9'; i++)
178 ObviousIdentCharNatures[i] = C_DIGIT;
180 for (int i = 'a'; i <= 'z'; i++)
181 ObviousIdentCharNatures[i] = C_LETTER;
182 for (int i = 'A'; i <= 'Z'; i++)
183 ObviousIdentCharNatures[i] = C_LETTER;
184 ObviousIdentCharNatures['_'] = C_LETTER;
185 for (int i = 127; i <= 255; i++)
186 ObviousIdentCharNatures[i] = C_LETTER;
188 ObviousIdentCharNatures['$'] = C_DOLLAR;
190 ObviousIdentCharNatures[10] = C_SPACE; // \ u000a: LINE FEED
191 ObviousIdentCharNatures[12] = C_SPACE; // \ u000c: FORM FEED
192 ObviousIdentCharNatures[13] = C_SPACE; // \ u000d: CARRIAGE RETURN
193 ObviousIdentCharNatures[32] = C_SPACE; // \ u0020: SPACE
194 ObviousIdentCharNatures[9] = C_SPACE; // \ u0009: HORIZONTAL
197 ObviousIdentCharNatures['.'] = C_SEPARATOR;
198 ObviousIdentCharNatures[':'] = C_SEPARATOR;
199 ObviousIdentCharNatures[';'] = C_SEPARATOR;
200 ObviousIdentCharNatures[','] = C_SEPARATOR;
201 ObviousIdentCharNatures['['] = C_SEPARATOR;
202 ObviousIdentCharNatures[']'] = C_SEPARATOR;
203 ObviousIdentCharNatures['('] = C_SEPARATOR;
204 ObviousIdentCharNatures[')'] = C_SEPARATOR;
205 ObviousIdentCharNatures['{'] = C_SEPARATOR;
206 ObviousIdentCharNatures['}'] = C_SEPARATOR;
207 ObviousIdentCharNatures['+'] = C_SEPARATOR;
208 ObviousIdentCharNatures['-'] = C_SEPARATOR;
209 ObviousIdentCharNatures['*'] = C_SEPARATOR;
210 ObviousIdentCharNatures['/'] = C_SEPARATOR;
211 ObviousIdentCharNatures['='] = C_SEPARATOR;
212 ObviousIdentCharNatures['&'] = C_SEPARATOR;
213 ObviousIdentCharNatures['|'] = C_SEPARATOR;
214 ObviousIdentCharNatures['?'] = C_SEPARATOR;
215 ObviousIdentCharNatures['<'] = C_SEPARATOR;
216 ObviousIdentCharNatures['>'] = C_SEPARATOR;
217 ObviousIdentCharNatures['!'] = C_SEPARATOR;
218 ObviousIdentCharNatures['%'] = C_SEPARATOR;
219 ObviousIdentCharNatures['^'] = C_SEPARATOR;
220 ObviousIdentCharNatures['~'] = C_SEPARATOR;
221 ObviousIdentCharNatures['"'] = C_SEPARATOR;
222 ObviousIdentCharNatures['\''] = C_SEPARATOR;
225 static final char[] initCharArray = new char[] { '\u0000', '\u0000',
226 '\u0000', '\u0000', '\u0000', '\u0000' };
228 static final int TableSize = 30, InternalTableSize = 6;
230 // 30*6 = 180 entries
231 public static final int OptimizedLength = 6;
234 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
236 // support for detecting non-externalized string literals
237 int currentLineNr = -1;
239 int previousLineNr = -1;
241 NLSLine currentLine = null;
243 List lines = new ArrayList();
245 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
247 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
249 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
251 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
253 public StringLiteral[] nonNLSStrings = null;
255 public boolean checkNonExternalizedStringLiterals = true;
257 public boolean wasNonExternalizedStringLiteral = false;
260 for (int i = 0; i < 6; i++) {
261 for (int j = 0; j < TableSize; j++) {
262 for (int k = 0; k < InternalTableSize; k++) {
263 charArray_length[i][j][k] = initCharArray;
269 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
272 public static final int RoundBracket = 0;
274 public static final int SquareBracket = 1;
276 public static final int CurlyBracket = 2;
278 public static final int BracketKinds = 3;
281 public char[][] foundTaskTags = null;
283 public char[][] foundTaskMessages;
285 public char[][] foundTaskPriorities = null;
287 public int[][] foundTaskPositions;
289 public int foundTaskCount = 0;
291 public char[][] taskTags = null;
293 public char[][] taskPriorities = null;
295 public boolean isTaskCaseSensitive = true;
297 public static final boolean DEBUG = false;
299 public static final boolean TRACE = false;
301 public ICompilationUnit compilationUnit = null;
304 * Determines if the specified character is permissible as the first
305 * character in a PHP identifier or variable
307 * The '$' character for PHP variables is regarded as a correct first
311 public static boolean isPHPIdentOrVarStart(char ch) {
312 if (ch < MAX_OBVIOUS) {
313 return ObviousIdentCharNatures[ch] == C_LETTER
314 || ObviousIdentCharNatures[ch] == C_DOLLAR;
317 // return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F
319 // ch && ch <= 0xFF);
323 * Determines if the specified character is permissible as the first
324 * character in a PHP identifier.
326 * The '$' character for PHP variables isn't regarded as the first character !
328 public static boolean isPHPIdentifierStart(char ch) {
329 if (ch < MAX_OBVIOUS) {
330 return ObviousIdentCharNatures[ch] == C_LETTER;
333 // return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <=
338 * Determines if the specified character may be part of a PHP identifier as
339 * other than the first character
341 public static boolean isPHPIdentifierPart(char ch) {
342 if (ch < MAX_OBVIOUS) {
343 return ObviousIdentCharNatures[ch] == C_LETTER
344 || ObviousIdentCharNatures[ch] == C_DIGIT;
347 // return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch &&
352 public static boolean isSQLIdentifierPart(char ch) {
353 if (ch < MAX_OBVIOUS) {
354 return ObviousIdentCharNatures[ch] == C_LETTER
355 || ObviousIdentCharNatures[ch] == C_DIGIT;
360 public final boolean atEnd() {
361 // This code is not relevant if source is
362 // Only a part of the real stream input
363 return source.length == currentPosition;
366 public char[] getCurrentIdentifierSource() {
367 // return the token REAL source (aka unicodes are precomputed)
369 // if (withoutUnicodePtr != 0)
370 // //0 is used as a fast test flag so the real first char is in position
373 // withoutUnicodeBuffer,
375 // result = new char[withoutUnicodePtr],
377 // withoutUnicodePtr);
379 int length = currentPosition - startPosition;
380 switch (length) { // see OptimizedLength
382 return optimizedCurrentTokenSource1();
384 return optimizedCurrentTokenSource2();
386 return optimizedCurrentTokenSource3();
388 return optimizedCurrentTokenSource4();
390 return optimizedCurrentTokenSource5();
392 return optimizedCurrentTokenSource6();
395 System.arraycopy(source, startPosition, result = new char[length], 0,
401 public int getCurrentTokenEndPosition() {
402 return this.currentPosition - 1;
405 public final char[] getCurrentTokenSource() {
406 // Return the token REAL source (aka unicodes are precomputed)
408 // if (withoutUnicodePtr != 0)
409 // // 0 is used as a fast test flag so the real first char is in
412 // withoutUnicodeBuffer,
414 // result = new char[withoutUnicodePtr],
416 // withoutUnicodePtr);
419 System.arraycopy(source, startPosition,
420 result = new char[length = currentPosition - startPosition], 0,
426 public final char[] getCurrentTokenSource(int startPos) {
427 // Return the token REAL source (aka unicodes are precomputed)
429 // if (withoutUnicodePtr != 0)
430 // // 0 is used as a fast test flag so the real first char is in
433 // withoutUnicodeBuffer,
435 // result = new char[withoutUnicodePtr],
437 // withoutUnicodePtr);
440 System.arraycopy(source, startPos,
441 result = new char[length = currentPosition - startPos], 0,
447 public final char[] getCurrentTokenSourceString() {
448 // return the token REAL source (aka unicodes are precomputed).
449 // REMOVE the two " that are at the beginning and the end.
451 if (withoutUnicodePtr != 0)
452 // 0 is used as a fast test flag so the real first char is in
454 System.arraycopy(withoutUnicodeBuffer, 2,
455 // 2 is 1 (real start) + 1 (to jump over the ")
456 result = new char[withoutUnicodePtr - 2], 0,
457 withoutUnicodePtr - 2);
460 System.arraycopy(source, startPosition + 1,
461 result = new char[length = currentPosition - startPosition
467 public final boolean equalsCurrentTokenSource(char[] word) {
468 if (word.length != currentPosition - startPosition) {
471 for (int i = 0; i < word.length; i++) {
472 if (word[i] != source[startPosition + i]) {
479 public final char[] getRawTokenSourceEnd() {
480 int length = this.eofPosition - this.currentPosition - 1;
481 char[] sourceEnd = new char[length];
482 System.arraycopy(this.source, this.currentPosition, sourceEnd, 0,
487 public int getCurrentTokenStartPosition() {
488 return this.startPosition;
491 public final String getCurrentStringLiteral() {
492 char[] result = getCurrentStringLiteralSource();
493 return new String(result);
496 public final char[] getCurrentStringLiteralSource() {
497 // Return the token REAL source (aka unicodes are precomputed)
498 if (startPosition + 1 >= currentPosition) {
504 .arraycopy(source, startPosition + 1,
505 result = new char[length = currentPosition
506 - startPosition - 2], 0, length);
511 public final char[] getCurrentStringLiteralSource(int startPos) {
512 // Return the token REAL source (aka unicodes are precomputed)
515 System.arraycopy(source, startPos + 1,
516 result = new char[length = currentPosition - startPos - 2], 0,
523 * Search the source position corresponding to the end of a given line
526 * Line numbers are 1-based, and relative to the scanner initialPosition.
527 * Character positions are 0-based.
529 * In case the given line number is inconsistent, answers -1.
531 public final int getLineEnd(int lineNumber) {
532 if (lineEnds == null)
534 if (lineNumber >= lineEnds.length)
538 if (lineNumber == lineEnds.length - 1)
540 return lineEnds[lineNumber - 1];
541 // next line start one character behind the lineEnd of the previous line
545 * Search the source position corresponding to the beginning of a given line
548 * Line numbers are 1-based, and relative to the scanner initialPosition.
549 * Character positions are 0-based.
551 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
553 * In case the given line number is inconsistent, answers -1.
555 public final int getLineStart(int lineNumber) {
556 if (lineEnds == null)
558 if (lineNumber >= lineEnds.length)
563 return initialPosition;
564 return lineEnds[lineNumber - 2] + 1;
565 // next line start one character behind the lineEnd of the previous line
568 public final boolean getNextChar(char testedChar) {
570 // handle the case of unicode.
571 // when a unicode appears then we must use a buffer that holds char
573 // At the end of this method currentCharacter holds the new visited char
574 // and currentPosition points right next after it
575 // Both previous lines are true if the currentCharacter is == to the
577 // On false, no side effect has occured.
578 // ALL getNextChar.... ARE OPTIMIZED COPIES
579 int temp = currentPosition;
581 currentCharacter = source[currentPosition++];
582 // if (((currentCharacter = source[currentPosition++]) == '\\')
583 // && (source[currentPosition] == 'u')) {
584 // //-------------unicode traitement ------------
585 // int c1, c2, c3, c4;
586 // int unicodeSize = 6;
587 // currentPosition++;
588 // while (source[currentPosition] == 'u') {
589 // currentPosition++;
593 // if (((c1 = Character.getNumericValue(source[currentPosition++]))
596 // || ((c2 = Character.getNumericValue(source[currentPosition++])) >
599 // || ((c3 = Character.getNumericValue(source[currentPosition++])) >
602 // || ((c4 = Character.getNumericValue(source[currentPosition++])) >
605 // currentPosition = temp;
609 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
610 // if (currentCharacter != testedChar) {
611 // currentPosition = temp;
614 // unicodeAsBackSlash = currentCharacter == '\\';
616 // //need the unicode buffer
617 // if (withoutUnicodePtr == 0) {
618 // //buffer all the entries that have been left aside....
619 // withoutUnicodePtr = currentPosition - unicodeSize -
624 // withoutUnicodeBuffer,
626 // withoutUnicodePtr);
628 // //fill the buffer with the char
629 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
632 // } //-------------end unicode traitement--------------
634 if (currentCharacter != testedChar) {
635 currentPosition = temp;
638 unicodeAsBackSlash = false;
639 // if (withoutUnicodePtr != 0)
640 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
643 } catch (IndexOutOfBoundsException e) {
644 unicodeAsBackSlash = false;
645 currentPosition = temp;
650 public final int getNextChar(char testedChar1, char testedChar2) {
651 // INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
652 // test can be done with (x==0) for the first and (x>0) for the second
653 // handle the case of unicode.
654 // when a unicode appears then we must use a buffer that holds char
656 // At the end of this method currentCharacter holds the new visited char
657 // and currentPosition points right next after it
658 // Both previous lines are true if the currentCharacter is == to the
660 // On false, no side effect has occured.
661 // ALL getNextChar.... ARE OPTIMIZED COPIES
662 int temp = currentPosition;
665 currentCharacter = source[currentPosition++];
666 // if (((currentCharacter = source[currentPosition++]) == '\\')
667 // && (source[currentPosition] == 'u')) {
668 // //-------------unicode traitement ------------
669 // int c1, c2, c3, c4;
670 // int unicodeSize = 6;
671 // currentPosition++;
672 // while (source[currentPosition] == 'u') {
673 // currentPosition++;
677 // if (((c1 = Character.getNumericValue(source[currentPosition++]))
680 // || ((c2 = Character.getNumericValue(source[currentPosition++])) >
683 // || ((c3 = Character.getNumericValue(source[currentPosition++])) >
686 // || ((c4 = Character.getNumericValue(source[currentPosition++])) >
689 // currentPosition = temp;
693 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
694 // if (currentCharacter == testedChar1)
696 // else if (currentCharacter == testedChar2)
699 // currentPosition = temp;
703 // //need the unicode buffer
704 // if (withoutUnicodePtr == 0) {
705 // //buffer all the entries that have been left aside....
706 // withoutUnicodePtr = currentPosition - unicodeSize -
711 // withoutUnicodeBuffer,
713 // withoutUnicodePtr);
715 // //fill the buffer with the char
716 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
718 // } //-------------end unicode traitement--------------
720 if (currentCharacter == testedChar1)
722 else if (currentCharacter == testedChar2)
725 currentPosition = temp;
728 // if (withoutUnicodePtr != 0)
729 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
732 } catch (IndexOutOfBoundsException e) {
733 currentPosition = temp;
738 public final boolean getNextCharAsDigit() {
740 // handle the case of unicode.
741 // when a unicode appears then we must use a buffer that holds char
743 // At the end of this method currentCharacter holds the new visited char
744 // and currentPosition points right next after it
745 // Both previous lines are true if the currentCharacter is a digit
746 // On false, no side effect has occured.
747 // ALL getNextChar.... ARE OPTIMIZED COPIES
748 int temp = currentPosition;
750 currentCharacter = source[currentPosition++];
751 // if (((currentCharacter = source[currentPosition++]) == '\\')
752 // && (source[currentPosition] == 'u')) {
753 // //-------------unicode traitement ------------
754 // int c1, c2, c3, c4;
755 // int unicodeSize = 6;
756 // currentPosition++;
757 // while (source[currentPosition] == 'u') {
758 // currentPosition++;
762 // if (((c1 = Character.getNumericValue(source[currentPosition++]))
765 // || ((c2 = Character.getNumericValue(source[currentPosition++])) >
768 // || ((c3 = Character.getNumericValue(source[currentPosition++])) >
771 // || ((c4 = Character.getNumericValue(source[currentPosition++])) >
774 // currentPosition = temp;
778 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
779 // if (!Character.isDigit(currentCharacter)) {
780 // currentPosition = temp;
784 // //need the unicode buffer
785 // if (withoutUnicodePtr == 0) {
786 // //buffer all the entries that have been left aside....
787 // withoutUnicodePtr = currentPosition - unicodeSize -
792 // withoutUnicodeBuffer,
794 // withoutUnicodePtr);
796 // //fill the buffer with the char
797 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
799 // } //-------------end unicode traitement--------------
801 if (!Character.isDigit(currentCharacter)) {
802 currentPosition = temp;
805 // if (withoutUnicodePtr != 0)
806 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
809 } catch (IndexOutOfBoundsException e) {
810 currentPosition = temp;
815 public final boolean getNextCharAsDigit(int radix) {
817 // handle the case of unicode.
818 // when a unicode appears then we must use a buffer that holds char
820 // At the end of this method currentCharacter holds the new visited char
821 // and currentPosition points right next after it
822 // Both previous lines are true if the currentCharacter is a digit base
825 // On false, no side effect has occured.
826 // ALL getNextChar.... ARE OPTIMIZED COPIES
827 int temp = currentPosition;
829 currentCharacter = source[currentPosition++];
830 // if (((currentCharacter = source[currentPosition++]) == '\\')
831 // && (source[currentPosition] == 'u')) {
832 // //-------------unicode traitement ------------
833 // int c1, c2, c3, c4;
834 // int unicodeSize = 6;
835 // currentPosition++;
836 // while (source[currentPosition] == 'u') {
837 // currentPosition++;
841 // if (((c1 = Character.getNumericValue(source[currentPosition++]))
844 // || ((c2 = Character.getNumericValue(source[currentPosition++])) >
847 // || ((c3 = Character.getNumericValue(source[currentPosition++])) >
850 // || ((c4 = Character.getNumericValue(source[currentPosition++])) >
853 // currentPosition = temp;
857 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
858 // if (Character.digit(currentCharacter, radix) == -1) {
859 // currentPosition = temp;
863 // //need the unicode buffer
864 // if (withoutUnicodePtr == 0) {
865 // //buffer all the entries that have been left aside....
866 // withoutUnicodePtr = currentPosition - unicodeSize -
871 // withoutUnicodeBuffer,
873 // withoutUnicodePtr);
875 // //fill the buffer with the char
876 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
878 // } //-------------end unicode traitement--------------
880 if (Character.digit(currentCharacter, radix) == -1) {
881 currentPosition = temp;
884 // if (withoutUnicodePtr != 0)
885 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
888 } catch (IndexOutOfBoundsException e) {
889 currentPosition = temp;
894 public boolean getNextCharAsJavaIdentifierPart() {
896 // handle the case of unicode.
897 // when a unicode appears then we must use a buffer that holds char
899 // At the end of this method currentCharacter holds the new visited char
900 // and currentPosition points right next after it
901 // Both previous lines are true if the currentCharacter is a
902 // JavaIdentifierPart
903 // On false, no side effect has occured.
904 // ALL getNextChar.... ARE OPTIMIZED COPIES
905 int temp = currentPosition;
907 currentCharacter = source[currentPosition++];
908 // if (((currentCharacter = source[currentPosition++]) == '\\')
909 // && (source[currentPosition] == 'u')) {
910 // //-------------unicode traitement ------------
911 // int c1, c2, c3, c4;
912 // int unicodeSize = 6;
913 // currentPosition++;
914 // while (source[currentPosition] == 'u') {
915 // currentPosition++;
919 // if (((c1 = Character.getNumericValue(source[currentPosition++]))
922 // || ((c2 = Character.getNumericValue(source[currentPosition++])) >
925 // || ((c3 = Character.getNumericValue(source[currentPosition++])) >
928 // || ((c4 = Character.getNumericValue(source[currentPosition++])) >
931 // currentPosition = temp;
935 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
936 // if (!isPHPIdentifierPart(currentCharacter)) {
937 // currentPosition = temp;
941 // //need the unicode buffer
942 // if (withoutUnicodePtr == 0) {
943 // //buffer all the entries that have been left aside....
944 // withoutUnicodePtr = currentPosition - unicodeSize -
949 // withoutUnicodeBuffer,
951 // withoutUnicodePtr);
953 // //fill the buffer with the char
954 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
956 // } //-------------end unicode traitement--------------
958 if (!isPHPIdentifierPart(currentCharacter)) {
959 currentPosition = temp;
962 // if (withoutUnicodePtr != 0)
963 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
966 } catch (IndexOutOfBoundsException e) {
967 currentPosition = temp;
972 public int getCastOrParen() {
973 int tempPosition = currentPosition;
974 char tempCharacter = currentCharacter;
975 int tempToken = TokenNameLPAREN;
976 boolean found = false;
977 StringBuffer buf = new StringBuffer();
980 currentCharacter = source[currentPosition++];
981 } while (currentCharacter == ' ' || currentCharacter == '\t');
982 while (ObviousIdentCharNatures[currentCharacter] == C_LETTER) {
983 // while((currentCharacter >= 'a' && currentCharacter <= 'z') ||
984 // (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
985 buf.append(currentCharacter);
986 currentCharacter = source[currentPosition++];
988 if (buf.length() >= 3 && buf.length() <= 7) {
989 char[] data = buf.toString().toCharArray();
991 switch (data.length) {
994 if ((data[index] == 'i') && (data[++index] == 'n')
995 && (data[++index] == 't')) {
997 tempToken = TokenNameintCAST;
1002 if ((data[index] == 'b') && (data[++index] == 'o')
1003 && (data[++index] == 'o') && (data[++index] == 'l')) {
1005 tempToken = TokenNameboolCAST;
1008 if ((data[index] == 'r') && (data[++index] == 'e')
1009 && (data[++index] == 'a')
1010 && (data[++index] == 'l')) {
1012 tempToken = TokenNamedoubleCAST;
1017 // array unset float
1018 if ((data[index] == 'a') && (data[++index] == 'r')
1019 && (data[++index] == 'r') && (data[++index] == 'a')
1020 && (data[++index] == 'y')) {
1022 tempToken = TokenNamearrayCAST;
1025 if ((data[index] == 'u') && (data[++index] == 'n')
1026 && (data[++index] == 's')
1027 && (data[++index] == 'e')
1028 && (data[++index] == 't')) {
1030 tempToken = TokenNameunsetCAST;
1033 if ((data[index] == 'f') && (data[++index] == 'l')
1034 && (data[++index] == 'o')
1035 && (data[++index] == 'a')
1036 && (data[++index] == 't')) {
1038 tempToken = TokenNamedoubleCAST;
1044 // object string double
1045 if ((data[index] == 'o') && (data[++index] == 'b')
1046 && (data[++index] == 'j') && (data[++index] == 'e')
1047 && (data[++index] == 'c') && (data[++index] == 't')) {
1049 tempToken = TokenNameobjectCAST;
1052 if ((data[index] == 's') && (data[++index] == 't')
1053 && (data[++index] == 'r')
1054 && (data[++index] == 'i')
1055 && (data[++index] == 'n')
1056 && (data[++index] == 'g')) {
1058 tempToken = TokenNamestringCAST;
1061 if ((data[index] == 'd') && (data[++index] == 'o')
1062 && (data[++index] == 'u')
1063 && (data[++index] == 'b')
1064 && (data[++index] == 'l')
1065 && (data[++index] == 'e')) {
1067 tempToken = TokenNamedoubleCAST;
1074 if ((data[index] == 'b') && (data[++index] == 'o')
1075 && (data[++index] == 'o') && (data[++index] == 'l')
1076 && (data[++index] == 'e') && (data[++index] == 'a')
1077 && (data[++index] == 'n')) {
1079 tempToken = TokenNameboolCAST;
1082 if ((data[index] == 'i') && (data[++index] == 'n')
1083 && (data[++index] == 't')
1084 && (data[++index] == 'e')
1085 && (data[++index] == 'g')
1086 && (data[++index] == 'e')
1087 && (data[++index] == 'r')) {
1089 tempToken = TokenNameintCAST;
1095 while (currentCharacter == ' ' || currentCharacter == '\t') {
1096 currentCharacter = source[currentPosition++];
1098 if (currentCharacter == ')') {
1103 } catch (IndexOutOfBoundsException e) {
1105 currentCharacter = tempCharacter;
1106 currentPosition = tempPosition;
1107 return TokenNameLPAREN;
// Scans a backtick-delimited string: characters are read from source until the
// closing backtick, backslash escapes are handed to
// scanDoubleQuotedEscapeCharacter() and line breaks are recorded when
// recordLineSeparator is set.
// Throws InvalidInputException(UNTERMINATED_STRING) if source ends before the
// closing backtick is reached.
1110 public void consumeStringInterpolated() throws InvalidInputException {
1112 // consume next character
1113 unicodeAsBackSlash = false;
1114 currentCharacter = source[currentPosition++];
1115 // if (((currentCharacter = source[currentPosition++]) == '\\')
1116 // && (source[currentPosition] == 'u')) {
1117 // getNextUnicodeChar();
1119 // if (withoutUnicodePtr != 0) {
1120 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1121 // currentCharacter;
1124 while (currentCharacter != '`') {
1125 /** ** in PHP \r and \n are valid in string literals *** */
1126 // if ((currentCharacter == '\n')
1127 // || (currentCharacter == '\r')) {
1128 // // relocate if finding another quote fairly close: thus
1130 // '/u000D' will be fully consumed
1131 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1132 // if (currentPosition + lookAhead == source.length)
1134 // if (source[currentPosition + lookAhead] == '\n')
1136 // if (source[currentPosition + lookAhead] == '\"') {
1137 // currentPosition += lookAhead + 1;
1141 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1143 if (currentCharacter == '\\') {
// remember where the escape starts so that its length can be computed below
1144 int escapeSize = currentPosition;
1145 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1146 // scanEscapeCharacter make a side effect on this value and
1148 // the previous value few lines down this one
1149 scanDoubleQuotedEscapeCharacter();
// number of source characters the escape scanner advanced over
1150 escapeSize = currentPosition - escapeSize;
// a pointer of 0 means the token text has not been copied into
// withoutUnicodeBuffer yet; copy what was read so far, then append
1151 if (withoutUnicodePtr == 0) {
1152 // buffer all the entries that have been left aside....
1153 withoutUnicodePtr = currentPosition - escapeSize - 1
1155 System.arraycopy(source, startPosition,
1156 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1157 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1158 } else { // overwrite the / in the buffer
1159 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1160 if (backSlashAsUnicodeInString) { // there are TWO \
1162 // where only one is correct
1163 withoutUnicodePtr--;
1166 } else if ((currentCharacter == '\r')
1167 || (currentCharacter == '\n')) {
1168 if (recordLineSeparator) {
1169 pushLineSeparator();
1172 // consume next character
1173 unicodeAsBackSlash = false;
1174 currentCharacter = source[currentPosition++];
1175 // if (((currentCharacter = source[currentPosition++]) == '\\')
1176 // && (source[currentPosition] == 'u')) {
1177 // getNextUnicodeChar();
// while the buffer is in use, every consumed character is appended to it
1179 if (withoutUnicodePtr != 0) {
1180 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1184 } catch (IndexOutOfBoundsException e) {
1185 // reset end position for error reporting
1186 currentPosition -= 2;
1187 throw new InvalidInputException(UNTERMINATED_STRING);
1188 } catch (InvalidInputException e) {
// recovery for INVALID_ESCAPE: search at most 50 characters ahead and, when
// a backtick is found, place currentPosition just behind it
1189 if (e.getMessage().equals(INVALID_ESCAPE)) {
1190 // relocate if finding another quote fairly close: thus unicode
1191 // '/u000D' will be fully consumed
1192 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1193 if (currentPosition + lookAhead == source.length)
1195 if (source[currentPosition + lookAhead] == '\n')
1197 if (source[currentPosition + lookAhead] == '`') {
1198 currentPosition += lookAhead + 1;
// optional NLS bookkeeping: the literal is added to currentLine, and a new
// NLSLine is created and registered in lines when none is active
1205 if (checkNonExternalizedStringLiterals) { // check for presence of NLS
1207 // //$NON-NLS-?$ where ? is an
1209 if (currentLine == null) {
1210 currentLine = new NLSLine();
1211 lines.add(currentLine);
1213 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1214 startPosition, currentPosition - 1));
// Scans a single-quoted string: characters are read from source until the
// closing single quote, backslash escapes are handed to
// scanSingleQuotedEscapeCharacter() and line breaks are recorded when
// recordLineSeparator is set.
// Throws InvalidInputException(UNTERMINATED_STRING) if source ends before the
// closing quote is reached.
1218 public void consumeStringConstant() throws InvalidInputException {
1220 // consume next character
1221 unicodeAsBackSlash = false;
1222 currentCharacter = source[currentPosition++];
1223 // if (((currentCharacter = source[currentPosition++]) == '\\')
1224 // && (source[currentPosition] == 'u')) {
1225 // getNextUnicodeChar();
1227 // if (withoutUnicodePtr != 0) {
1228 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1229 // currentCharacter;
1232 while (currentCharacter != '\'') {
1233 /** ** in PHP \r and \n are valid in string literals *** */
1234 // if ((currentCharacter == '\n')
1235 // || (currentCharacter == '\r')) {
1236 // // relocate if finding another quote fairly close: thus
1238 // '/u000D' will be fully consumed
1239 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1240 // if (currentPosition + lookAhead == source.length)
1242 // if (source[currentPosition + lookAhead] == '\n')
1244 // if (source[currentPosition + lookAhead] == '\"') {
1245 // currentPosition += lookAhead + 1;
1249 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1251 if (currentCharacter == '\\') {
// remember where the escape starts so that its length can be computed below
1252 int escapeSize = currentPosition;
1253 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1254 // scanEscapeCharacter make a side effect on this value and
1256 // the previous value few lines down this one
1257 scanSingleQuotedEscapeCharacter();
// number of source characters the escape scanner advanced over
1258 escapeSize = currentPosition - escapeSize;
// a pointer of 0 means the token text has not been copied into
// withoutUnicodeBuffer yet; copy what was read so far, then append
1259 if (withoutUnicodePtr == 0) {
1260 // buffer all the entries that have been left aside....
1261 withoutUnicodePtr = currentPosition - escapeSize - 1
1263 System.arraycopy(source, startPosition,
1264 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1265 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1266 } else { // overwrite the / in the buffer
1267 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1268 if (backSlashAsUnicodeInString) { // there are TWO \
1270 // where only one is correct
1271 withoutUnicodePtr--;
1274 } else if ((currentCharacter == '\r')
1275 || (currentCharacter == '\n')) {
1276 if (recordLineSeparator) {
1277 pushLineSeparator();
1280 // consume next character
1281 unicodeAsBackSlash = false;
1282 currentCharacter = source[currentPosition++];
1283 // if (((currentCharacter = source[currentPosition++]) == '\\')
1284 // && (source[currentPosition] == 'u')) {
1285 // getNextUnicodeChar();
// while the buffer is in use, every consumed character is appended to it
1287 if (withoutUnicodePtr != 0) {
1288 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1292 } catch (IndexOutOfBoundsException e) {
1293 // reset end position for error reporting
1294 currentPosition -= 2;
1295 throw new InvalidInputException(UNTERMINATED_STRING);
1296 } catch (InvalidInputException e) {
// recovery for INVALID_ESCAPE: search at most 50 characters ahead and, when
// a single quote is found, place currentPosition just behind it
1297 if (e.getMessage().equals(INVALID_ESCAPE)) {
1298 // relocate if finding another quote fairly close: thus unicode
1299 // '/u000D' will be fully consumed
1300 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1301 if (currentPosition + lookAhead == source.length)
1303 if (source[currentPosition + lookAhead] == '\n')
1305 if (source[currentPosition + lookAhead] == '\'') {
1306 currentPosition += lookAhead + 1;
// optional NLS bookkeeping: the literal is added to currentLine, and a new
// NLSLine is created and registered in lines when none is active
1313 if (checkNonExternalizedStringLiterals) { // check for presence of NLS
1315 // //$NON-NLS-?$ where ? is an
1317 if (currentLine == null) {
1318 currentLine = new NLSLine();
1319 lines.add(currentLine);
1321 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1322 startPosition, currentPosition - 1));
// Scans a double-quoted string: characters are read from source until a
// double quote is seen while openDollarBrace is 0. Backslash escapes are
// handed to scanDoubleQuotedEscapeCharacter() and line breaks are recorded
// when recordLineSeparator is set.
// Throws InvalidInputException(UNTERMINATED_STRING) if source ends before the
// literal is closed.
1326 public void consumeStringLiteral() throws InvalidInputException {
1328 int openDollarBrace = 0;
1329 // consume next character
1330 unicodeAsBackSlash = false;
1331 currentCharacter = source[currentPosition++];
// a double quote only terminates the literal when no brace block is open
1332 while (currentCharacter != '"' || openDollarBrace > 0) {
1333 /** ** in PHP \r and \n are valid in string literals *** */
1334 if (currentCharacter == '\\') {
// remember where the escape starts so that its length can be computed below
1335 int escapeSize = currentPosition;
1336 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1337 // scanEscapeCharacter make a side effect on this value and
1339 // the previous value few lines down this one
1340 scanDoubleQuotedEscapeCharacter();
// number of source characters the escape scanner advanced over
1341 escapeSize = currentPosition - escapeSize;
// a pointer of 0 means the token text has not been copied into
// withoutUnicodeBuffer yet; copy what was read so far, then append
1342 if (withoutUnicodePtr == 0) {
1343 // buffer all the entries that have been left aside....
1344 withoutUnicodePtr = currentPosition - escapeSize - 1
1346 System.arraycopy(source, startPosition,
1347 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1348 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1349 } else { // overwrite the / in the buffer
1350 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1351 if (backSlashAsUnicodeInString) { // there are TWO \
1353 // where only one is correct
1354 withoutUnicodePtr--;
// NOTE(review): the three branches below (dollar-brace, brace-dollar and
// closing brace) presumably maintain openDollarBrace - confirm
1357 } else if (currentCharacter == '$'
1358 && source[currentPosition] == '{') {
1360 } else if (currentCharacter == '{'
1361 && source[currentPosition] == '$') {
1363 } else if (currentCharacter == '}') {
1365 } else if ((currentCharacter == '\r')
1366 || (currentCharacter == '\n')) {
1367 if (recordLineSeparator) {
1368 pushLineSeparator();
1371 // consume next character
1372 unicodeAsBackSlash = false;
1373 currentCharacter = source[currentPosition++];
// while the buffer is in use, every consumed character is appended to it
1374 if (withoutUnicodePtr != 0) {
1375 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1378 } catch (IndexOutOfBoundsException e) {
1379 // reset end position for error reporting
1380 currentPosition -= 2;
1381 throw new InvalidInputException(UNTERMINATED_STRING);
1382 } catch (InvalidInputException e) {
// recovery for INVALID_ESCAPE: search at most 50 characters ahead and, when
// a double quote is found, place currentPosition just behind it
1383 if (e.getMessage().equals(INVALID_ESCAPE)) {
1384 // relocate if finding another quote fairly close: thus unicode
1385 // '/u000D' will be fully consumed
1386 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1387 if (currentPosition + lookAhead == source.length)
1389 if (source[currentPosition + lookAhead] == '\n')
1391 if (source[currentPosition + lookAhead] == '\"') {
1392 currentPosition += lookAhead + 1;
// optional NLS bookkeeping: the literal is added to currentLine, and a new
// NLSLine is created and registered in lines when none is active
1399 if (checkNonExternalizedStringLiterals) { // check for presence of NLS
1401 // //$NON-NLS-?$ where ? is an
1403 if (currentLine == null) {
1404 currentLine = new NLSLine();
1405 lines.add(currentLine);
1407 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1408 startPosition, currentPosition - 1));
// Scans and returns the type of the next token.
// Inline HTML is delegated to getInlinedHTMLToken(currentPosition)
// (NOTE(review): presumably when phpMode is false - confirm). A token queued
// in fFillerToken is taken next. Otherwise leading whitespace is skipped
// (reported as TokenNameWHITESPACE when tokenizeWhiteSpace is set) and the
// token is selected by switching on the first character.
// Reading past the end of source yields TokenNameEOF; unterminated strings
// and comments surface as InvalidInputException.
1412 public int getNextToken() throws InvalidInputException {
1414 return getInlinedHTMLToken(currentPosition);
// a token queued in fFillerToken is picked up and the slot is reset to
// TokenNameEOF
1416 if (fFillerToken != TokenNameEOF) {
1418 startPosition = currentPosition;
1419 tempToken = fFillerToken;
1420 fFillerToken = TokenNameEOF;
1423 this.wasAcr = false;
1425 jumpOverMethodBody();
1427 return currentPosition > source.length ? TokenNameEOF
// a pointer of 0 marks withoutUnicodeBuffer as unused for the new token
1432 withoutUnicodePtr = 0;
1433 // ---------Consume white space and handles
1434 // startPosition---------
1435 int whiteStart = currentPosition;
1436 startPosition = currentPosition;
1437 currentCharacter = source[currentPosition++];
1439 while ((currentCharacter == ' ')
1440 || Character.isWhitespace(currentCharacter)) {
1441 if ((currentCharacter == '\r')
1442 || (currentCharacter == '\n')) {
1443 checkNonExternalizeString();
1444 if (recordLineSeparator) {
1445 pushLineSeparator();
1450 startPosition = currentPosition;
1451 currentCharacter = source[currentPosition++];
// whitespace was skipped if the scan moved beyond whiteStart
1453 if (tokenizeWhiteSpace
1454 && (whiteStart != currentPosition - 1)) {
1455 // reposition scanner in case we are interested by
1458 startPosition = whiteStart;
1459 return TokenNameWHITESPACE;
1461 // little trick to get out in the middle of a source
1463 if (currentPosition > eofPosition)
1464 return TokenNameEOF;
1465 // ---------Identify the next token-------------
1466 switch (currentCharacter) {
1468 return getCastOrParen();
1470 return TokenNameRPAREN;
1472 return TokenNameLBRACE;
1474 return TokenNameRBRACE;
1476 return TokenNameLBRACKET;
1478 return TokenNameRBRACKET;
1480 return TokenNameSEMICOLON;
1482 return TokenNameCOMMA;
// DOT_EQUAL, a number that starts with its fraction, or plain DOT
1484 if (getNextChar('='))
1485 return TokenNameDOT_EQUAL;
1486 if (getNextCharAsDigit())
1487 return scanNumber(true);
1488 return TokenNameDOT;
1491 if ((test = getNextChar('+', '=')) == 0)
1492 return TokenNamePLUS_PLUS;
1494 return TokenNamePLUS_EQUAL;
1495 return TokenNamePLUS;
1499 if ((test = getNextChar('-', '=')) == 0)
1500 return TokenNameMINUS_MINUS;
1502 return TokenNameMINUS_EQUAL;
1503 if (getNextChar('>'))
1504 return TokenNameMINUS_GREATER;
1505 return TokenNameMINUS;
1508 if (getNextChar('='))
1509 return TokenNameTWIDDLE_EQUAL;
1510 return TokenNameTWIDDLE;
// NOT, NOT_EQUAL or NOT_EQUAL_EQUAL, depending on how many = follow
1512 if (getNextChar('=')) {
1513 if (getNextChar('=')) {
1514 return TokenNameNOT_EQUAL_EQUAL;
1516 return TokenNameNOT_EQUAL;
1518 return TokenNameNOT;
1520 if (getNextChar('='))
1521 return TokenNameMULTIPLY_EQUAL;
1522 return TokenNameMULTIPLY;
1524 if (getNextChar('='))
1525 return TokenNameREMAINDER_EQUAL;
1526 return TokenNameREMAINDER;
// one more character is read to tell LESS_EQUAL, NOT_EQUAL, the LEFT_SHIFT
// forms and heredoc apart; at the end of source the position is restored
// and plain LESS is returned
1528 int oldPosition = currentPosition;
1530 currentCharacter = source[currentPosition++];
1531 } catch (IndexOutOfBoundsException e) {
1532 currentPosition = oldPosition;
1533 return TokenNameLESS;
1535 switch (currentCharacter) {
1537 return TokenNameLESS_EQUAL;
1539 return TokenNameNOT_EQUAL;
1541 if (getNextChar('='))
1542 return TokenNameLEFT_SHIFT_EQUAL;
// heredoc: skip whitespace, then read the identifier whose repetition is
// searched for below as the end tag
1543 if (getNextChar('<')) {
1544 currentCharacter = source[currentPosition++];
1545 while (Character.isWhitespace(currentCharacter)) {
1546 currentCharacter = source[currentPosition++];
// heredocStart is the index of the first character of the identifier
1548 int heredocStart = currentPosition - 1;
1549 int heredocLength = 0;
1550 if (isPHPIdentifierStart(currentCharacter)) {
1551 currentCharacter = source[currentPosition++];
1553 return TokenNameERROR;
1555 while (isPHPIdentifierPart(currentCharacter)) {
1556 currentCharacter = source[currentPosition++];
1558 heredocLength = currentPosition - heredocStart
1560 // heredoc end-tag determination
1561 boolean endTag = true;
1564 ch = source[currentPosition++];
1565 if (ch == '\r' || ch == '\n') {
1566 if (recordLineSeparator) {
1567 pushLineSeparator();
// compare the text at currentPosition with the heredoc identifier
1571 for (int i = 0; i < heredocLength; i++) {
1572 if (source[currentPosition + i] != source[heredocStart
1579 currentPosition += heredocLength - 1;
1580 currentCharacter = source[currentPosition++];
1581 break; // do...while loop
1587 return TokenNameHEREDOC;
1589 return TokenNameLEFT_SHIFT;
1591 currentPosition = oldPosition;
1592 return TokenNameLESS;
// GREATER_EQUAL, RIGHT_SHIFT, RIGHT_SHIFT_EQUAL or plain GREATER
1596 if ((test = getNextChar('=', '>')) == 0)
1597 return TokenNameGREATER_EQUAL;
1599 if ((test = getNextChar('=', '>')) == 0)
1600 return TokenNameRIGHT_SHIFT_EQUAL;
1601 return TokenNameRIGHT_SHIFT;
1603 return TokenNameGREATER;
// EQUAL, EQUAL_EQUAL, EQUAL_EQUAL_EQUAL or EQUAL_GREATER
1606 if (getNextChar('=')) {
1607 if (getNextChar('=')) {
1608 return TokenNameEQUAL_EQUAL_EQUAL;
1610 return TokenNameEQUAL_EQUAL;
1612 if (getNextChar('>'))
1613 return TokenNameEQUAL_GREATER;
1614 return TokenNameEQUAL;
1617 if ((test = getNextChar('&', '=')) == 0)
1618 return TokenNameAND_AND;
1620 return TokenNameAND_EQUAL;
1621 return TokenNameAND;
1625 if ((test = getNextChar('|', '=')) == 0)
1626 return TokenNameOR_OR;
1628 return TokenNameOR_EQUAL;
1632 if (getNextChar('='))
1633 return TokenNameXOR_EQUAL;
1634 return TokenNameXOR;
// a > following here means the rest is scanned as inline HTML, starting
// two characters back; otherwise the token is QUESTION
1636 if (getNextChar('>')) {
1638 if (currentPosition == source.length) {
1640 return TokenNameINLINE_HTML;
1642 return getInlinedHTMLToken(currentPosition - 2);
1644 return TokenNameQUESTION;
1646 if (getNextChar(':'))
1647 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1648 return TokenNameCOLON;
1652 return TokenNameForwardSlash;
// string delimiters dispatch to the matching consume method
1654 consumeStringConstant();
1655 return TokenNameStringSingleQuote;
1657 // if (tokenizeStrings) {
1658 consumeStringLiteral();
1659 return TokenNameStringDoubleQuote;
1661 // return TokenNameEncapsedString2;
1663 // if (tokenizeStrings) {
1664 consumeStringInterpolated();
1665 return TokenNameStringInterpolated;
1667 // return TokenNameEncapsedString0;
// slash or hash: DIVIDE_EQUAL, a line comment, a block comment or DIVIDE
1670 char startChar = currentCharacter;
1671 if (getNextChar('=') && startChar == '/') {
1672 return TokenNameDIVIDE_EQUAL;
// a hash, or a second slash (getNextChar result 0), starts a line comment
1675 if ((startChar == '#')
1676 || (test = getNextChar('/', '*')) == 0) {
1678 this.lastCommentLinePosition = this.currentPosition;
1679 int endPositionForLineComment = 0;
1680 try { // get the next char
1681 currentCharacter = source[currentPosition++];
1682 // if (((currentCharacter =
1683 // source[currentPosition++])
1685 // && (source[currentPosition] == 'u')) {
1686 // //-------------unicode traitement
1688 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1689 // currentPosition++;
1690 // while (source[currentPosition] == 'u') {
1691 // currentPosition++;
1694 // Character.getNumericValue(source[currentPosition++]))
1698 // Character.getNumericValue(source[currentPosition++]))
1702 // Character.getNumericValue(source[currentPosition++]))
1706 // Character.getNumericValue(source[currentPosition++]))
1710 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1712 // currentCharacter =
1713 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 +
1717 // handle the \\u case manually into comment
1718 // if (currentCharacter == '\\') {
1719 // if (source[currentPosition] == '\\')
1720 // currentPosition++;
1721 // } //jump over the \\
1722 boolean isUnicode = false;
// consume the comment up to the end of the line
1723 while (currentCharacter != '\r'
1724 && currentCharacter != '\n') {
1725 this.lastCommentLinePosition = this.currentPosition;
1726 if (currentCharacter == '?') {
1727 if (getNextChar('>')) {
1728 // ?> breaks line comments
1729 startPosition = currentPosition - 2;
1731 return TokenNameINLINE_HTML;
1734 // get the next char
1736 currentCharacter = source[currentPosition++];
1737 // if (((currentCharacter =
1738 // source[currentPosition++])
1740 // && (source[currentPosition] == 'u')) {
1741 // isUnicode = true;
1742 // //-------------unicode traitement
1744 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1745 // currentPosition++;
1746 // while (source[currentPosition] == 'u') {
1747 // currentPosition++;
1750 // Character.getNumericValue(source[currentPosition++]))
1754 // Character.getNumericValue(
1755 // source[currentPosition++]))
1759 // Character.getNumericValue(
1760 // source[currentPosition++]))
1764 // Character.getNumericValue(
1765 // source[currentPosition++]))
1769 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1771 // currentCharacter =
1772 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 +
1776 // handle the \\u case manually into comment
1777 // if (currentCharacter == '\\') {
1778 // if (source[currentPosition] == '\\')
1779 // currentPosition++;
1780 // } //jump over the \\
1783 endPositionForLineComment = currentPosition - 6;
1785 endPositionForLineComment = currentPosition - 1;
1787 // recordComment(false);
1788 recordComment(TokenNameCOMMENT_LINE);
1789 if (this.taskTags != null)
1790 checkTaskTag(this.startPosition,
1791 this.currentPosition);
1792 if ((currentCharacter == '\r')
1793 || (currentCharacter == '\n')) {
1794 checkNonExternalizeString();
1795 if (recordLineSeparator) {
1797 pushUnicodeLineSeparator();
1799 pushLineSeparator();
1805 if (tokenizeComments) {
1807 currentPosition = endPositionForLineComment;
1808 // reset one character behind
1810 return TokenNameCOMMENT_LINE;
1812 } catch (IndexOutOfBoundsException e) { // an eof
1815 if (tokenizeComments) {
1817 // reset one character behind
1818 return TokenNameCOMMENT_LINE;
1824 // traditional and annotation comment
1825 boolean isJavadoc = false, star = false;
1826 // consume next character
1827 unicodeAsBackSlash = false;
1828 currentCharacter = source[currentPosition++];
1829 // if (((currentCharacter =
1830 // source[currentPosition++]) ==
1832 // && (source[currentPosition] == 'u')) {
1833 // getNextUnicodeChar();
1835 // if (withoutUnicodePtr != 0) {
1836 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1837 // currentCharacter;
1840 if (currentCharacter == '*') {
1844 if ((currentCharacter == '\r')
1845 || (currentCharacter == '\n')) {
1846 checkNonExternalizeString();
1847 if (recordLineSeparator) {
1848 pushLineSeparator();
1853 try { // get the next char
1854 currentCharacter = source[currentPosition++];
1855 // if (((currentCharacter =
1856 // source[currentPosition++])
1858 // && (source[currentPosition] == 'u')) {
1859 // //-------------unicode traitement
1861 // getNextUnicodeChar();
1863 // handle the \\u case manually into comment
1864 // if (currentCharacter == '\\') {
1865 // if (source[currentPosition] == '\\')
1866 // currentPosition++;
1867 // //jump over the \\
1869 // empty comment is not a javadoc /**/
1870 if (currentCharacter == '/') {
1873 // loop until end of comment */
// star remembers whether the previously read character was an asterisk
1874 while ((currentCharacter != '/') || (!star)) {
1875 if ((currentCharacter == '\r')
1876 || (currentCharacter == '\n')) {
1877 checkNonExternalizeString();
1878 if (recordLineSeparator) {
1879 pushLineSeparator();
1884 star = currentCharacter == '*';
1886 currentCharacter = source[currentPosition++];
1887 // if (((currentCharacter =
1888 // source[currentPosition++])
1890 // && (source[currentPosition] == 'u')) {
1891 // //-------------unicode traitement
1893 // getNextUnicodeChar();
1895 // handle the \\u case manually into comment
1896 // if (currentCharacter == '\\') {
1897 // if (source[currentPosition] == '\\')
1898 // currentPosition++;
1899 // } //jump over the \\
1901 // recordComment(isJavadoc);
1903 recordComment(TokenNameCOMMENT_PHPDOC);
1905 recordComment(TokenNameCOMMENT_BLOCK);
1908 if (tokenizeComments) {
1910 return TokenNameCOMMENT_PHPDOC;
1911 return TokenNameCOMMENT_BLOCK;
1914 if (this.taskTags != null) {
1915 checkTaskTag(this.startPosition,
1916 this.currentPosition);
1918 } catch (IndexOutOfBoundsException e) {
1919 // reset end position for error reporting
1920 currentPosition -= 2;
1921 throw new InvalidInputException(
1922 UNTERMINATED_COMMENT);
1926 return TokenNameDIVIDE;
1930 return TokenNameEOF;
1931 // the atEnd may not be <currentPosition ==
1932 // source.length> if
1933 // source is only some part of a real (external) stream
1934 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// a dollar sign followed by an identifier start is scanned through
// scanIdentifierOrKeyword(true); otherwise the position is restored and a
// lone TokenNameDOLLAR is reported
1936 if (currentCharacter == '$') {
1937 int oldPosition = currentPosition;
1939 currentCharacter = source[currentPosition++];
1940 if (isPHPIdentifierStart(currentCharacter)) {
1941 return scanIdentifierOrKeyword(true);
1943 currentPosition = oldPosition;
1944 return TokenNameDOLLAR;
1946 } catch (IndexOutOfBoundsException e) {
1947 currentPosition = oldPosition;
1948 return TokenNameDOLLAR;
// anything else: identifier or keyword, number, or TokenNameERROR
1951 if (isPHPIdentifierStart(currentCharacter))
1952 return scanIdentifierOrKeyword(false);
1953 if (Character.isDigit(currentCharacter))
1954 return scanNumber(false);
1955 return TokenNameERROR;
1958 } // -----------------end switch while try--------------------
// reading past the end of source ends the token stream with TokenNameEOF
1959 catch (IndexOutOfBoundsException e) {
1962 return TokenNameEOF;
1967 * @throws InvalidInputException
// Scans inline HTML with startPosition set to start. Characters are read
// until a PHP open tag (a less-than sign followed by a question mark) is
// recognised or the source ends; the visible exits return
// TokenNameINLINE_HTML, or TokenNameEOF when currentPosition is already
// beyond source.length on entry.
// In the short-tag branch fFillerToken is set to TokenNameECHO_INVISIBLE.
1969 private int getInlinedHTMLToken(int start) throws InvalidInputException {
1970 boolean phpShortTag = false; // true, if <?= detected
// nothing left to scan: clamp currentPosition and report EOF
1971 if (currentPosition > source.length) {
1972 currentPosition = source.length;
1973 return TokenNameEOF;
1975 startPosition = start;
1978 currentCharacter = source[currentPosition++];
// look for the open tag and inspect the character that follows it
1979 if (currentCharacter == '<') {
1980 if (getNextChar('?')) {
1981 currentCharacter = source[currentPosition++];
// not followed by p or P: short tag forms
1982 if ((currentCharacter != 'P')
1983 && (currentCharacter != 'p')) {
1984 if (currentCharacter != '=') { // <?=
1986 phpShortTag = false;
1991 if (ignorePHPOneLiner) { // for CodeFormatter
1992 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1995 fFillerToken = TokenNameECHO_INVISIBLE;
1997 return TokenNameINLINE_HTML;
// NOTE(review): the x, m, l check (either case) presumably feeds foundXML so
// that XML declarations are not treated as PHP - confirm
2000 boolean foundXML = false;
2001 if (getNextChar('X', 'x') >= 0) {
2002 if (getNextChar('M', 'm') >= 0) {
2003 if (getNextChar('L', 'l') >= 0) {
2012 fFillerToken = TokenNameECHO_INVISIBLE;
2014 return TokenNameINLINE_HTML;
// long open tag: the first p was read above, now expect h and p (either case)
2017 if (getNextChar('H', 'h') >= 0) {
2018 if (getNextChar('P', 'p') >= 0) {
2020 if (ignorePHPOneLiner) {
2021 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
2023 return TokenNameINLINE_HTML;
2027 return TokenNameINLINE_HTML;
// keep the line bookkeeping up to date while HTML is skipped
2035 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
2036 if (recordLineSeparator) {
2037 pushLineSeparator();
2042 } // -----------------while--------------------
2044 return TokenNameINLINE_HTML;
2045 } // -----------------try--------------------
// end of source: the remainder is reported as inline HTML
2046 catch (IndexOutOfBoundsException e) {
2047 startPosition = start;
2051 return TokenNameINLINE_HTML;
2055 * check if the PHP is only in this line (for CodeFormatter)
// Looks ahead from currentPosition using a private cursor
// (currentPositionInLine) while tracking whether a single- or double-quoted
// string is open.
// When a > directly follows a question mark, currentPosition is moved behind
// it and TokenNameEOF is returned as a dummy token. The other visible exits
// return TokenNameINLINE_HTML; on reaching the end of source currentPosition
// is set to the cursor minus one first.
2059 private int lookAheadLinePHPTag() {
2060 int currentPositionInLine = currentPosition;
2061 char previousCharInLine = ' ';
2062 char currentCharInLine = ' ';
2063 boolean singleQuotedStringActive = false;
2064 boolean doubleQuotedStringActive = false;
2067 // look ahead in this line
2069 previousCharInLine = currentCharInLine;
2070 currentCharInLine = source[currentPositionInLine++];
2071 switch (currentCharInLine) {
2073 if (previousCharInLine == '?') {
2074 // update the scanner's current Position in the source
2075 currentPosition = currentPositionInLine;
2076 // use as "dummy" token
2077 return TokenNameEOF;
2081 if (doubleQuotedStringActive) {
2082 // ignore escaped characters in double quoted strings
2083 previousCharInLine = currentCharInLine;
2084 currentCharInLine = source[currentPositionInLine++];
// a double-quoted string is closed if one is open, and opened only when no
// single-quoted string is active
2087 if (doubleQuotedStringActive) {
2088 doubleQuotedStringActive = false;
2090 if (!singleQuotedStringActive) {
2091 doubleQuotedStringActive = true;
// a single-quoted string is closed unless the previous character was a
// backslash, and opened only when no double-quoted string is active
2096 if (singleQuotedStringActive) {
2097 if (previousCharInLine != '\\') {
2098 singleQuotedStringActive = false;
2101 if (!doubleQuotedStringActive) {
2102 singleQuotedStringActive = true;
2108 return TokenNameINLINE_HTML;
// the remaining checks only apply outside of string literals
2110 if (!singleQuotedStringActive && !doubleQuotedStringActive) {
2112 return TokenNameINLINE_HTML;
2116 if (previousCharInLine == '/' && !singleQuotedStringActive
2117 && !doubleQuotedStringActive) {
2119 return TokenNameINLINE_HTML;
2123 if (previousCharInLine == '/' && !singleQuotedStringActive
2124 && !doubleQuotedStringActive) {
2126 return TokenNameINLINE_HTML;
2131 } catch (IndexOutOfBoundsException e) {
2133 currentPosition = currentPositionInLine - 1;
2134 return TokenNameINLINE_HTML;
2138 // public final void getNextUnicodeChar()
2139 // throws IndexOutOfBoundsException, InvalidInputException {
2141 // //handle the case of unicode.
2142 // //when a unicode appears then we must use a buffer that holds char
2144 // //At the end of this method currentCharacter holds the new visited char
2145 // //and currentPosition points right next after it
2147 // //ALL getNextChar.... ARE OPTIMIZED COPIES
2149 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2150 // currentPosition++;
2151 // while (source[currentPosition] == 'u') {
2152 // currentPosition++;
2156 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2158 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2160 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2162 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2164 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2166 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2167 // //need the unicode buffer
2168 // if (withoutUnicodePtr == 0) {
2169 // //buffer all the entries that have been left aside....
2170 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2171 // System.arraycopy(
2174 // withoutUnicodeBuffer,
2176 // withoutUnicodePtr);
2178 // //fill the buffer with the char
2179 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2181 // unicodeAsBackSlash = currentCharacter == '\\';
2184 * Tokenize a method body, assuming that curly brackets are properly
// Advances the scanner over a method body without producing tokens.
// Whitespace is skipped first, then the current character selects how the
// next element is skipped: string and character content, line comments,
// block comments, identifiers and numbers are consumed and their results
// discarded. Line separators are recorded through pushLineSeparator() when
// recordLineSeparator is set.
2187 public final void jumpOverMethodBody() {
2188 this.wasAcr = false;
2191 while (true) { // loop for jumping over comments
2192 // ---------Consume white space and handles
2193 // startPosition---------
2194 boolean isWhiteSpace;
2196 startPosition = currentPosition;
2197 currentCharacter = source[currentPosition++];
2198 // if (((currentCharacter = source[currentPosition++]) ==
2200 // && (source[currentPosition] == 'u')) {
2201 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2203 if (recordLineSeparator
2204 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2205 pushLineSeparator();
2206 isWhiteSpace = Character.isWhitespace(currentCharacter);
2208 } while (isWhiteSpace);
2209 // -------consume token until } is found---------
2210 switch (currentCharacter) {
// a backslash here is handed to the escape scanner; its failure is caught
2221 test = getNextChar('\\');
2224 scanDoubleQuotedEscapeCharacter();
2225 } catch (InvalidInputException ex) {
2229 // try { // consume next character
2230 unicodeAsBackSlash = false;
2231 currentCharacter = source[currentPosition++];
2232 // if (((currentCharacter = source[currentPosition++])
2234 // && (source[currentPosition] == 'u')) {
2235 // getNextUnicodeChar();
2237 if (withoutUnicodePtr != 0) {
2238 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2241 // } catch (InvalidInputException ex) {
2249 // try { // consume next character
2250 unicodeAsBackSlash = false;
2251 currentCharacter = source[currentPosition++];
2252 // if (((currentCharacter = source[currentPosition++])
2254 // && (source[currentPosition] == 'u')) {
2255 // getNextUnicodeChar();
2257 if (withoutUnicodePtr != 0) {
2258 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2261 // } catch (InvalidInputException ex) {
// skip the string content up to the closing double quote
2263 while (currentCharacter != '"') {
2264 if (currentCharacter == '\r') {
2265 if (source[currentPosition] == '\n')
2268 // the string cannot go further that the line
2270 if (currentCharacter == '\n') {
2272 // the string cannot go further that the line
2274 if (currentCharacter == '\\') {
2276 scanDoubleQuotedEscapeCharacter();
2277 } catch (InvalidInputException ex) {
2281 // try { // consume next character
2282 unicodeAsBackSlash = false;
2283 currentCharacter = source[currentPosition++];
2284 // if (((currentCharacter =
2285 // source[currentPosition++]) == '\\')
2286 // && (source[currentPosition] == 'u')) {
2287 // getNextUnicodeChar();
2289 if (withoutUnicodePtr != 0) {
2290 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2293 // } catch (InvalidInputException ex) {
2296 } catch (IndexOutOfBoundsException e) {
// a second slash (getNextChar result 0) starts a line comment, which is
// skipped up to the end of the line
2302 if ((test = getNextChar('/', '*')) == 0) {
2305 // get the next char
2306 currentCharacter = source[currentPosition++];
2307 // if (((currentCharacter =
2308 // source[currentPosition++]) ==
2310 // && (source[currentPosition] == 'u')) {
2311 // //-------------unicode traitement ------------
2312 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2313 // currentPosition++;
2314 // while (source[currentPosition] == 'u') {
2315 // currentPosition++;
2318 // Character.getNumericValue(source[currentPosition++]))
2322 // Character.getNumericValue(source[currentPosition++]))
2326 // Character.getNumericValue(source[currentPosition++]))
2330 // Character.getNumericValue(source[currentPosition++]))
2333 // //error don't care of the value
2334 // currentCharacter = 'A';
2335 // } //something different from \n and \r
2337 // currentCharacter =
2338 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2341 while (currentCharacter != '\r'
2342 && currentCharacter != '\n') {
2343 // get the next char
2344 currentCharacter = source[currentPosition++];
2345 // if (((currentCharacter =
2346 // source[currentPosition++])
2348 // && (source[currentPosition] == 'u')) {
2349 // //-------------unicode traitement
2351 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2352 // currentPosition++;
2353 // while (source[currentPosition] == 'u') {
2354 // currentPosition++;
2357 // Character.getNumericValue(source[currentPosition++]))
2361 // Character.getNumericValue(source[currentPosition++]))
2365 // Character.getNumericValue(source[currentPosition++]))
2369 // Character.getNumericValue(source[currentPosition++]))
2372 // //error don't care of the value
2373 // currentCharacter = 'A';
2374 // } //something different from \n and \r
2376 // currentCharacter =
2377 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 +
2382 if (recordLineSeparator
2383 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2384 pushLineSeparator();
2385 } catch (IndexOutOfBoundsException e) {
2386 } // an eof will them be generated
2390 // traditional and annotation comment
2391 boolean star = false;
2392 // try { // consume next character
2393 unicodeAsBackSlash = false;
2394 currentCharacter = source[currentPosition++];
2395 // if (((currentCharacter = source[currentPosition++])
2397 // && (source[currentPosition] == 'u')) {
2398 // getNextUnicodeChar();
2400 if (withoutUnicodePtr != 0) {
2401 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2404 // } catch (InvalidInputException ex) {
2406 if (currentCharacter == '*') {
2409 if (recordLineSeparator
2410 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2411 pushLineSeparator();
2412 try { // get the next char
2413 currentCharacter = source[currentPosition++];
2414 // if (((currentCharacter =
2415 // source[currentPosition++]) ==
2417 // && (source[currentPosition] == 'u')) {
2418 // //-------------unicode traitement ------------
2419 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2420 // currentPosition++;
2421 // while (source[currentPosition] == 'u') {
2422 // currentPosition++;
2425 // Character.getNumericValue(source[currentPosition++]))
2429 // Character.getNumericValue(source[currentPosition++]))
2433 // Character.getNumericValue(source[currentPosition++]))
2437 // Character.getNumericValue(source[currentPosition++]))
2440 // //error don't care of the value
2441 // currentCharacter = 'A';
2442 // } //something different from * and /
2444 // currentCharacter =
2445 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2448 // loop until end of comment */
// star remembers whether the previously read character was an asterisk
2449 while ((currentCharacter != '/') || (!star)) {
2450 if (recordLineSeparator
2451 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2452 pushLineSeparator();
2453 star = currentCharacter == '*';
2455 currentCharacter = source[currentPosition++];
2456 // if (((currentCharacter =
2457 // source[currentPosition++])
2459 // && (source[currentPosition] == 'u')) {
2460 // //-------------unicode traitement
2462 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2463 // currentPosition++;
2464 // while (source[currentPosition] == 'u') {
2465 // currentPosition++;
2468 // Character.getNumericValue(source[currentPosition++]))
2472 // Character.getNumericValue(source[currentPosition++]))
2476 // Character.getNumericValue(source[currentPosition++]))
2480 // Character.getNumericValue(source[currentPosition++]))
2483 // //error don't care of the value
2484 // currentCharacter = 'A';
2485 // } //something different from * and /
2487 // currentCharacter =
2488 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 +
2493 } catch (IndexOutOfBoundsException e) {
// identifiers, keywords and variables are scanned and the result discarded;
// the argument tells the scanner whether the token started with a dollar sign
2501 if (isPHPIdentOrVarStart(currentCharacter)) {
2503 scanIdentifierOrKeyword((currentCharacter == '$'));
2504 } catch (InvalidInputException ex) {
// digit test through the ObviousIdentCharNatures lookup table
2509 if (ObviousIdentCharNatures[currentCharacter] == C_DIGIT) {
2510 // if (Character.isDigit(currentCharacter)) {
2513 } catch (InvalidInputException ex) {
2520 // -----------------end switch while try--------------------
2521 } catch (IndexOutOfBoundsException e) {
2522 } catch (InvalidInputException e) {
2527 // public final boolean jumpOverUnicodeWhiteSpace()
2528 // throws InvalidInputException {
2530 // //handle the case of unicode. Jump over the next whiteSpace
2531 // //making startPosition pointing on the next available char
2532 // //On false, the currentCharacter is filled up with a potential
2536 // this.wasAcr = false;
2537 // int c1, c2, c3, c4;
2538 // int unicodeSize = 6;
2539 // currentPosition++;
2540 // while (source[currentPosition] == 'u') {
2541 // currentPosition++;
2545 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2547 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2549 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2551 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2553 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2556 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2557 // if (recordLineSeparator
2558 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2559 // pushLineSeparator();
2560 // if (Character.isWhitespace(currentCharacter))
2563 // //buffer the new char which is not a white space
2564 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2565 // //withoutUnicodePtr == 1 is true here
2567 // } catch (IndexOutOfBoundsException e) {
2568 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
/**
 * Returns a copy of the line-end positions recorded so far.
 * A new array of {@code linePtr + 1} elements is allocated and filled from
 * {@code lineEnds} (starting at index 0 on both sides), so the caller never
 * receives the scanner's own, possibly over-allocated, array.
 *
 * @return a newly allocated array holding the recorded line ends
 */
2571 public final int[] getLineEnds() {
2572 // return a bounded copy of this.lineEnds
2574 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0,
/**
 * Accessor for the characters being scanned.
 * NOTE(review): presumably hands back the scanner's {@code source} buffer
 * itself rather than a copy - confirm before mutating the result.
 *
 * @return a char array (presumably the scanned source)
 */
2579 public char[] getSource() {
/**
 * Tells whether a token type denotes an identifier or a keyword.
 * NOTE(review): the {@code >} comparison assumes every keyword constant is
 * numbered above {@code TokenNameKEYWORD} - confirm against ITerminalSymbols.
 *
 * @param token the token type to classify
 * @return {@code true} when {@code token} equals {@code TokenNameIdentifier}
 *         or is strictly greater than {@code TokenNameKEYWORD}
 */
2583 public static boolean isIdentifierOrKeyword(int token) {
2584 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
/**
 * Returns the source of a one-character token, i.e. the character found at
 * {@code source[startPosition]}.
 * NOTE(review): per the comments below, common characters are presumably
 * answered with shared, pre-built arrays; the final statement is the fallback
 * that allocates a fresh one-element array - confirm.
 *
 * @return a char array of length 1 holding the token's character
 */
2587 final char[] optimizedCurrentTokenSource1() {
2588 // always return the same char[], built only once
2589 // optimization at no speed cost of 99.5 % of the singleCharIdentifier
2590 char charOne = source[startPosition];
2645 return new char[] { charOne };
/**
 * Returns the source of a two-character token, reusing a previously built
 * array whenever possible.
 * The two characters are read from {@code source} at {@code startPosition}
 * and {@code startPosition + 1}. A series of shared constants
 * ({@code charArray_va} .. {@code charArray_vz}) is returned for some inputs
 * (NOTE(review): presumably the variables "$a".."$z" - confirm). Otherwise the
 * pair is hashed into a bucket of {@code charArray_length[0]}, the bucket is
 * searched for an equal entry, and a new entry is stored if none is found.
 *
 * @return a char array of length 2 holding the token's characters
 */
2649 final char[] optimizedCurrentTokenSource2() {
2651 c0 = source[startPosition];
2652 c1 = source[startPosition + 1];
2654 // always return the same char[], built only once
2655 // optimization at no speed cost of 99.5 % of the
2656 // singleCharIdentifier
2659 return charArray_va;
2661 return charArray_vb;
2663 return charArray_vc;
2665 return charArray_vd;
2667 return charArray_ve;
2669 return charArray_vf;
2671 return charArray_vg;
2673 return charArray_vh;
2675 return charArray_vi;
2677 return charArray_vj;
2679 return charArray_vk;
2681 return charArray_vl;
2683 return charArray_vm;
2685 return charArray_vn;
2687 return charArray_vo;
2689 return charArray_vp;
2691 return charArray_vq;
2693 return charArray_vr;
2695 return charArray_vs;
2697 return charArray_vt;
2699 return charArray_vu;
2701 return charArray_vv;
2703 return charArray_vw;
2705 return charArray_vx;
2707 return charArray_vy;
2709 return charArray_vz;
2712 // try to return the same char[] build only once
// bucket index: both characters folded together, reduced modulo TableSize
2713 int hash = ((c0 << 6) + c1) % TableSize;
2714 char[][] table = charArray_length[0][hash];
// first scan: entries up to the end of the bucket
// (NOTE(review): presumably starting after newEntry2 - confirm)
2716 while (++i < InternalTableSize) {
2717 char[] charArray = table[i];
2718 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2721 // ---------other side---------
// second scan: entries up to and including index newEntry2
2723 int max = newEntry2;
2724 while (++i <= max) {
2725 char[] charArray = table[i];
2726 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2729 // --------add the entry-------
// the slot following newEntry2 receives the new array; the index is
// checked against InternalTableSize first (presumably wrapping around)
2730 if (++max >= InternalTableSize)
2733 table[max] = (r = new char[] { c0, c1 });
/**
 * Returns the source of a three-character token, reusing a previously built
 * array whenever possible.
 * The three characters starting at {@code startPosition} are combined into a
 * hash (shifts of 12, 6 and 0 bits) that selects a bucket of
 * {@code charArray_length[1]}; the bucket is searched for an entry with the
 * same three characters and a new entry is stored if none is found.
 *
 * @return a char array of length 3 holding the token's characters
 */
2738 final char[] optimizedCurrentTokenSource3() {
2739 // try to return the same char[] build only once
2741 int hash = (((c0 = source[startPosition]) << 12)
2742 + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2744 char[][] table = charArray_length[1][hash];
// first scan: entries up to the end of the bucket
// (NOTE(review): presumably starting after newEntry3 - confirm)
2746 while (++i < InternalTableSize) {
2747 char[] charArray = table[i];
2748 if ((c0 == charArray[0]) && (c1 == charArray[1])
2749 && (c2 == charArray[2]))
2752 // ---------other side---------
// second scan: entries up to and including index newEntry3
2754 int max = newEntry3;
2755 while (++i <= max) {
2756 char[] charArray = table[i];
2757 if ((c0 == charArray[0]) && (c1 == charArray[1])
2758 && (c2 == charArray[2]))
2761 // --------add the entry-------
// the slot following newEntry3 receives the new array; the index is
// checked against InternalTableSize first (presumably wrapping around)
2762 if (++max >= InternalTableSize)
2765 table[max] = (r = new char[] { c0, c1, c2 });
/**
 * Returns the source of a four-character token, reusing a previously built
 * array whenever possible.
 * The four characters starting at {@code startPosition} are combined into a
 * {@code long} hash (shifts of 18, 12, 6 and 0 bits) that selects a bucket of
 * {@code charArray_length[2]}; the bucket is searched for an entry with the
 * same four characters and a new entry is stored if none is found.
 *
 * @return a char array of length 4 holding the token's characters
 */
2770 final char[] optimizedCurrentTokenSource4() {
2771 // try to return the same char[] build only once
2772 char c0, c1, c2, c3;
// c0 is widened to long before shifting so the sum is computed in 64 bits
2773 long hash = ((((long) (c0 = source[startPosition])) << 18)
2774 + ((c1 = source[startPosition + 1]) << 12)
2775 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2777 char[][] table = charArray_length[2][(int) hash];
// first scan: entries up to the end of the bucket
// (NOTE(review): presumably starting after newEntry4 - confirm)
2779 while (++i < InternalTableSize) {
2780 char[] charArray = table[i];
2781 if ((c0 == charArray[0]) && (c1 == charArray[1])
2782 && (c2 == charArray[2]) && (c3 == charArray[3]))
2785 // ---------other side---------
// second scan: entries up to and including index newEntry4
2787 int max = newEntry4;
2788 while (++i <= max) {
2789 char[] charArray = table[i];
2790 if ((c0 == charArray[0]) && (c1 == charArray[1])
2791 && (c2 == charArray[2]) && (c3 == charArray[3]))
2794 // --------add the entry-------
// the slot following newEntry4 receives the new array; the index is
// checked against InternalTableSize first (presumably wrapping around)
2795 if (++max >= InternalTableSize)
2798 table[max] = (r = new char[] { c0, c1, c2, c3 });
/**
 * Returns the source of a five-character token, reusing a previously built
 * array whenever possible.
 * The five characters starting at {@code startPosition} are combined into a
 * {@code long} hash (shifts of 24, 18, 12, 6 and 0 bits) that selects a bucket
 * of {@code charArray_length[3]}; the bucket is searched for an entry with the
 * same five characters and a new entry is stored if none is found.
 *
 * @return a char array of length 5 holding the token's characters
 */
2803 final char[] optimizedCurrentTokenSource5() {
2804 // try to return the same char[] build only once
2805 char c0, c1, c2, c3, c4;
// the two highest terms are widened to long before shifting
2806 long hash = ((((long) (c0 = source[startPosition])) << 24)
2807 + (((long) (c1 = source[startPosition + 1])) << 18)
2808 + ((c2 = source[startPosition + 2]) << 12)
2809 + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2811 char[][] table = charArray_length[3][(int) hash];
// first scan: entries up to the end of the bucket
// (NOTE(review): presumably starting after newEntry5 - confirm)
2813 while (++i < InternalTableSize) {
2814 char[] charArray = table[i];
2815 if ((c0 == charArray[0]) && (c1 == charArray[1])
2816 && (c2 == charArray[2]) && (c3 == charArray[3])
2817 && (c4 == charArray[4]))
2820 // ---------other side---------
// second scan: entries up to and including index newEntry5
2822 int max = newEntry5;
2823 while (++i <= max) {
2824 char[] charArray = table[i];
2825 if ((c0 == charArray[0]) && (c1 == charArray[1])
2826 && (c2 == charArray[2]) && (c3 == charArray[3])
2827 && (c4 == charArray[4]))
2830 // --------add the entry-------
// the slot following newEntry5 receives the new array; the index is
// checked against InternalTableSize first (presumably wrapping around)
2831 if (++max >= InternalTableSize)
2834 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
/**
 * Returns the source of a six-character token, reusing a previously built
 * array whenever possible.
 * The six characters starting at {@code startPosition} are combined into a
 * {@code long} hash that selects a bucket of {@code charArray_length[4]}; the
 * bucket is searched for an entry with the same six characters and a new
 * entry is stored if none is found.
 *
 * @return a char array of length 6 holding the token's characters
 */
2839 final char[] optimizedCurrentTokenSource6() {
2840 // try to return the same char[] build only once
2841 char c0, c1, c2, c3, c4, c5;
// NOTE(review): c0 is shifted by 32 while the remaining terms step down by
// 6 bits from 24; entries are matched by comparing characters, so this
// should only influence bucket distribution - confirm it is intentional.
2842 long hash = ((((long) (c0 = source[startPosition])) << 32)
2843 + (((long) (c1 = source[startPosition + 1])) << 24)
2844 + (((long) (c2 = source[startPosition + 2])) << 18)
2845 + ((c3 = source[startPosition + 3]) << 12)
2846 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2848 char[][] table = charArray_length[4][(int) hash];
// first scan: entries up to the end of the bucket
// (NOTE(review): presumably starting after newEntry6 - confirm)
2850 while (++i < InternalTableSize) {
2851 char[] charArray = table[i];
2852 if ((c0 == charArray[0]) && (c1 == charArray[1])
2853 && (c2 == charArray[2]) && (c3 == charArray[3])
2854 && (c4 == charArray[4]) && (c5 == charArray[5]))
2857 // ---------other side---------
// second scan: entries up to and including index newEntry6
2859 int max = newEntry6;
2860 while (++i <= max) {
2861 char[] charArray = table[i];
2862 if ((c0 == charArray[0]) && (c1 == charArray[1])
2863 && (c2 == charArray[2]) && (c3 == charArray[3])
2864 && (c4 == charArray[4]) && (c5 == charArray[5]))
2867 // --------add the entry-------
// the slot following newEntry6 receives the new array; the index is
// checked against InternalTableSize first (presumably wrapping around)
2868 if (++max >= InternalTableSize)
2871 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
/**
 * Records the position of the line break held in {@code currentCharacter}
 * (located at {@code currentPosition - 1}) in {@code lineEnds}.
 * <ul>
 * <li>For '\r' the position is appended; if the next source character is
 * '\n' the same slot is overwritten with the position of that '\n', so a
 * CR+LF pair yields a single entry.</li>
 * <li>For '\n' the last entry is overwritten when {@code wasAcr} is set and
 * that entry points at the immediately preceding character; otherwise the
 * position is appended.</li>
 * </ul>
 * A position not greater than the last recorded one is detected before
 * appending (NOTE(review): presumably the method then returns without
 * recording - confirm). When {@code lineEnds} is full it is reallocated with
 * {@code INCREMENT} additional slots.
 *
 * @throws InvalidInputException declared by the signature
 */
2876 public final void pushLineSeparator() throws InvalidInputException {
2877 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// number of slots added each time lineEnds has to grow
2878 final int INCREMENT = 250;
2879 if (this.checkNonExternalizedStringLiterals) {
2880 // reinitialize the current line for non externalize strings purpose
2883 // currentCharacter is at position currentPosition-1
2885 if (currentCharacter == '\r') {
2886 int separatorPos = currentPosition - 1;
// already recorded at or beyond this position
2887 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2889 // System.out.println("CR-" + separatorPos);
2891 lineEnds[++linePtr] = separatorPos;
2892 } catch (IndexOutOfBoundsException e) {
2893 // linePtr value is correct
// (it was incremented before the failing store) - grow and store again
2894 int oldLength = lineEnds.length;
2895 int[] old = lineEnds;
2896 lineEnds = new int[oldLength + INCREMENT];
2897 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2898 lineEnds[linePtr] = separatorPos;
2900 // look-ahead for merged cr+lf
2902 if (source[currentPosition] == '\n') {
2903 // System.out.println("look-ahead LF-" + currentPosition);
2904 lineEnds[linePtr] = currentPosition;
2910 } catch (IndexOutOfBoundsException e) {
2915 if (currentCharacter == '\n') {
2916 // must merge eventual cr followed by lf
2917 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2918 // System.out.println("merge LF-" + (currentPosition - 1));
2919 lineEnds[linePtr] = currentPosition - 1;
2921 int separatorPos = currentPosition - 1;
// already recorded at or beyond this position
2922 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2924 // System.out.println("LF-" + separatorPos);
2926 lineEnds[++linePtr] = separatorPos;
2927 } catch (IndexOutOfBoundsException e) {
2928 // linePtr value is correct
// (it was incremented before the failing store) - grow and store again
2929 int oldLength = lineEnds.length;
2930 int[] old = lineEnds;
2931 lineEnds = new int[oldLength + INCREMENT];
2932 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2933 lineEnds[linePtr] = separatorPos;
/**
 * Variant of the line-separator recording used when the '\r' or '\n' in
 * {@code currentCharacter} was read as a unicode escape.
 * The recorded position is {@code currentPosition - 6} instead of
 * {@code currentPosition - 1}, and the CR+LF merge test compares the last
 * entry with {@code currentPosition - 7}
 * (NOTE(review): the offsets presumably account for the six characters of a
 * "\\uXXXX" escape - confirm).
 * As in the non-unicode variant, a '\r' followed by a literal '\n' in the
 * source collapses into one entry, and {@code lineEnds} grows by
 * {@code INCREMENT} slots when full.
 */
2941 public final void pushUnicodeLineSeparator() {
2942 // isUnicode means that the \r or \n has been read as a unicode
2944 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// number of slots added each time lineEnds has to grow
2945 final int INCREMENT = 250;
2946 // currentCharacter is at position currentPosition-1
2947 if (this.checkNonExternalizedStringLiterals) {
2948 // reinitialize the current line for non externalize strings purpose
2952 if (currentCharacter == '\r') {
2953 int separatorPos = currentPosition - 6;
// already recorded at or beyond this position
2954 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2956 // System.out.println("CR-" + separatorPos);
2958 lineEnds[++linePtr] = separatorPos;
2959 } catch (IndexOutOfBoundsException e) {
2960 // linePtr value is correct
// (it was incremented before the failing store) - grow and store again
2961 int oldLength = lineEnds.length;
2962 int[] old = lineEnds;
2963 lineEnds = new int[oldLength + INCREMENT];
2964 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2965 lineEnds[linePtr] = separatorPos;
2967 // look-ahead for merged cr+lf
2968 if (source[currentPosition] == '\n') {
2969 // System.out.println("look-ahead LF-" + currentPosition);
2970 lineEnds[linePtr] = currentPosition;
2978 if (currentCharacter == '\n') {
2979 // must merge eventual cr followed by lf
2980 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2981 // System.out.println("merge LF-" + (currentPosition - 1));
2982 lineEnds[linePtr] = currentPosition - 6;
2984 int separatorPos = currentPosition - 6;
// already recorded at or beyond this position
2985 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2987 // System.out.println("LF-" + separatorPos);
2989 lineEnds[++linePtr] = separatorPos;
2990 } catch (IndexOutOfBoundsException e) {
2991 // linePtr value is correct
// (it was incremented before the failing store) - grow and store again
2992 int oldLength = lineEnds.length;
2993 int[] old = lineEnds;
2994 lineEnds = new int[oldLength + INCREMENT];
2995 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2996 lineEnds[linePtr] = separatorPos;
/**
 * Records the comment that has just been scanned in the parallel arrays
 * {@code commentStarts} / {@code commentStops}.
 * The start is always {@code startPosition}. The stop defaults to
 * {@code currentPosition}; for {@code TokenNameCOMMENT_LINE} it is the
 * negated {@code lastCommentLinePosition} and for
 * {@code TokenNameCOMMENT_BLOCK} the negated {@code currentPosition}, so the
 * sign of the stored stop distinguishes those kinds from any other comment.
 * Both arrays grow together by 30 slots when full.
 *
 * @param token the token type of the comment being recorded
 */
3004 public void recordComment(int token) {
// default stop position, kept for token types not handled below
3006 int stopPosition = this.currentPosition;
3008 case TokenNameCOMMENT_LINE:
3009 stopPosition = -this.lastCommentLinePosition;
3011 case TokenNameCOMMENT_BLOCK:
3012 stopPosition = -this.currentPosition;
3016 // a new comment is recorded
3017 int length = this.commentStops.length;
3018 if (++this.commentPtr >= length) {
3019 System.arraycopy(this.commentStops, 0,
3020 this.commentStops = new int[length + 30], 0, length);
3021 // grows the positions buffers too
3022 System.arraycopy(this.commentStarts, 0,
3023 this.commentStarts = new int[length + 30], 0, length);
3025 this.commentStops[this.commentPtr] = stopPosition;
3026 this.commentStarts[this.commentPtr] = this.startPosition;
3029 // public final void recordComment(boolean isJavadoc) {
3030 // // a new annotation comment is recorded
3032 // commentStops[++commentPtr] = isJavadoc
3033 // ? currentPosition
3034 // : -currentPosition;
3035 // } catch (IndexOutOfBoundsException e) {
3036 // int oldStackLength = commentStops.length;
3037 // int[] oldStack = commentStops;
3038 // commentStops = new int[oldStackLength + 30];
3039 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
3040 // commentStops[commentPtr] = isJavadoc ? currentPosition :
3041 // -currentPosition;
3042 // //grows the positions buffers too
3043 // int[] old = commentStarts;
3044 // commentStarts = new int[oldStackLength + 30];
3045 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
3047 // //the buffer is of a correct size here
3048 // commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning can restart at {@code begin}.
 * {@code initialPosition}, {@code startPosition} and {@code currentPosition}
 * are all set to {@code begin}, the end-of-file position is derived from
 * {@code end}, and the comment stack is emptied.
 *
 * @param begin the position at which scanning restarts
 * @param end the position used to derive {@code eofPosition}
 *        (NOTE(review): presumably the inclusive index of the last character
 *        to scan, since {@code eofPosition} becomes {@code end + 1} - confirm)
 */
3050 public void resetTo(int begin, int end) {
3051 // reset the scanner to a given position where it may rescan again
3053 initialPosition = startPosition = currentPosition = begin;
// end + 1 would overflow for Integer.MAX_VALUE, so that value is kept as is
3054 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
3055 commentPtr = -1; // reset comment stack
/**
 * Handles the character following a backslash inside a single-quoted string.
 * The next source character is consumed into {@code currentCharacter} and a
 * switch on it then replaces {@code currentCharacter} with either a single
 * quote or a backslash.
 * NOTE(review): the case labels are presumably the single quote, the
 * backslash and a default that keeps a literal backslash - confirm.
 *
 * @throws InvalidInputException declared by the signature
 */
3058 public final void scanSingleQuotedEscapeCharacter()
3059 throws InvalidInputException {
3060 // the string with "\\u" is a legal string of two chars \ and u
3061 // thus we use a direct access to the source (for regular cases).
3062 // if (unicodeAsBackSlash) {
3063 // // consume next character
3064 // unicodeAsBackSlash = false;
3065 // if (((currentCharacter = source[currentPosition++]) == '\\')
3066 // && (source[currentPosition] == 'u')) {
3067 // getNextUnicodeChar();
3069 // if (withoutUnicodePtr != 0) {
3070 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// consume the character that follows the backslash
3074 currentCharacter = source[currentPosition++];
3075 switch (currentCharacter) {
3077 currentCharacter = '\'';
3080 currentCharacter = '\\';
3083 currentCharacter = '\\';
/**
 * Handles the character following a backslash inside a double-quoted string.
 * The next source character is consumed into {@code currentCharacter} and
 * translated: the visible branches produce tab, line feed, carriage return,
 * double quote, single quote, backslash and '$'. Any other character is
 * tried as an octal escape of up to three digits, whose value is accumulated
 * in {@code number} and stored back into {@code currentCharacter}.
 * The translations to '\b' and '\f' are commented out.
 *
 * @throws InvalidInputException with {@code INVALID_ESCAPE}
 *         (NOTE(review): presumably when the accumulated octal value is out
 *         of range - confirm)
 */
3088 public final void scanDoubleQuotedEscapeCharacter()
3089 throws InvalidInputException {
// consume the character that follows the backslash
3090 currentCharacter = source[currentPosition++];
3091 switch (currentCharacter) {
3093 // currentCharacter = '\b';
3096 currentCharacter = '\t';
3099 currentCharacter = '\n';
3102 // currentCharacter = '\f';
3105 currentCharacter = '\r';
3108 currentCharacter = '\"';
3111 currentCharacter = '\'';
3114 currentCharacter = '\\';
3117 currentCharacter = '$';
3120 // -----------octal escape--------------
3122 // OctalDigit OctalDigit
3123 // ZeroToThree OctalDigit OctalDigit
3124 int number = Character.getNumericValue(currentCharacter);
3125 if (number >= 0 && number <= 7) {
// a first digit above 3 cannot start a three-digit octal escape
3126 boolean zeroToThreeNot = number > 3;
3128 .isDigit(currentCharacter = source[currentPosition++])) {
3129 int digit = Character.getNumericValue(currentCharacter);
3130 if (digit >= 0 && digit <= 7) {
3131 number = (number * 8) + digit;
3133 .isDigit(currentCharacter = source[currentPosition++])) {
3134 if (zeroToThreeNot) { // has read \NotZeroToThree
3136 // Digit --> ignore last character
3140 .getNumericValue(currentCharacter);
3141 if (digit >= 0 && digit <= 7) {
3142 // has read \ZeroToThree OctalDigit
3144 number = (number * 8) + digit;
3145 } else { // has read \ZeroToThree OctalDigit
3147 // --> ignore last character
3151 } else { // has read \OctalDigit NonDigit--> ignore
3156 } else { // has read \OctalDigit NonOctalDigit--> ignore
3161 } else { // has read \OctalDigit --> ignore last character
3165 throw new InvalidInputException(INVALID_ESCAPE);
3166 currentCharacter = (char) number;
3169 // throw new InvalidInputException(INVALID_ESCAPE);
3173 // public int scanIdentifierOrKeyword() throws InvalidInputException {
3174 // return scanIdentifierOrKeyword( false );
/**
 * Scans the rest of an identifier and classifies it as a variable, a PHP
 * keyword or a plain identifier.
 * <p>
 * Characters are consumed while {@code getNextCharAsJavaIdentifierPart()}
 * succeeds. A token of length 1 is always {@code TokenNameIdentifier}.
 * Otherwise the token is copied into {@code data} with every character
 * lower-cased, so keyword matching is case-insensitive, and the keyword is
 * found by dispatching on the first letter and then comparing the remaining
 * characters one by one.
 * <p>
 * Side effect: {@code useAssertAsAnIndentifier} is reset to {@code false}.
 *
 * @param isVariable the scanner's own call site passes
 *        {@code currentCharacter == '$'}
 *        (NOTE(review): presumably {@code TokenNameVariable} is returned
 *        only when this flag is set - confirm)
 * @return {@code TokenNameVariable}, one of the keyword token constants, or
 *         {@code TokenNameIdentifier}
 * @throws InvalidInputException declared by the signature
 */
3176 public int scanIdentifierOrKeyword(boolean isVariable)
3177 throws InvalidInputException {
3179 // first dispatch on the first char.
3180 // then the length. If there are several
3181 // keywords with the same length AND the same first char, then do another
3182 // dispatch on the second char :-)...cool....but fast !
3183 useAssertAsAnIndentifier = false;
3184 while (getNextCharAsJavaIdentifierPart()) {
3188 // if (new String(getCurrentTokenSource()).equals("$this")) {
3189 // return TokenNamethis;
3191 return TokenNameVariable;
3196 // if (withoutUnicodePtr == 0)
3197 // quick test on length == 1 but not on length > 12 while most
3199 // have a length which is <= 12...but there are lots of identifier with
3200 // only one char....
3202 if ((length = currentPosition - startPosition) == 1)
3203 return TokenNameIdentifier;
// lower-cased copy of the token: keywords are matched case-insensitively
3205 data = new char[length];
3206 index = startPosition;
3207 for (int i = 0; i < length; i++) {
3208 data[i] = Character.toLowerCase(source[index + i]);
3212 // if ((length = withoutUnicodePtr) == 1)
3213 // return TokenNameIdentifier;
3214 // // data = withoutUnicodeBuffer;
3215 // data = new char[withoutUnicodeBuffer.length];
3216 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3217 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
// NOTE(review): index is presumably reset to 0 before this read - confirm
3221 firstLetter = data[index];
3222 switch (firstLetter) {
// magic constants: __FILE__ __LINE__ __CLASS__ __METHOD__ __FUNCTION__
3227 if ((data[++index] == '_') && (data[++index] == 'f')
3228 && (data[++index] == 'i') && (data[++index] == 'l')
3229 && (data[++index] == 'e') && (data[++index] == '_')
3230 && (data[++index] == '_'))
3231 return TokenNameFILE;
3232 index = 0; // __LINE__
3233 if ((data[++index] == '_') && (data[++index] == 'l')
3234 && (data[++index] == 'i') && (data[++index] == 'n')
3235 && (data[++index] == 'e') && (data[++index] == '_')
3236 && (data[++index] == '_'))
3237 return TokenNameLINE;
3241 if ((data[++index] == '_') && (data[++index] == 'c')
3242 && (data[++index] == 'l') && (data[++index] == 'a')
3243 && (data[++index] == 's') && (data[++index] == 's')
3244 && (data[++index] == '_') && (data[++index] == '_'))
3245 return TokenNameCLASS_C;
3249 if ((data[++index] == '_') && (data[++index] == 'm')
3250 && (data[++index] == 'e') && (data[++index] == 't')
3251 && (data[++index] == 'h') && (data[++index] == 'o')
3252 && (data[++index] == 'd') && (data[++index] == '_')
3253 && (data[++index] == '_'))
3254 return TokenNameMETHOD_C;
3258 if ((data[++index] == '_') && (data[++index] == 'f')
3259 && (data[++index] == 'u') && (data[++index] == 'n')
3260 && (data[++index] == 'c') && (data[++index] == 't')
3261 && (data[++index] == 'i') && (data[++index] == 'o')
3262 && (data[++index] == 'n') && (data[++index] == '_')
3263 && (data[++index] == '_'))
3264 return TokenNameFUNC_C;
3267 return TokenNameIdentifier;
3269 // as and array abstract
3273 if ((data[++index] == 's')) {
3276 return TokenNameIdentifier;
3279 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3280 return TokenNameand;
3282 return TokenNameIdentifier;
3285 if ((data[++index] == 'r') && (data[++index] == 'r')
3286 && (data[++index] == 'a') && (data[++index] == 'y'))
3287 return TokenNamearray;
3288 return TokenNameIdentifier;
3290 if ((data[++index] == 'b') && (data[++index] == 's')
3291 && (data[++index] == 't') && (data[++index] == 'r')
3292 && (data[++index] == 'a') && (data[++index] == 'c')
3293 && (data[++index] == 't'))
3294 return TokenNameabstract;
3295 return TokenNameIdentifier;
3297 return TokenNameIdentifier;
3302 if ((data[++index] == 'r') && (data[++index] == 'e')
3303 && (data[++index] == 'a') && (data[++index] == 'k'))
3304 return TokenNamebreak;
3305 return TokenNameIdentifier;
3307 return TokenNameIdentifier;
3309 // case catch class clone const continue
3312 if ((data[++index] == 'a') && (data[++index] == 's')
3313 && (data[++index] == 'e'))
3314 return TokenNamecase;
3315 return TokenNameIdentifier;
3317 if ((data[++index] == 'a') && (data[++index] == 't')
3318 && (data[++index] == 'c') && (data[++index] == 'h'))
3319 return TokenNamecatch;
3321 if ((data[++index] == 'l') && (data[++index] == 'a')
3322 && (data[++index] == 's') && (data[++index] == 's'))
3323 return TokenNameclass;
3325 if ((data[++index] == 'l') && (data[++index] == 'o')
3326 && (data[++index] == 'n') && (data[++index] == 'e'))
3327 return TokenNameclone;
3329 if ((data[++index] == 'o') && (data[++index] == 'n')
3330 && (data[++index] == 's') && (data[++index] == 't'))
3331 return TokenNameconst;
3332 return TokenNameIdentifier;
3334 if ((data[++index] == 'o') && (data[++index] == 'n')
3335 && (data[++index] == 't') && (data[++index] == 'i')
3336 && (data[++index] == 'n') && (data[++index] == 'u')
3337 && (data[++index] == 'e'))
3338 return TokenNamecontinue;
3339 return TokenNameIdentifier;
3341 return TokenNameIdentifier;
3343 // declare default do die
3344 // TODO delete define ==> no keyword !
3347 if ((data[++index] == 'o'))
3349 return TokenNameIdentifier;
3351 // if ((data[++index] == 'e')
3352 // && (data[++index] == 'f')
3353 // && (data[++index] == 'i')
3354 // && (data[++index] == 'n')
3355 // && (data[++index] == 'e'))
3356 // return TokenNamedefine;
3358 // return TokenNameIdentifier;
3360 if ((data[++index] == 'e') && (data[++index] == 'c')
3361 && (data[++index] == 'l') && (data[++index] == 'a')
3362 && (data[++index] == 'r') && (data[++index] == 'e'))
3363 return TokenNamedeclare;
3365 if ((data[++index] == 'e') && (data[++index] == 'f')
3366 && (data[++index] == 'a') && (data[++index] == 'u')
3367 && (data[++index] == 'l') && (data[++index] == 't'))
3368 return TokenNamedefault;
3369 return TokenNameIdentifier;
3371 return TokenNameIdentifier;
3373 // echo else exit elseif extends eval
3376 if ((data[++index] == 'c') && (data[++index] == 'h')
3377 && (data[++index] == 'o'))
3378 return TokenNameecho;
// NOTE(review): the fallback chains below re-read data[index] without
// resetting index. If the chain above matched more than one character
// before failing, index has already advanced; a 4-character input such as
// "ecls" would then appear to reach data[4], past the end of data.
// The same pattern recurs for endif/empty, endfor/elseif and
// instanceof/implements - verify.
3379 else if ((data[index] == 'l') && (data[++index] == 's')
3380 && (data[++index] == 'e'))
3381 return TokenNameelse;
3382 else if ((data[index] == 'x') && (data[++index] == 'i')
3383 && (data[++index] == 't'))
3384 return TokenNameexit;
3385 else if ((data[index] == 'v') && (data[++index] == 'a')
3386 && (data[++index] == 'l'))
3387 return TokenNameeval;
3388 return TokenNameIdentifier;
3391 if ((data[++index] == 'n') && (data[++index] == 'd')
3392 && (data[++index] == 'i') && (data[++index] == 'f'))
3393 return TokenNameendif;
3394 if ((data[index] == 'm') && (data[++index] == 'p')
3395 && (data[++index] == 't') && (data[++index] == 'y'))
3396 return TokenNameempty;
3397 return TokenNameIdentifier;
3400 if ((data[++index] == 'n') && (data[++index] == 'd')
3401 && (data[++index] == 'f') && (data[++index] == 'o')
3402 && (data[++index] == 'r'))
3403 return TokenNameendfor;
3404 else if ((data[index] == 'l') && (data[++index] == 's')
3405 && (data[++index] == 'e') && (data[++index] == 'i')
3406 && (data[++index] == 'f'))
3407 return TokenNameelseif;
3408 return TokenNameIdentifier;
3410 if ((data[++index] == 'x') && (data[++index] == 't')
3411 && (data[++index] == 'e') && (data[++index] == 'n')
3412 && (data[++index] == 'd') && (data[++index] == 's'))
3413 return TokenNameextends;
3414 return TokenNameIdentifier;
3417 if ((data[++index] == 'n') && (data[++index] == 'd')
3418 && (data[++index] == 'w') && (data[++index] == 'h')
3419 && (data[++index] == 'i') && (data[++index] == 'l')
3420 && (data[++index] == 'e'))
3421 return TokenNameendwhile;
3422 return TokenNameIdentifier;
3425 if ((data[++index] == 'n') && (data[++index] == 'd')
3426 && (data[++index] == 's') && (data[++index] == 'w')
3427 && (data[++index] == 'i') && (data[++index] == 't')
3428 && (data[++index] == 'c') && (data[++index] == 'h'))
3429 return TokenNameendswitch;
3430 return TokenNameIdentifier;
3433 if ((data[++index] == 'n') && (data[++index] == 'd')
3434 && (data[++index] == 'd') && (data[++index] == 'e')
3435 && (data[++index] == 'c') && (data[++index] == 'l')
3436 && (data[++index] == 'a') && (data[++index] == 'r')
3437 && (data[++index] == 'e'))
3438 return TokenNameenddeclare;
3440 if ((data[++index] == 'n') // endforeach
3441 && (data[++index] == 'd')
3442 && (data[++index] == 'f')
3443 && (data[++index] == 'o')
3444 && (data[++index] == 'r')
3445 && (data[++index] == 'e')
3446 && (data[++index] == 'a')
3447 && (data[++index] == 'c') && (data[++index] == 'h'))
3448 return TokenNameendforeach;
3449 return TokenNameIdentifier;
3451 return TokenNameIdentifier;
3453 // for false final function
3456 if ((data[++index] == 'o') && (data[++index] == 'r'))
3457 return TokenNamefor;
3458 return TokenNameIdentifier;
3460 // if ((data[++index] == 'a') && (data[++index] == 'l')
3461 // && (data[++index] == 's') && (data[++index] == 'e'))
3462 // return TokenNamefalse;
3463 if ((data[++index] == 'i') && (data[++index] == 'n')
3464 && (data[++index] == 'a') && (data[++index] == 'l'))
3465 return TokenNamefinal;
3466 return TokenNameIdentifier;
3469 if ((data[++index] == 'o') && (data[++index] == 'r')
3470 && (data[++index] == 'e') && (data[++index] == 'a')
3471 && (data[++index] == 'c') && (data[++index] == 'h'))
3472 return TokenNameforeach;
3473 return TokenNameIdentifier;
3476 if ((data[++index] == 'u') && (data[++index] == 'n')
3477 && (data[++index] == 'c') && (data[++index] == 't')
3478 && (data[++index] == 'i') && (data[++index] == 'o')
3479 && (data[++index] == 'n'))
3480 return TokenNamefunction;
3481 return TokenNameIdentifier;
3483 return TokenNameIdentifier;
3487 if ((data[++index] == 'l') && (data[++index] == 'o')
3488 && (data[++index] == 'b') && (data[++index] == 'a')
3489 && (data[++index] == 'l')) {
3490 return TokenNameglobal;
3493 return TokenNameIdentifier;
3495 // if int isset include include_once instanceof interface implements
3498 if (data[++index] == 'f')
3500 return TokenNameIdentifier;
3502 // if ((data[++index] == 'n') && (data[++index] == 't'))
3503 // return TokenNameint;
3505 // return TokenNameIdentifier;
3507 if ((data[++index] == 's') && (data[++index] == 's')
3508 && (data[++index] == 'e') && (data[++index] == 't'))
3509 return TokenNameisset;
3510 return TokenNameIdentifier;
3512 if ((data[++index] == 'n') && (data[++index] == 'c')
3513 && (data[++index] == 'l') && (data[++index] == 'u')
3514 && (data[++index] == 'd') && (data[++index] == 'e'))
3515 return TokenNameinclude;
3516 return TokenNameIdentifier;
3519 if ((data[++index] == 'n') && (data[++index] == 't')
3520 && (data[++index] == 'e') && (data[++index] == 'r')
3521 && (data[++index] == 'f') && (data[++index] == 'a')
3522 && (data[++index] == 'c') && (data[++index] == 'e'))
3523 return TokenNameinterface;
3524 return TokenNameIdentifier;
3527 if ((data[++index] == 'n') && (data[++index] == 's')
3528 && (data[++index] == 't') && (data[++index] == 'a')
3529 && (data[++index] == 'n') && (data[++index] == 'c')
3530 && (data[++index] == 'e') && (data[++index] == 'o')
3531 && (data[++index] == 'f'))
3532 return TokenNameinstanceof;
3533 if ((data[index] == 'm') && (data[++index] == 'p')
3534 && (data[++index] == 'l') && (data[++index] == 'e')
3535 && (data[++index] == 'm') && (data[++index] == 'e')
3536 && (data[++index] == 'n') && (data[++index] == 't')
3537 && (data[++index] == 's'))
3538 return TokenNameimplements;
3539 return TokenNameIdentifier;
3541 if ((data[++index] == 'n') && (data[++index] == 'c')
3542 && (data[++index] == 'l') && (data[++index] == 'u')
3543 && (data[++index] == 'd') && (data[++index] == 'e')
3544 && (data[++index] == '_') && (data[++index] == 'o')
3545 && (data[++index] == 'n') && (data[++index] == 'c')
3546 && (data[++index] == 'e'))
3547 return TokenNameinclude_once;
3548 return TokenNameIdentifier;
3550 return TokenNameIdentifier;
3554 if ((data[++index] == 'i') && (data[++index] == 's')
3555 && (data[++index] == 't')) {
3556 return TokenNamelist;
3559 return TokenNameIdentifier;
3564 if ((data[++index] == 'e') && (data[++index] == 'w'))
3565 return TokenNamenew;
3566 return TokenNameIdentifier;
3569 if ((data[++index] == 'a') && (data[++index] == 'm')
3570 && (data[++index] == 'e') && (data[++index] == 's')
3571 && (data[++index] == 'p') && (data[++index] == 'a')
3572 && (data[++index] == 'c') && (data[++index] == 'e'))
3573 return TokenNameNamespace;
3574 return TokenNameIdentifier;
3576 // if ((data[++index] == 'u') && (data[++index] == 'l')
3577 // && (data[++index] == 'l'))
3578 // return TokenNamenull;
3580 // return TokenNameIdentifier;
3582 return TokenNameIdentifier;
3586 if (data[++index] == 'r') {
3590 // if (length == 12) {
3591 // if ((data[++index] == 'l')
3592 // && (data[++index] == 'd')
3593 // && (data[++index] == '_')
3594 // && (data[++index] == 'f')
3595 // && (data[++index] == 'u')
3596 // && (data[++index] == 'n')
3597 // && (data[++index] == 'c')
3598 // && (data[++index] == 't')
3599 // && (data[++index] == 'i')
3600 // && (data[++index] == 'o')
3601 // && (data[++index] == 'n')) {
3602 // return TokenNameold_function;
3605 return TokenNameIdentifier;
3607 // print public private protected
3610 if ((data[++index] == 'r') && (data[++index] == 'i')
3611 && (data[++index] == 'n') && (data[++index] == 't')) {
3612 return TokenNameprint;
3614 return TokenNameIdentifier;
3616 if ((data[++index] == 'u') && (data[++index] == 'b')
3617 && (data[++index] == 'l') && (data[++index] == 'i')
3618 && (data[++index] == 'c')) {
3619 return TokenNamepublic;
3621 return TokenNameIdentifier;
3623 if ((data[++index] == 'r') && (data[++index] == 'i')
3624 && (data[++index] == 'v') && (data[++index] == 'a')
3625 && (data[++index] == 't') && (data[++index] == 'e')) {
3626 return TokenNameprivate;
3628 return TokenNameIdentifier;
3630 if ((data[++index] == 'r') && (data[++index] == 'o')
3631 && (data[++index] == 't') && (data[++index] == 'e')
3632 && (data[++index] == 'c') && (data[++index] == 't')
3633 && (data[++index] == 'e') && (data[++index] == 'd')) {
3634 return TokenNameprotected;
3636 return TokenNameIdentifier;
3638 return TokenNameIdentifier;
3640 // return require require_once
3642 if ((data[++index] == 'e') && (data[++index] == 't')
3643 && (data[++index] == 'u') && (data[++index] == 'r')
3644 && (data[++index] == 'n')) {
3645 return TokenNamereturn;
3647 } else if (length == 7) {
3648 if ((data[++index] == 'e') && (data[++index] == 'q')
3649 && (data[++index] == 'u') && (data[++index] == 'i')
3650 && (data[++index] == 'r') && (data[++index] == 'e')) {
3651 return TokenNamerequire;
3653 } else if (length == 12) {
3654 if ((data[++index] == 'e') && (data[++index] == 'q')
3655 && (data[++index] == 'u') && (data[++index] == 'i')
3656 && (data[++index] == 'r') && (data[++index] == 'e')
3657 && (data[++index] == '_') && (data[++index] == 'o')
3658 && (data[++index] == 'n') && (data[++index] == 'c')
3659 && (data[++index] == 'e')) {
3660 return TokenNamerequire_once;
3663 return TokenNameIdentifier;
3665 // self static switch
3668 // if ((data[++index] == 'e') && (data[++index] == 'l') &&
3671 // return TokenNameself;
3673 // return TokenNameIdentifier;
3675 if (data[++index] == 't')
3676 if ((data[++index] == 'a') && (data[++index] == 't')
3677 && (data[++index] == 'i') && (data[++index] == 'c')) {
3678 return TokenNamestatic;
3680 return TokenNameIdentifier;
3681 else if ((data[index] == 'w') && (data[++index] == 'i')
3682 && (data[++index] == 't') && (data[++index] == 'c')
3683 && (data[++index] == 'h'))
3684 return TokenNameswitch;
3686 return TokenNameIdentifier;
3691 if ((data[++index] == 'r') && (data[++index] == 'y'))
3692 return TokenNametry;
3694 // if ((data[++index] == 'r') && (data[++index] == 'u')
3695 // && (data[++index] == 'e'))
3696 // return TokenNametrue;
3698 return TokenNameIdentifier;
3700 if ((data[++index] == 'h') && (data[++index] == 'r')
3701 && (data[++index] == 'o') && (data[++index] == 'w'))
3702 return TokenNamethrow;
3704 return TokenNameIdentifier;
3709 if ((data[++index] == 's') && (data[++index] == 'e'))
3710 return TokenNameuse;
3712 return TokenNameIdentifier;
3714 if ((data[++index] == 'n') && (data[++index] == 's')
3715 && (data[++index] == 'e') && (data[++index] == 't'))
3716 return TokenNameunset;
3718 return TokenNameIdentifier;
3723 if ((data[++index] == 'a') && (data[++index] == 'r'))
3724 return TokenNamevar;
3726 return TokenNameIdentifier;
3731 if ((data[++index] == 'h') && (data[++index] == 'i')
3732 && (data[++index] == 'l') && (data[++index] == 'e'))
3733 return TokenNamewhile;
3734 // case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3735 // (data[++index]=='e') && (data[++index]=='f')&&
3736 // (data[++index]=='p'))
3737 // return TokenNamewidefp ;
3739 // return TokenNameIdentifier;
3741 return TokenNameIdentifier;
3746 if ((data[++index] == 'o') && (data[++index] == 'r'))
3747 return TokenNamexor;
3749 return TokenNameIdentifier;
3751 return TokenNameIdentifier;
3753 return TokenNameIdentifier;
/**
 * Scans a numeric literal. On entry <code>currentCharacter</code> holds the
 * first digit of the number.
 * <p>
 * Paths visible in this method:
 * <ul>
 * <li>a leading <code>0</code> followed by <code>x</code>/<code>X</code> is read
 * as a hexadecimal literal and yields <code>TokenNameIntegerLiteral</code>;</li>
 * <li>a leading <code>0</code> followed by further digits is a potential octal
 * literal; it turns into <code>TokenNameDoubleLiteral</code> when a
 * <code>d</code>/<code>D</code> suffix or a <code>.</code> follows;</li>
 * <li>otherwise digits are consumed, then an optional fraction (only when
 * <code>dotPrefix</code> is false) and an optional exponent.</li>
 * </ul>
 *
 * @param dotPrefix
 *            true when the number was introduced by a <code>.</code>; the
 *            literal is then treated as floating from the start
 * @return <code>TokenNameIntegerLiteral</code> or
 *         <code>TokenNameDoubleLiteral</code>
 * @throws InvalidInputException
 *             <code>INVALID_HEXA</code> when the character after
 *             <code>0x</code> is not a hexadecimal digit;
 *             <code>INVALID_FLOAT</code> when an exponent marker (and optional
 *             sign) is not followed by a digit
 */
// NOTE(review): source[currentPosition++] is read without a bounds check, so a
// literal ending at the very end of the buffer (e.g. "0x" or "1e") can raise an
// index exception instead of InvalidInputException - presumably the caller
// handles that; confirm.
// NOTE(review): the d/D suffix handling is inherited from the Java scanner; PHP
// has no such suffix - confirm whether it should still be accepted.
3756 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3757 // when entering this method the currentCharacter is the first
3758 // digit of the number , i.e. it may be preceded by a . when
3759 // dotPrefix is true
3760 boolean floating = dotPrefix;
3761 if ((!dotPrefix) && (currentCharacter == '0')) {
3762 if (getNextChar('x', 'X') >= 0) { // ----------hexa-----------------
3763 // force the first char of the hexa number do exist...
3764 // consume next character
3765 unicodeAsBackSlash = false;
3766 currentCharacter = source[currentPosition++];
3767 // if (((currentCharacter = source[currentPosition++]) == '\\')
3768 // && (source[currentPosition] == 'u')) {
3769 // getNextUnicodeChar();
3771 // if (withoutUnicodePtr != 0) {
3772 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3775 if (Character.digit(currentCharacter, 16) == -1)
3776 throw new InvalidInputException(INVALID_HEXA);
// consume the remaining hexadecimal digits
3778 while (getNextCharAsDigit(16)) {
3781 // if (getNextChar('l', 'L') >= 0)
3782 // return TokenNameLongLiteral;
3784 return TokenNameIntegerLiteral;
3786 // there is x or X in the number
3787 // potential octal ! ... some one may write 000099.0 ! thus 00100 <
3788 // 00078.0 is true !!!!! crazy language
3789 if (getNextCharAsDigit()) {
3790 // -------------potential octal-----------------
3791 while (getNextCharAsDigit()) {
3794 // if (getNextChar('l', 'L') >= 0) {
3795 // return TokenNameLongLiteral;
3798 // if (getNextChar('f', 'F') >= 0) {
3799 // return TokenNameFloatingPointLiteral;
3801 if (getNextChar('d', 'D') >= 0) {
3802 return TokenNameDoubleLiteral;
3803 } else { // make the distinction between octal and float ....
3804 if (getNextChar('.')) { // bingo ! ....
3805 while (getNextCharAsDigit()) {
3808 if (getNextChar('e', 'E') >= 0) {
3809 // consume next character
3810 unicodeAsBackSlash = false;
3811 currentCharacter = source[currentPosition++];
3812 // if (((currentCharacter =
3813 // source[currentPosition++]) == '\\')
3814 // && (source[currentPosition] == 'u')) {
3815 // getNextUnicodeChar();
3817 // if (withoutUnicodePtr != 0) {
3818 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3819 // currentCharacter;
3822 if ((currentCharacter == '-')
3823 || (currentCharacter == '+')) {
3824 // consume next character
3825 unicodeAsBackSlash = false;
3826 currentCharacter = source[currentPosition++];
3827 // if (((currentCharacter =
3828 // source[currentPosition++]) == '\\')
3829 // && (source[currentPosition] == 'u')) {
3830 // getNextUnicodeChar();
3832 // if (withoutUnicodePtr != 0) {
3833 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3834 // currentCharacter;
// an exponent needs at least one digit
3838 if (!Character.isDigit(currentCharacter))
3839 throw new InvalidInputException(INVALID_FLOAT);
3840 while (getNextCharAsDigit()) {
3844 // if (getNextChar('f', 'F') >= 0)
3845 // return TokenNameFloatingPointLiteral;
3846 getNextChar('d', 'D'); // jump over potential d or D
3847 return TokenNameDoubleLiteral;
3849 return TokenNameIntegerLiteral;
// ---------- plain decimal literal: integer part ----------
3856 while (getNextCharAsDigit()) {
3859 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3860 // return TokenNameLongLiteral;
3861 if ((!dotPrefix) && (getNextChar('.'))) { // decimal part that can be
3863 while (getNextCharAsDigit()) {
3868 // if floating is true both exponent and suffix may be optional
3869 if (getNextChar('e', 'E') >= 0) {
3871 // consume next character
3872 unicodeAsBackSlash = false;
3873 currentCharacter = source[currentPosition++];
3874 // if (((currentCharacter = source[currentPosition++]) == '\\')
3875 // && (source[currentPosition] == 'u')) {
3876 // getNextUnicodeChar();
3878 // if (withoutUnicodePtr != 0) {
3879 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3882 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3885 unicodeAsBackSlash = false;
3886 currentCharacter = source[currentPosition++];
3887 // if (((currentCharacter = source[currentPosition++]) == '\\')
3888 // && (source[currentPosition] == 'u')) {
3889 // getNextUnicodeChar();
3891 // if (withoutUnicodePtr != 0) {
3892 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// an exponent needs at least one digit
3896 if (!Character.isDigit(currentCharacter))
3897 throw new InvalidInputException(INVALID_FLOAT);
3898 while (getNextCharAsDigit()) {
3902 if (getNextChar('d', 'D') >= 0)
3903 return TokenNameDoubleLiteral;
3904 // if (getNextChar('f', 'F') >= 0)
3905 // return TokenNameFloatingPointLiteral;
3906 // the long flag has been tested before
3907 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3911 * Search the line number corresponding to a specific position
/**
 * Returns the line number that contains the given source position, looked up
 * in the <code>lineEnds</code> table.
 *
 * @param position
 *            a position in the source; it is compared against
 *            <code>lineEnds[m]</code>
 * @return the line number for <code>position</code>
 */
// NOTE(review): only entries 0..linePtr of lineEnds are considered (length is
// linePtr + 1) and the search is bounded by g and d - presumably a binary
// search over recorded line-end positions, with a fallback result when
// lineEnds is null; the loop and the return statements are not visible in this
// copy of the file - confirm against the upstream source.
3914 public final int getLineNumber(int position) {
3915 if (lineEnds == null)
3917 int length = linePtr + 1;
3920 int g = 0, d = length - 1;
3924 if (position < lineEnds[m]) {
3926 } else if (position > lineEnds[m]) {
3932 if (position < lineEnds[m]) {
/**
 * Switches the scanner's PHP mode.
 *
 * @param mode
 *            the new mode value
 */
// NOTE(review): presumably assigns the public phpMode field; the method body is
// not visible in this copy of the file - confirm.
3938 public void setPHPMode(boolean mode) {
/**
 * Sets the characters to scan without associating a compilation unit
 * (delegates to the two-argument variant with a <code>null</code> unit).
 *
 * @param source
 *            the characters to scan
 */
3942 public final void setSource(char[] source) {
3943 setSource(null, source);
/**
 * Sets the compilation unit and the characters to scan, and resets the
 * scanning state kept in this method: <code>initialPosition</code> and
 * <code>currentPosition</code> go back to 0, <code>containsAssertKeyword</code>
 * is cleared, <code>withoutUnicodeBuffer</code> is reallocated with the length
 * of the source and <code>fFillerToken</code> is set to
 * <code>TokenNameEOF</code>.
 *
 * @param compilationUnit
 *            the unit the source belongs to; stored as given (the
 *            single-argument overload passes <code>null</code>)
 * @param source
 *            the characters to scan; <code>null</code> is replaced by an empty
 *            array
 */
3946 public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3947 // the source-buffer is set to sourceString
3948 this.compilationUnit = compilationUnit;
3949 if (source == null) {
3950 this.source = new char[0];
3952 this.source = source;
3955 initialPosition = currentPosition = 0;
3956 containsAssertKeyword = false;
// sized from this.source (not the parameter) so a null argument is safe here
3957 withoutUnicodeBuffer = new char[this.source.length];
3958 fFillerToken = TokenNameEOF;
3959 // encapsedStringStack = new Stack();
3962 public String toString() {
3963 if (startPosition == source.length)
3964 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3965 if (currentPosition > source.length)
3966 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3967 char front[] = new char[startPosition];
3968 System.arraycopy(source, 0, front, 0, startPosition);
3969 int middleLength = (currentPosition - 1) - startPosition + 1;
3971 if (middleLength > -1) {
3972 middle = new char[middleLength];
3973 System.arraycopy(source, startPosition, middle, 0, middleLength);
3975 middle = new char[0];
3977 char end[] = new char[source.length - (currentPosition - 1)];
3978 System.arraycopy(source, (currentPosition - 1) + 1, end, 0,
3979 source.length - (currentPosition - 1) - 1);
3980 return new String(front)
3981 + "\n===============================\nStarts here -->" //$NON-NLS-1$
3982 + new String(middle)
3983 + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable description of a token kind, for debugging and tracing.
 * <p>
 * Keywords and operators are rendered as fixed text. Token kinds that carry
 * text (identifiers, variables, literals, strings, heredocs, inline HTML,
 * whitespace and comments) additionally include the result of
 * <code>getCurrentTokenSource()</code>, so the output is only meaningful when
 * <code>act</code> is the token the scanner has just produced. Unknown values
 * are rendered as <code>not-a-token(n)</code> followed by the current token
 * source.
 *
 * @param act
 *            the token kind (one of the <code>TokenName...</code> constants)
 * @return the description of the token kind
 */
3987 public final String toStringAction(int act) {
3989 case TokenNameERROR:
3990 return "ScannerError"; // + new String(getCurrentTokenSource()) +
3993 case TokenNameINLINE_HTML:
3994 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3995 case TokenNameECHO_INVISIBLE:
3998 case TokenNameIdentifier:
3999 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4000 case TokenNameVariable:
4001 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4002 case TokenNameabstract:
4003 return "abstract"; //$NON-NLS-1$
4005 return "AND"; //$NON-NLS-1$
4006 case TokenNamearray:
4007 return "array"; //$NON-NLS-1$
4009 return "as"; //$NON-NLS-1$
4010 case TokenNamebreak:
4011 return "break"; //$NON-NLS-1$
4013 return "case"; //$NON-NLS-1$
4014 case TokenNameclass:
4015 return "class"; //$NON-NLS-1$
4016 case TokenNamecatch:
4017 return "catch"; //$NON-NLS-1$
4018 case TokenNameclone:
4021 case TokenNameconst:
4024 case TokenNamecontinue:
4025 return "continue"; //$NON-NLS-1$
4026 case TokenNamedefault:
4027 return "default"; //$NON-NLS-1$
4028 // case TokenNamedefine :
4029 // return "define"; //$NON-NLS-1$
4031 return "do"; //$NON-NLS-1$
4033 return "echo"; //$NON-NLS-1$
4035 return "else"; //$NON-NLS-1$
4036 case TokenNameelseif:
4037 return "elseif"; //$NON-NLS-1$
4038 case TokenNameendfor:
4039 return "endfor"; //$NON-NLS-1$
4040 case TokenNameendforeach:
4041 return "endforeach"; //$NON-NLS-1$
4042 case TokenNameendif:
4043 return "endif"; //$NON-NLS-1$
4044 case TokenNameendswitch:
4045 return "endswitch"; //$NON-NLS-1$
4046 case TokenNameendwhile:
4047 return "endwhile"; //$NON-NLS-1$
4050 case TokenNameextends:
4051 return "extends"; //$NON-NLS-1$
4052 // case TokenNamefalse :
4053 // return "false"; //$NON-NLS-1$
4054 case TokenNamefinal:
4055 return "final"; //$NON-NLS-1$
4057 return "for"; //$NON-NLS-1$
4058 case TokenNameforeach:
4059 return "foreach"; //$NON-NLS-1$
4060 case TokenNamefunction:
4061 return "function"; //$NON-NLS-1$
4062 case TokenNameglobal:
4063 return "global"; //$NON-NLS-1$
4065 return "if"; //$NON-NLS-1$
4066 case TokenNameimplements:
4067 return "implements"; //$NON-NLS-1$
4068 case TokenNameinclude:
4069 return "include"; //$NON-NLS-1$
4070 case TokenNameinclude_once:
4071 return "include_once"; //$NON-NLS-1$
4072 case TokenNameinstanceof:
4073 return "instanceof"; //$NON-NLS-1$
4074 case TokenNameinterface:
4075 return "interface"; //$NON-NLS-1$
4076 case TokenNameisset:
4077 return "isset"; //$NON-NLS-1$
4079 return "list"; //$NON-NLS-1$
4081 return "new"; //$NON-NLS-1$
4082 // case TokenNamenull :
4083 // return "null"; //$NON-NLS-1$
4085 return "OR"; //$NON-NLS-1$
4086 case TokenNameprint:
4087 return "print"; //$NON-NLS-1$
4088 case TokenNameprivate:
4089 return "private"; //$NON-NLS-1$
4090 case TokenNameprotected:
4091 return "protected"; //$NON-NLS-1$
4092 case TokenNamepublic:
4093 return "public"; //$NON-NLS-1$
4094 case TokenNamerequire:
4095 return "require"; //$NON-NLS-1$
4096 case TokenNamerequire_once:
4097 return "require_once"; //$NON-NLS-1$
4098 case TokenNameNamespace:
4099 return "namespace"; //$NON-NLS-1$
4100 case TokenNamereturn:
4101 return "return"; //$NON-NLS-1$
4102 // case TokenNameself:
4103 // return "self"; //$NON-NLS-1$
4104 case TokenNamestatic:
4105 return "static"; //$NON-NLS-1$
4106 case TokenNameswitch:
4107 return "switch"; //$NON-NLS-1$
4108 // case TokenNametrue :
4109 // return "true"; //$NON-NLS-1$
4110 case TokenNameunset:
4111 return "unset"; //$NON-NLS-1$
4113 return "var"; //$NON-NLS-1$
4114 case TokenNamewhile:
4115 return "while"; //$NON-NLS-1$
4117 return "XOR"; //$NON-NLS-1$
4118 // case TokenNamethis :
4119 // return "$this"; //$NON-NLS-1$
// ---------- literals: rendered together with the current token text ----------
4120 case TokenNameIntegerLiteral:
4121 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4122 case TokenNameDoubleLiteral:
4123 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4124 case TokenNameStringDoubleQuote:
4125 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4126 case TokenNameStringSingleQuote:
4127 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4128 case TokenNameStringInterpolated:
4129 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4130 case TokenNameEncapsedString0:
4131 return "`"; //$NON-NLS-1$
4132 // case TokenNameEncapsedString1:
4133 // return "\'"; //$NON-NLS-1$
4134 // case TokenNameEncapsedString2:
4135 // return "\""; //$NON-NLS-1$
4136 case TokenNameSTRING:
4137 return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4138 case TokenNameHEREDOC:
4139 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// ---------- operators and punctuation ----------
4140 case TokenNamePLUS_PLUS:
4141 return "++"; //$NON-NLS-1$
4142 case TokenNameMINUS_MINUS:
4143 return "--"; //$NON-NLS-1$
4144 case TokenNameEQUAL_EQUAL:
4145 return "=="; //$NON-NLS-1$
4146 case TokenNameEQUAL_EQUAL_EQUAL:
4147 return "==="; //$NON-NLS-1$
4148 case TokenNameEQUAL_GREATER:
4149 return "=>"; //$NON-NLS-1$
4150 case TokenNameLESS_EQUAL:
4151 return "<="; //$NON-NLS-1$
4152 case TokenNameGREATER_EQUAL:
4153 return ">="; //$NON-NLS-1$
4154 case TokenNameNOT_EQUAL:
4155 return "!="; //$NON-NLS-1$
4156 case TokenNameNOT_EQUAL_EQUAL:
4157 return "!=="; //$NON-NLS-1$
4158 case TokenNameLEFT_SHIFT:
4159 return "<<"; //$NON-NLS-1$
4160 case TokenNameRIGHT_SHIFT:
4161 return ">>"; //$NON-NLS-1$
4162 case TokenNamePLUS_EQUAL:
4163 return "+="; //$NON-NLS-1$
4164 case TokenNameMINUS_EQUAL:
4165 return "-="; //$NON-NLS-1$
4166 case TokenNameMULTIPLY_EQUAL:
4167 return "*="; //$NON-NLS-1$
4168 case TokenNameDIVIDE_EQUAL:
4169 return "/="; //$NON-NLS-1$
4170 case TokenNameAND_EQUAL:
4171 return "&="; //$NON-NLS-1$
4172 case TokenNameOR_EQUAL:
4173 return "|="; //$NON-NLS-1$
4174 case TokenNameXOR_EQUAL:
4175 return "^="; //$NON-NLS-1$
4176 case TokenNameREMAINDER_EQUAL:
4177 return "%="; //$NON-NLS-1$
4178 case TokenNameDOT_EQUAL:
4179 return ".="; //$NON-NLS-1$
4180 case TokenNameLEFT_SHIFT_EQUAL:
4181 return "<<="; //$NON-NLS-1$
4182 case TokenNameRIGHT_SHIFT_EQUAL:
4183 return ">>="; //$NON-NLS-1$
4184 case TokenNameOR_OR:
4185 return "||"; //$NON-NLS-1$
4186 case TokenNameAND_AND:
4187 return "&&"; //$NON-NLS-1$
4189 return "+"; //$NON-NLS-1$
4190 case TokenNameMINUS:
4191 return "-"; //$NON-NLS-1$
4192 case TokenNameMINUS_GREATER:
4195 return "!"; //$NON-NLS-1$
4196 case TokenNameREMAINDER:
4197 return "%"; //$NON-NLS-1$
4199 return "^"; //$NON-NLS-1$
4201 return "&"; //$NON-NLS-1$
4202 case TokenNameMULTIPLY:
4203 return "*"; //$NON-NLS-1$
4205 return "|"; //$NON-NLS-1$
4206 case TokenNameTWIDDLE:
4207 return "~"; //$NON-NLS-1$
4208 case TokenNameTWIDDLE_EQUAL:
4209 return "~="; //$NON-NLS-1$
4210 case TokenNameDIVIDE:
4211 return "/"; //$NON-NLS-1$
4212 case TokenNameGREATER:
4213 return ">"; //$NON-NLS-1$
4215 return "<"; //$NON-NLS-1$
4216 case TokenNameLPAREN:
4217 return "("; //$NON-NLS-1$
4218 case TokenNameRPAREN:
4219 return ")"; //$NON-NLS-1$
4220 case TokenNameLBRACE:
4221 return "{"; //$NON-NLS-1$
4222 case TokenNameRBRACE:
4223 return "}"; //$NON-NLS-1$
4224 case TokenNameLBRACKET:
4225 return "["; //$NON-NLS-1$
4226 case TokenNameRBRACKET:
4227 return "]"; //$NON-NLS-1$
4228 case TokenNameSEMICOLON:
4229 return ";"; //$NON-NLS-1$
4230 case TokenNameQUESTION:
4231 return "?"; //$NON-NLS-1$
4232 case TokenNameCOLON:
4233 return ":"; //$NON-NLS-1$
4234 case TokenNameCOMMA:
4235 return ","; //$NON-NLS-1$
4237 return "."; //$NON-NLS-1$
4238 case TokenNameEQUAL:
4239 return "="; //$NON-NLS-1$
4242 case TokenNameDOLLAR:
4244 case TokenNameDOLLAR_LBRACE:
4246 case TokenNameLBRACE_DOLLAR:
4249 return "EOF"; //$NON-NLS-1$
// ---------- whitespace and comments: rendered with the current token text ----------
4250 case TokenNameWHITESPACE:
4251 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4252 case TokenNameCOMMENT_LINE:
4253 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4254 case TokenNameCOMMENT_BLOCK:
4255 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4256 case TokenNameCOMMENT_PHPDOC:
4257 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4258 // case TokenNameHTML :
4259 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
// ---------- magic constants and casts ----------
4262 return "__FILE__"; //$NON-NLS-1$
4264 return "__LINE__"; //$NON-NLS-1$
4265 case TokenNameCLASS_C:
4266 return "__CLASS__"; //$NON-NLS-1$
4267 case TokenNameMETHOD_C:
4268 return "__METHOD__"; //$NON-NLS-1$
4269 case TokenNameFUNC_C:
4270 return "__FUNCTION__"; //$NON-NLS-1$
4271 case TokenNameboolCAST:
4272 return "( bool )"; //$NON-NLS-1$
4273 case TokenNameintCAST:
4274 return "( int )"; //$NON-NLS-1$
4275 case TokenNamedoubleCAST:
4276 return "( double )"; //$NON-NLS-1$
4277 case TokenNameobjectCAST:
4278 return "( object )"; //$NON-NLS-1$
4279 case TokenNamestringCAST:
4280 return "( string )"; //$NON-NLS-1$
// fallback for values that are not handled above
// NOTE(review): new Integer(act) is needless (and deprecated) boxing; plain
// string concatenation of act produces the same text.
4282 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner that does not check for non-externalized string literals
 * (delegates with <code>false</code> for that option).
 *
 * @param tokenizeComments
 *            passed through to the full constructor
 * @param tokenizeWhiteSpace
 *            passed through to the full constructor
 */
4290 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
4291 this(tokenizeComments, tokenizeWhiteSpace, false);
/**
 * Creates a scanner with assert mode switched off (delegates with
 * <code>false</code> for <code>assertMode</code>).
 *
 * @param tokenizeComments
 *            passed through to the full constructor
 * @param tokenizeWhiteSpace
 *            passed through to the full constructor
 * @param checkNonExternalizedStringLiterals
 *            passed through to the full constructor
 */
4294 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
4295 boolean checkNonExternalizedStringLiterals) {
4296 this(tokenizeComments, tokenizeWhiteSpace,
4297 checkNonExternalizedStringLiterals, false);
/**
 * Creates a scanner without string tokenizing and without task tags: the
 * delegation passes <code>false</code> for <code>tokenizeStrings</code> and
 * <code>null</code> for <code>taskTags</code>.
 *
 * @param tokenizeComments
 *            passed through to the full constructor
 * @param tokenizeWhiteSpace
 *            passed through to the full constructor
 * @param checkNonExternalizedStringLiterals
 *            passed through to the full constructor
 * @param assertMode
 *            passed through to the full constructor
 */
// NOTE(review): the remaining arguments of the delegation (taskPriorities and
// isTaskCaseSensitive) are not visible in this copy of the file - confirm.
4300 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
4301 boolean checkNonExternalizedStringLiterals, boolean assertMode) {
4302 this(tokenizeComments, tokenizeWhiteSpace,
4303 checkNonExternalizedStringLiterals, assertMode, false, null,
/**
 * Creates a fully configured scanner. The end-of-file position starts out
 * as <code>Integer.MAX_VALUE</code>.
 *
 * @param tokenizeComments
 *            stored in the <code>tokenizeComments</code> option
 * @param tokenizeWhiteSpace
 *            stored in the <code>tokenizeWhiteSpace</code> option
 * @param checkNonExternalizedStringLiterals
 *            stored in the <code>checkNonExternalizedStringLiterals</code>
 *            option
 * @param assertMode
 *            ignored; kept so that existing callers keep compiling
 * @param tokenizeStrings
 *            stored in the <code>tokenizeStrings</code> option
 * @param taskTags
 *            the task tags to look for in comments; may be <code>null</code>
 * @param taskPriorities
 *            the priorities matching <code>taskTags</code> by index; may be
 *            <code>null</code>
 * @param isTaskCaseSensitive
 *            whether task tags are matched case-sensitively in
 *            <code>checkTaskTag</code>
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
		boolean checkNonExternalizedStringLiterals, boolean assertMode,
		boolean tokenizeStrings, char[][] taskTags,
		char[][] taskPriorities, boolean isTaskCaseSensitive) {
	this.eofPosition = Integer.MAX_VALUE;
	this.tokenizeComments = tokenizeComments;
	this.tokenizeWhiteSpace = tokenizeWhiteSpace;
	this.tokenizeStrings = tokenizeStrings;
	this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
	// this.assertMode = assertMode;
	// this.encapsedStringStack = null;
	this.taskTags = taskTags;
	this.taskPriorities = taskPriorities;
	// Previously the parameter was dropped, so checkTaskTag always used the
	// field's initial value regardless of what the caller asked for.
	this.isTaskCaseSensitive = isTaskCaseSensitive;
}
/**
 * Evaluates the NLS tags of the current token against the string literals
 * collected in <code>currentLine</code> by calling <code>parseTags</code>.
 *
 * @throws InvalidInputException
 *             declared because <code>parseTags</code> declares it
 */
// NOTE(review): the statement guarded by the null check is not visible in this
// copy of the file - presumably an early return when there is no current line.
4322 private void checkNonExternalizeString() throws InvalidInputException {
4323 if (currentLine == null)
4325 parseTags(currentLine);
/**
 * Reads the NLS tags contained in the current token and collects the string
 * literals of <code>line</code> that remain without a tag.
 * <p>
 * Each tag consists of <code>TAG_PREFIX</code>, a one-based literal index and
 * <code>TAG_POSTFIX</code>; an index that is not a number is treated as
 * invalid (-1). Afterwards all non-null literals still held by
 * <code>line</code> are copied into <code>nonNLSStrings</code>, which is set to
 * <code>null</code> when there are none and otherwise trimmed to the number of
 * collected literals; in the latter case
 * <code>wasNonExternalizedStringLiteral</code> is set.
 *
 * @param line
 *            the string literals found on the current source line
 * @throws InvalidInputException
 *             declared; no throw site is visible in this method
 */
// NOTE(review): when a TAG_PREFIX is not followed by TAG_POSTFIX, indexOf
// returns -1 and s.substring(start, end) throws
// StringIndexOutOfBoundsException - guard end before taking the substring.
// NOTE(review): what happens for an existing index (after line.exists(i)) is
// not visible in this copy of the file - presumably the literal is cleared
// from the line; confirm.
4328 private void parseTags(NLSLine line) throws InvalidInputException {
4329 String s = new String(getCurrentTokenSource());
4330 int pos = s.indexOf(TAG_PREFIX);
4331 int lineLength = line.size();
4333 int start = pos + TAG_PREFIX_LENGTH;
4334 int end = s.indexOf(TAG_POSTFIX, start);
4335 String index = s.substring(start, end);
4338 i = Integer.parseInt(index) - 1;
4339 // Tags are one based not zero based.
4340 } catch (NumberFormatException e) {
4341 i = -1; // we don't want to consider this as a valid NLS tag
4343 if (line.exists(i)) {
// continue with the next tag after the one just handled
4346 pos = s.indexOf(TAG_PREFIX, start);
// collect the literals that are still present in the line
4348 this.nonNLSStrings = new StringLiteral[lineLength];
4349 int nonNLSCounter = 0;
4350 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4351 StringLiteral literal = (StringLiteral) iterator.next();
4352 if (literal != null) {
4353 this.nonNLSStrings[nonNLSCounter++] = literal;
4356 if (nonNLSCounter == 0) {
4357 this.nonNLSStrings = null;
4361 this.wasNonExternalizedStringLiteral = true;
// shrink the array to the number of literals actually collected
4362 if (nonNLSCounter != lineLength) {
4363 System.arraycopy(this.nonNLSStrings, 0,
4364 (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
/**
 * Translates the escape sequence at the current position into the character
 * it denotes and leaves the result in <code>currentCharacter</code>.
 * <p>
 * Results visible in this method are backspace, tab, line feed, form feed,
 * carriage return, double quote, single quote and backslash, plus octal
 * escapes of up to three octal digits (three digits only when the first one
 * is in the range 0-3).
 *
 * @throws InvalidInputException
 *             <code>INVALID_ESCAPE</code> when the sequence is not a valid
 *             escape
 */
// NOTE(review): the case labels selecting each result are not visible in this
// copy of the file; the mapping from escape letter to result is presumably the
// usual one (b, t, n, f, r, ", ', \) - confirm.
// NOTE(review): source[currentPosition++] is read without a bounds check.
4370 public final void scanEscapeCharacter() throws InvalidInputException {
4371 // the string with "\\u" is a legal string of two chars \ and u
4372 // thus we use a direct access to the source (for regular cases).
4373 if (unicodeAsBackSlash) {
4374 // consume next character
4375 unicodeAsBackSlash = false;
4376 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4377 // (source[currentPosition] == 'u')) {
4378 // getNextUnicodeChar();
4380 if (withoutUnicodePtr != 0) {
4381 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
4385 currentCharacter = source[currentPosition++];
4386 switch (currentCharacter) {
4388 currentCharacter = '\b';
4391 currentCharacter = '\t';
4394 currentCharacter = '\n';
4397 currentCharacter = '\f';
4400 currentCharacter = '\r';
4403 currentCharacter = '\"';
4406 currentCharacter = '\'';
4409 currentCharacter = '\\';
4412 // -----------octal escape--------------
4414 // OctalDigit OctalDigit
4415 // ZeroToThree OctalDigit OctalDigit
4416 int number = Character.getNumericValue(currentCharacter);
4417 if (number >= 0 && number <= 7) {
// a first digit above 3 allows at most one more octal digit
4418 boolean zeroToThreeNot = number > 3;
4420 .isDigit(currentCharacter = source[currentPosition++])) {
4421 int digit = Character.getNumericValue(currentCharacter);
4422 if (digit >= 0 && digit <= 7) {
4423 number = (number * 8) + digit;
4425 .isDigit(currentCharacter = source[currentPosition++])) {
4426 if (zeroToThreeNot) { // has read \NotZeroToThree
4428 // Digit --> ignore last character
4432 .getNumericValue(currentCharacter);
4433 if (digit >= 0 && digit <= 7) { // has read
4435 // OctalDigit OctalDigit
4436 number = (number * 8) + digit;
4437 } else { // has read \ZeroToThree OctalDigit
4439 // --> ignore last character
4443 } else { // has read \OctalDigit NonDigit--> ignore
4448 } else { // has read \OctalDigit NonOctalDigit--> ignore
4453 } else { // has read \OctalDigit --> ignore last character
4457 throw new InvalidInputException(INVALID_ESCAPE);
4458 currentCharacter = (char) number;
4460 throw new InvalidInputException(INVALID_ESCAPE);
4464 // check presence of task: tags
4465 // TODO (frederic) see if we need to take unicode characters into account...
/**
 * Searches a comment for the configured task tags and records every
 * occurrence in the <code>foundTask...</code> arrays.
 * <p>
 * The search starts two characters after <code>commentStart</code> and stops
 * at <code>commentEnd</code> or <code>eofPosition</code>. A tag is not looked
 * for directly after an <code>@</code>. Characters are compared exactly, or
 * ignoring case when <code>isTaskCaseSensitive</code> is false. For each match
 * the tag, its priority (taken from <code>taskPriorities</code> at the same
 * index, if available) and its position are stored; the arrays start with
 * five slots and double in size when full.
 * <p>
 * In a second pass the message of each newly found task is determined: it
 * starts right after the tag and is limited by the start of the next task or
 * the end of the comment, a line break and a <code>*</code>; surrounding
 * whitespace is trimmed. The end position of the task is updated to the end
 * of its message.
 *
 * @param commentStart
 *            position of the first character of the comment
 * @param commentEnd
 *            position at which the search stops
 */
// NOTE(review): this.taskTags is dereferenced without a null check although
// the four-argument constructor passes null for it - presumably callers only
// invoke this method when task tags are configured; confirm.
// NOTE(review): both trimming loops test src[...] before the
// "msgStart <= end" guard; the guard should come first to rule out an
// out-of-range access.
4466 public void checkTaskTag(int commentStart, int commentEnd) {
4467 char[] src = this.source;
4469 // only look for newer task: tags
4470 if (this.foundTaskCount > 0
4471 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// remember where the tasks of this comment begin (used by the message pass)
4474 int foundTaskIndex = this.foundTaskCount;
4475 char previous = src[commentStart + 1]; // should be '*' or '/'
4476 nextChar: for (int i = commentStart + 2; i < commentEnd
4477 && i < this.eofPosition; i++) {
4479 char[] priority = null;
4480 // check for tag occurrence only if not ambiguous with javadoc tag
4481 if (previous != '@') {
4482 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4483 tag = this.taskTags[itag];
4484 int tagLength = tag.length;
4488 // ensure tag is not leaded with letter if tag starts with a
4490 if (Scanner.isPHPIdentifierStart(tag[0])) {
4491 if (Scanner.isPHPIdentifierPart(previous)) {
4496 for (int t = 0; t < tagLength; t++) {
4499 if (x >= this.eofPosition || x >= commentEnd)
4501 if ((sc = src[i + t]) != (tc = tag[t])) { // case
4504 if (this.isTaskCaseSensitive
4505 || (Character.toLowerCase(sc) != Character
4506 .toLowerCase(tc))) { // case
4513 // ensure tag is not followed with letter if tag finishes
4516 if (i + tagLength < commentEnd
4517 && Scanner.isPHPIdentifierPart(src[i + tagLength
4519 if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
// allocate the result arrays lazily, then grow them by doubling
4522 if (this.foundTaskTags == null) {
4523 this.foundTaskTags = new char[5][];
4524 this.foundTaskMessages = new char[5][];
4525 this.foundTaskPriorities = new char[5][];
4526 this.foundTaskPositions = new int[5][];
4527 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4532 this.foundTaskTags = new char[this.foundTaskCount * 2][],
4533 0, this.foundTaskCount);
4536 this.foundTaskMessages,
4538 this.foundTaskMessages = new char[this.foundTaskCount * 2][],
4539 0, this.foundTaskCount);
4542 this.foundTaskPriorities,
4544 this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
4545 0, this.foundTaskCount);
4548 this.foundTaskPositions,
4550 this.foundTaskPositions = new int[this.foundTaskCount * 2][],
4551 0, this.foundTaskCount);
4554 priority = this.taskPriorities != null
4555 && itag < this.taskPriorities.length ? this.taskPriorities[itag]
// record the task; its message is filled in by the second pass
4558 this.foundTaskTags[this.foundTaskCount] = tag;
4559 this.foundTaskPriorities[this.foundTaskCount] = priority;
4560 this.foundTaskPositions[this.foundTaskCount] = new int[] {
4561 i, i + tagLength - 1 };
4562 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4563 this.foundTaskCount++;
4564 i += tagLength - 1; // will be incremented when looping
// second pass: determine the message of each task found in this comment
4570 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4571 // retrieve message start and end positions
4572 int msgStart = this.foundTaskPositions[i][0]
4573 + this.foundTaskTags[i].length;
4574 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1
4576 // at most beginning of next task
4577 if (max_value < msgStart) {
4578 max_value = msgStart; // would only occur if tag is before
4583 for (int j = msgStart; j < max_value; j++) {
4584 if ((c = src[j]) == '\n' || c == '\r') {
4590 for (int j = max_value; j > msgStart; j--) {
4591 if ((c = src[j]) == '*') {
4599 if (msgStart == end)
4602 while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4604 while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4606 // update the end position of the task
4607 this.foundTaskPositions[i][1] = end;
4608 // get the message source
4609 final int messageLength = end - msgStart + 1;
4610 char[] message = new char[messageLength];
4611 System.arraycopy(src, msgStart, message, 0, messageLength);
4612 this.foundTaskMessages[i] = message;
4616 // check presence of task: tags
4617 // public void checkTaskTag(int commentStart, int commentEnd) {
4618 // // only look for newer task: tags
4619 // if (this.foundTaskCount > 0 &&
4620 // this.foundTaskPositions[this.foundTaskCount
4621 // - 1][0] >= commentStart) {
4624 // int foundTaskIndex = this.foundTaskCount;
4625 // nextChar: for (int i = commentStart; i < commentEnd && i <
4626 // this.eofPosition; i++) {
4627 // char[] tag = null;
4628 // char[] priority = null;
4629 // // check for tag occurrence
4630 // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4631 // tag = this.taskTags[itag];
4632 // priority = this.taskPriorities != null && itag <
4633 // this.taskPriorities.length
4634 // ? this.taskPriorities[itag] : null;
4635 // int tagLength = tag.length;
4636 // for (int t = 0; t < tagLength; t++) {
4637 // if (this.source[i + t] != tag[t])
4638 // continue nextTag;
4640 // if (this.foundTaskTags == null) {
4641 // this.foundTaskTags = new char[5][];
4642 // this.foundTaskMessages = new char[5][];
4643 // this.foundTaskPriorities = new char[5][];
4644 // this.foundTaskPositions = new int[5][];
4645 // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4646 // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new
4647 // char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4648 // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new
4649 // char[this.foundTaskCount * 2][], 0,
4650 // this.foundTaskCount);
4651 // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities =
4652 // new char[this.foundTaskCount * 2][], 0,
4653 // this.foundTaskCount);
4654 // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions =
4656 // int[this.foundTaskCount * 2][], 0,
4657 // this.foundTaskCount);
4659 // this.foundTaskTags[this.foundTaskCount] = tag;
4660 // this.foundTaskPriorities[this.foundTaskCount] = priority;
4661 // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i +
4664 // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4665 // this.foundTaskCount++;
4666 // i += tagLength - 1; // will be incremented when looping
4669 // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4670 // // retrieve message start and end positions
4671 // int msgStart = this.foundTaskPositions[i][0] +
4672 // this.foundTaskTags[i].length;
4673 // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i +
4674 // 1][0] - 1 : commentEnd - 1;
4675 // // at most beginning of next task
4676 // if (max_value < msgStart)
4677 // max_value = msgStart; // would only occur if tag is before EOF.
4680 // for (int j = msgStart; j < max_value; j++) {
4681 // if ((c = this.source[j]) == '\n' || c == '\r') {
4687 // for (int j = max_value; j > msgStart; j--) {
4688 // if ((c = this.source[j]) == '*') {
4696 // if (msgStart == end)
4697 // continue; // empty
4698 // // trim the message
4699 // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4701 // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4703 // // update the end position of the task
4704 // this.foundTaskPositions[i][1] = end;
4705 // // get the message source
4706 // final int messageLength = end - msgStart + 1;
4707 // char[] message = new char[messageLength];
4708 // System.arraycopy(source, msgStart, message, 0, messageLength);
4709 // this.foundTaskMessages[i] = message;