1 /***********************************************************************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3 * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4 * and is available at http://www.eclipse.org/legal/cpl-v05.html
6 * Contributors: IBM Corporation - initial API and implementation
7 **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
14 import net.sourceforge.phpdt.core.compiler.CharOperation;
15 import net.sourceforge.phpdt.core.compiler.IScanner;
16 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
17 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
21 public class Scanner implements IScanner, ITerminalSymbols {
/*
 * The scanner APIs are:
 * - getNextToken(), which returns the type of the current token (this value
 *   is not memorized by the scanner);
 * - getCurrentTokenSource(), which provides the token's "REAL" source (i.e.
 *   all unicode escapes have been transformed into the correct char);
 * - sourceStart, which gives the position into the stream;
 * - currentPosition - 1, which gives the sourceEnd position into the stream.
 */
// ---- scanner configuration flags and per-token state ----
31 // private boolean assertMode;
32 public boolean useAssertAsAnIndentifier = false;
34 // flag indicating if processed source contains occurrences of keyword
36 public boolean containsAssertKeyword = false;
// when set, consumeStringInterpolated() calls pushLineSeparator() for every
// '\r' or '\n' it meets inside the literal
38 public boolean recordLineSeparator;
40 public boolean ignorePHPOneLiner = false;
42 public boolean phpMode = false;
45 * This token is set to TokenNameecho if a short tag block begins (i.e.
46 * >?= ... ) Directly after the "=" character the
47 * getNextToken() method returns TokenNameINLINE_HTML In the next call to
48 * the getNextToken() method the value of fFillerToken (==TokenNameecho) is
52 int fFillerToken = TokenNameEOF;
// last character read from source by the getNextChar...() family
54 public char currentCharacter;
// index in source of the first character of the current token
// (see getCurrentTokenStartPosition())
56 public int startPosition;
// index in source of the next character to read; the current token ends at
// currentPosition - 1 (see getCurrentTokenEndPosition())
58 public int currentPosition;
60 public int initialPosition, eofPosition;
62 // after this position eof are generated instead of real token from the
64 public boolean tokenizeComments;
66 public boolean tokenizeWhiteSpace;
68 public boolean tokenizeStrings;
70 // source should be viewed as a window (aka a part)
71 // of an entire very large stream
// buffer holding the translated token text; getCurrentTokenSourceString()
// reads it (from index 2) instead of source whenever withoutUnicodePtr != 0
75 public char[] withoutUnicodeBuffer;
77 public int withoutUnicodePtr;
79 // when == 0 ==> no unicode in the current token
80 public boolean unicodeAsBackSlash = false;
82 public boolean scanningFloatLiteral = false;
84 // support for /** comments
85 public int[] commentStops = new int[10];
87 public int[] commentStarts = new int[10];
89 public int commentPtr = -1; // no comment test with commentPtr value -1
91 protected int lastCommentLinePosition = -1;
93 // diet parsing support - jump over some method body when requested
94 public boolean diet = false;
96 // support for the poor-line-debuggers ....
97 // remember the position of the cr/lf
98 public int[] lineEnds = new int[250];
100 public int linePtr = -1;
102 public boolean wasAcr = false;
// ---- message keys; several are used as InvalidInputException messages
// (e.g. UNTERMINATED_STRING, INVALID_ESCAPE in consumeStringInterpolated()) ----
104 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
106 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
108 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
110 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
112 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
114 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
116 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
118 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
120 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
122 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
124 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
126 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
128 // ----------------optimized identifier management------------------
// Shared one-character arrays, one per lowercase letter 'a'..'z'.
129 static final char[] charArray_a = new char[] { 'a' },
130 charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
131 charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' },
132 charArray_f = new char[] { 'f' }, charArray_g = new char[] { 'g' },
133 charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
134 charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' },
135 charArray_l = new char[] { 'l' }, charArray_m = new char[] { 'm' },
136 charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
137 charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' },
138 charArray_r = new char[] { 'r' }, charArray_s = new char[] { 's' },
139 charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
140 charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' },
141 charArray_x = new char[] { 'x' }, charArray_y = new char[] { 'y' },
142 charArray_z = new char[] { 'z' };
// Shared two-character arrays "$a".."$z", i.e. '$' followed by one lowercase letter.
144 static final char[] charArray_va = new char[] { '$', 'a' },
145 charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
146 '$', 'c' }, charArray_vd = new char[] { '$', 'd' },
147 charArray_ve = new char[] { '$', 'e' }, charArray_vf = new char[] {
148 '$', 'f' }, charArray_vg = new char[] { '$', 'g' },
149 charArray_vh = new char[] { '$', 'h' }, charArray_vi = new char[] {
150 '$', 'i' }, charArray_vj = new char[] { '$', 'j' },
151 charArray_vk = new char[] { '$', 'k' }, charArray_vl = new char[] {
152 '$', 'l' }, charArray_vm = new char[] { '$', 'm' },
153 charArray_vn = new char[] { '$', 'n' }, charArray_vo = new char[] {
154 '$', 'o' }, charArray_vp = new char[] { '$', 'p' },
155 charArray_vq = new char[] { '$', 'q' }, charArray_vr = new char[] {
156 '$', 'r' }, charArray_vs = new char[] { '$', 's' },
157 charArray_vt = new char[] { '$', 't' }, charArray_vu = new char[] {
158 '$', 'u' }, charArray_vv = new char[] { '$', 'v' },
159 charArray_vw = new char[] { '$', 'w' }, charArray_vx = new char[] {
160 '$', 'x' }, charArray_vy = new char[] { '$', 'y' },
161 charArray_vz = new char[] { '$', 'z' };
// Size of the character classification table: only chars 0..255 are classified.
163 public final static int MAX_OBVIOUS = 256;
// Lookup table mapping a char code (< MAX_OBVIOUS) to one of the C_* nature
// codes below; entries that are never assigned keep the int default 0.
165 static final int[] ObviousIdentCharNatures = new int[MAX_OBVIOUS];
// Nature codes stored in ObviousIdentCharNatures.
167 public final static int C_DOLLAR = 8;
169 public final static int C_LETTER = 4;
171 public final static int C_DIGIT = 3;
173 public final static int C_SEPARATOR = 2;
175 public final static int C_SPACE = 1;
// Table initialisation: decimal digits.
177 for (int i = '0'; i <= '9'; i++)
178 ObviousIdentCharNatures[i] = C_DIGIT;
// ASCII letters, the underscore and every char in 127..255 count as letters.
180 for (int i = 'a'; i <= 'z'; i++)
181 ObviousIdentCharNatures[i] = C_LETTER;
182 for (int i = 'A'; i <= 'Z'; i++)
183 ObviousIdentCharNatures[i] = C_LETTER;
184 ObviousIdentCharNatures['_'] = C_LETTER;
185 for (int i = 127; i <= 255; i++)
186 ObviousIdentCharNatures[i] = C_LETTER;
// '$' gets its own nature so that variable starts can be told apart from
// identifier starts (see isPHPIdentOrVarStart / isPHPIdentifierStart).
188 ObviousIdentCharNatures['$'] = C_DOLLAR;
// White space characters.
190 ObviousIdentCharNatures[10] = C_SPACE; // \ u000a: LINE FEED
191 ObviousIdentCharNatures[12] = C_SPACE; // \ u000c: FORM FEED
192 ObviousIdentCharNatures[13] = C_SPACE; // \ u000d: CARRIAGE RETURN
193 ObviousIdentCharNatures[32] = C_SPACE; // \ u0020: SPACE
194 ObviousIdentCharNatures[9] = C_SPACE; // \ u0009: HORIZONTAL
// Punctuation, operator and quote characters.
197 ObviousIdentCharNatures['.'] = C_SEPARATOR;
198 ObviousIdentCharNatures[':'] = C_SEPARATOR;
199 ObviousIdentCharNatures[';'] = C_SEPARATOR;
200 ObviousIdentCharNatures[','] = C_SEPARATOR;
201 ObviousIdentCharNatures['['] = C_SEPARATOR;
202 ObviousIdentCharNatures[']'] = C_SEPARATOR;
203 ObviousIdentCharNatures['('] = C_SEPARATOR;
204 ObviousIdentCharNatures[')'] = C_SEPARATOR;
205 ObviousIdentCharNatures['{'] = C_SEPARATOR;
206 ObviousIdentCharNatures['}'] = C_SEPARATOR;
207 ObviousIdentCharNatures['+'] = C_SEPARATOR;
208 ObviousIdentCharNatures['-'] = C_SEPARATOR;
209 ObviousIdentCharNatures['*'] = C_SEPARATOR;
210 ObviousIdentCharNatures['/'] = C_SEPARATOR;
211 ObviousIdentCharNatures['='] = C_SEPARATOR;
212 ObviousIdentCharNatures['&'] = C_SEPARATOR;
213 ObviousIdentCharNatures['|'] = C_SEPARATOR;
214 ObviousIdentCharNatures['?'] = C_SEPARATOR;
215 ObviousIdentCharNatures['<'] = C_SEPARATOR;
216 ObviousIdentCharNatures['>'] = C_SEPARATOR;
217 ObviousIdentCharNatures['!'] = C_SEPARATOR;
218 ObviousIdentCharNatures['%'] = C_SEPARATOR;
219 ObviousIdentCharNatures['^'] = C_SEPARATOR;
220 ObviousIdentCharNatures['~'] = C_SEPARATOR;
221 ObviousIdentCharNatures['"'] = C_SEPARATOR;
222 ObviousIdentCharNatures['\''] = C_SEPARATOR;
// Placeholder (six NUL chars) stored in every slot of charArray_length by the
// initialisation loop below.
225 static final char[] initCharArray = new char[] { '\u0000', '\u0000',
226 '\u0000', '\u0000', '\u0000', '\u0000' };
228 static final int TableSize = 30, InternalTableSize = 6;
230 // 30*6 = 180 entries
// getCurrentIdentifierSource() dispatches tokens of length 1..6 to the
// optimizedCurrentTokenSourceN() methods; see the "see OptimizedLength" note there.
231 public static final int OptimizedLength = 6;
// Per-instance table indexed [OptimizedLength][TableSize][InternalTableSize].
234 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
236 // support for detecting non-externalized string literals
237 int currentLineNr = -1;
239 int previousLineNr = -1;
241 NLSLine currentLine = null;
243 List lines = new ArrayList();
245 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
247 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
249 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
251 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
253 public StringLiteral[] nonNLSStrings = null;
255 public boolean checkNonExternalizedStringLiterals = true;
257 public boolean wasNonExternalizedStringLiteral = false;
// Fill every slot of charArray_length with the shared initCharArray placeholder.
// NOTE(review): the literal 6 presumably stands for OptimizedLength - confirm.
260 for (int i = 0; i < 6; i++) {
261 for (int j = 0; j < TableSize; j++) {
262 for (int k = 0; k < InternalTableSize; k++) {
263 charArray_length[i][j][k] = initCharArray;
269 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
// Bracket kind codes; BracketKinds is the number of kinds.
272 public static final int RoundBracket = 0;
274 public static final int SquareBracket = 1;
276 public static final int CurlyBracket = 2;
278 public static final int BracketKinds = 3;
// Task tag bookkeeping.
281 public char[][] foundTaskTags = null;
283 public char[][] foundTaskMessages;
285 public char[][] foundTaskPriorities = null;
287 public int[][] foundTaskPositions;
289 public int foundTaskCount = 0;
291 public char[][] taskTags = null;
293 public char[][] taskPriorities = null;
295 public boolean isTaskCaseSensitive = true;
297 public static final boolean DEBUG = false;
299 public static final boolean TRACE = false;
301 public ICompilationUnit compilationUnit = null;
304 * Determines if the specified character is permissible as the first
305 * character in a PHP identifier or variable
307 * The '$' character for PHP variables is regarded as a correct first
311 public static boolean isPHPIdentOrVarStart(char ch) {
312 if (ch < MAX_OBVIOUS) {
313 return ObviousIdentCharNatures[ch] == C_LETTER
314 || ObviousIdentCharNatures[ch] == C_DOLLAR;
317 // return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F
319 // ch && ch <= 0xFF);
323 * Determines if the specified character is permissible as the first
324 * character in a PHP identifier.
326 * The '$' character for PHP variables isn't regarded as the first character !
328 public static boolean isPHPIdentifierStart(char ch) {
329 if (ch < MAX_OBVIOUS) {
330 return ObviousIdentCharNatures[ch] == C_LETTER;
333 // return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <=
338 * Determines if the specified character may be part of a PHP identifier as
339 * other than the first character
341 public static boolean isPHPIdentifierPart(char ch) {
342 if (ch < MAX_OBVIOUS) {
343 return ObviousIdentCharNatures[ch] == C_LETTER
344 || ObviousIdentCharNatures[ch] == C_DIGIT;
347 // return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch &&
352 public static boolean isSQLIdentifierPart(char ch) {
353 if (ch < MAX_OBVIOUS) {
354 return ObviousIdentCharNatures[ch] == C_LETTER
355 || ObviousIdentCharNatures[ch] == C_DIGIT;
360 public final boolean atEnd() {
361 // This code is not relevant if source is
362 // Only a part of the real stream input
363 return source.length == currentPosition;
366 public char[] getCurrentIdentifierSource() {
367 // return the token REAL source (aka unicodes are precomputed)
369 // if (withoutUnicodePtr != 0)
370 // //0 is used as a fast test flag so the real first char is in position
373 // withoutUnicodeBuffer,
375 // result = new char[withoutUnicodePtr],
377 // withoutUnicodePtr);
379 int length = currentPosition - startPosition;
380 switch (length) { // see OptimizedLength
382 return optimizedCurrentTokenSource1();
384 return optimizedCurrentTokenSource2();
386 return optimizedCurrentTokenSource3();
388 return optimizedCurrentTokenSource4();
390 return optimizedCurrentTokenSource5();
392 return optimizedCurrentTokenSource6();
395 System.arraycopy(source, startPosition, result = new char[length], 0,
401 public int getCurrentTokenEndPosition() {
402 return this.currentPosition - 1;
405 public final char[] getCurrentTokenSource() {
406 // Return the token REAL source (aka unicodes are precomputed)
408 // if (withoutUnicodePtr != 0)
409 // // 0 is used as a fast test flag so the real first char is in
412 // withoutUnicodeBuffer,
414 // result = new char[withoutUnicodePtr],
416 // withoutUnicodePtr);
419 System.arraycopy(source, startPosition,
420 result = new char[length = currentPosition - startPosition], 0,
426 public final char[] getCurrentTokenSource(int startPos) {
427 // Return the token REAL source (aka unicodes are precomputed)
429 // if (withoutUnicodePtr != 0)
430 // // 0 is used as a fast test flag so the real first char is in
433 // withoutUnicodeBuffer,
435 // result = new char[withoutUnicodePtr],
437 // withoutUnicodePtr);
440 System.arraycopy(source, startPos,
441 result = new char[length = currentPosition - startPos], 0,
447 public final char[] getCurrentTokenSourceString() {
448 // return the token REAL source (aka unicodes are precomputed).
449 // REMOVE the two " that are at the beginning and the end.
451 if (withoutUnicodePtr != 0)
452 // 0 is used as a fast test flag so the real first char is in
454 System.arraycopy(withoutUnicodeBuffer, 2,
455 // 2 is 1 (real start) + 1 (to jump over the ")
456 result = new char[withoutUnicodePtr - 2], 0,
457 withoutUnicodePtr - 2);
460 System.arraycopy(source, startPosition + 1,
461 result = new char[length = currentPosition - startPosition
467 public final boolean equalsCurrentTokenSource(char[] word) {
468 if (word.length != currentPosition - startPosition) {
471 for (int i = 0; i < word.length; i++) {
472 if (word[i] != source[startPosition + i]) {
479 public final char[] getRawTokenSourceEnd() {
480 int length = this.eofPosition - this.currentPosition - 1;
481 char[] sourceEnd = new char[length];
482 System.arraycopy(this.source, this.currentPosition, sourceEnd, 0,
487 public int getCurrentTokenStartPosition() {
488 return this.startPosition;
491 public final String getCurrentStringLiteral() {
492 char[] result = getCurrentStringLiteralSource();
493 return new String(result);
496 public final char[] getCurrentStringLiteralSource() {
497 // Return the token REAL source (aka unicodes are precomputed)
498 if (startPosition + 1 >= currentPosition) {
504 .arraycopy(source, startPosition + 1,
505 result = new char[length = currentPosition
506 - startPosition - 2], 0, length);
511 public final char[] getCurrentStringLiteralSource(int startPos) {
512 // Return the token REAL source (aka unicodes are precomputed)
515 System.arraycopy(source, startPos + 1,
516 result = new char[length = currentPosition - startPos - 2], 0,
523 * Search the source position corresponding to the end of a given line
526 * Line numbers are 1-based, and relative to the scanner initialPosition.
527 * Character positions are 0-based.
529 * In case the given line number is inconsistent, answers -1.
531 public final int getLineEnd(int lineNumber) {
532 if (lineEnds == null)
534 if (lineNumber >= lineEnds.length)
538 if (lineNumber == lineEnds.length - 1)
540 return lineEnds[lineNumber - 1];
541 // next line start one character behind the lineEnd of the previous line
545 * Search the source position corresponding to the beginning of a given line
548 * Line numbers are 1-based, and relative to the scanner initialPosition.
549 * Character positions are 0-based.
551 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
553 * In case the given line number is inconsistent, answers -1.
555 public final int getLineStart(int lineNumber) {
556 if (lineEnds == null)
558 if (lineNumber >= lineEnds.length)
563 return initialPosition;
564 return lineEnds[lineNumber - 2] + 1;
565 // next line start one character behind the lineEnd of the previous line
568 public final boolean getNextChar(char testedChar) {
570 // handle the case of unicode.
571 // when a unicode appears then we must use a buffer that holds char
573 // At the end of this method currentCharacter holds the new visited char
574 // and currentPosition points right next after it
575 // Both previous lines are true if the currentCharacter is == to the
577 // On false, no side effect has occured.
578 // ALL getNextChar.... ARE OPTIMIZED COPIES
579 int temp = currentPosition;
581 currentCharacter = source[currentPosition++];
582 // if (((currentCharacter = source[currentPosition++]) == '\\')
583 // && (source[currentPosition] == 'u')) {
584 // //-------------unicode traitement ------------
585 // int c1, c2, c3, c4;
586 // int unicodeSize = 6;
587 // currentPosition++;
588 // while (source[currentPosition] == 'u') {
589 // currentPosition++;
593 // if (((c1 = Character.getNumericValue(source[currentPosition++]))
596 // || ((c2 = Character.getNumericValue(source[currentPosition++])) >
599 // || ((c3 = Character.getNumericValue(source[currentPosition++])) >
602 // || ((c4 = Character.getNumericValue(source[currentPosition++])) >
605 // currentPosition = temp;
609 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
610 // if (currentCharacter != testedChar) {
611 // currentPosition = temp;
614 // unicodeAsBackSlash = currentCharacter == '\\';
616 // //need the unicode buffer
617 // if (withoutUnicodePtr == 0) {
618 // //buffer all the entries that have been left aside....
619 // withoutUnicodePtr = currentPosition - unicodeSize -
624 // withoutUnicodeBuffer,
626 // withoutUnicodePtr);
628 // //fill the buffer with the char
629 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
632 // } //-------------end unicode traitement--------------
634 if (currentCharacter != testedChar) {
635 currentPosition = temp;
638 unicodeAsBackSlash = false;
639 // if (withoutUnicodePtr != 0)
640 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
643 } catch (IndexOutOfBoundsException e) {
644 unicodeAsBackSlash = false;
645 currentPosition = temp;
650 public final int getNextChar(char testedChar1, char testedChar2) {
651 // INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
652 // test can be done with (x==0) for the first and (x>0) for the second
653 // handle the case of unicode.
654 // when a unicode appears then we must use a buffer that holds char
656 // At the end of this method currentCharacter holds the new visited char
657 // and currentPosition points right next after it
658 // Both previous lines are true if the currentCharacter is == to the
660 // On false, no side effect has occured.
661 // ALL getNextChar.... ARE OPTIMIZED COPIES
662 int temp = currentPosition;
665 currentCharacter = source[currentPosition++];
666 // if (((currentCharacter = source[currentPosition++]) == '\\')
667 // && (source[currentPosition] == 'u')) {
668 // //-------------unicode traitement ------------
669 // int c1, c2, c3, c4;
670 // int unicodeSize = 6;
671 // currentPosition++;
672 // while (source[currentPosition] == 'u') {
673 // currentPosition++;
677 // if (((c1 = Character.getNumericValue(source[currentPosition++]))
680 // || ((c2 = Character.getNumericValue(source[currentPosition++])) >
683 // || ((c3 = Character.getNumericValue(source[currentPosition++])) >
686 // || ((c4 = Character.getNumericValue(source[currentPosition++])) >
689 // currentPosition = temp;
693 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
694 // if (currentCharacter == testedChar1)
696 // else if (currentCharacter == testedChar2)
699 // currentPosition = temp;
703 // //need the unicode buffer
704 // if (withoutUnicodePtr == 0) {
705 // //buffer all the entries that have been left aside....
706 // withoutUnicodePtr = currentPosition - unicodeSize -
711 // withoutUnicodeBuffer,
713 // withoutUnicodePtr);
715 // //fill the buffer with the char
716 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
718 // } //-------------end unicode traitement--------------
720 if (currentCharacter == testedChar1)
722 else if (currentCharacter == testedChar2)
725 currentPosition = temp;
728 // if (withoutUnicodePtr != 0)
729 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
732 } catch (IndexOutOfBoundsException e) {
733 currentPosition = temp;
738 public final boolean getNextCharAsDigit() {
740 // handle the case of unicode.
741 // when a unicode appears then we must use a buffer that holds char
743 // At the end of this method currentCharacter holds the new visited char
744 // and currentPosition points right next after it
745 // Both previous lines are true if the currentCharacter is a digit
746 // On false, no side effect has occured.
747 // ALL getNextChar.... ARE OPTIMIZED COPIES
748 int temp = currentPosition;
750 currentCharacter = source[currentPosition++];
751 // if (((currentCharacter = source[currentPosition++]) == '\\')
752 // && (source[currentPosition] == 'u')) {
753 // //-------------unicode traitement ------------
754 // int c1, c2, c3, c4;
755 // int unicodeSize = 6;
756 // currentPosition++;
757 // while (source[currentPosition] == 'u') {
758 // currentPosition++;
762 // if (((c1 = Character.getNumericValue(source[currentPosition++]))
765 // || ((c2 = Character.getNumericValue(source[currentPosition++])) >
768 // || ((c3 = Character.getNumericValue(source[currentPosition++])) >
771 // || ((c4 = Character.getNumericValue(source[currentPosition++])) >
774 // currentPosition = temp;
778 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
779 // if (!Character.isDigit(currentCharacter)) {
780 // currentPosition = temp;
784 // //need the unicode buffer
785 // if (withoutUnicodePtr == 0) {
786 // //buffer all the entries that have been left aside....
787 // withoutUnicodePtr = currentPosition - unicodeSize -
792 // withoutUnicodeBuffer,
794 // withoutUnicodePtr);
796 // //fill the buffer with the char
797 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
799 // } //-------------end unicode traitement--------------
801 if (!Character.isDigit(currentCharacter)) {
802 currentPosition = temp;
805 // if (withoutUnicodePtr != 0)
806 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
809 } catch (IndexOutOfBoundsException e) {
810 currentPosition = temp;
815 public final boolean getNextCharAsDigit(int radix) {
817 // handle the case of unicode.
818 // when a unicode appears then we must use a buffer that holds char
820 // At the end of this method currentCharacter holds the new visited char
821 // and currentPosition points right next after it
822 // Both previous lines are true if the currentCharacter is a digit base
825 // On false, no side effect has occured.
826 // ALL getNextChar.... ARE OPTIMIZED COPIES
827 int temp = currentPosition;
829 currentCharacter = source[currentPosition++];
830 // if (((currentCharacter = source[currentPosition++]) == '\\')
831 // && (source[currentPosition] == 'u')) {
832 // //-------------unicode traitement ------------
833 // int c1, c2, c3, c4;
834 // int unicodeSize = 6;
835 // currentPosition++;
836 // while (source[currentPosition] == 'u') {
837 // currentPosition++;
841 // if (((c1 = Character.getNumericValue(source[currentPosition++]))
844 // || ((c2 = Character.getNumericValue(source[currentPosition++])) >
847 // || ((c3 = Character.getNumericValue(source[currentPosition++])) >
850 // || ((c4 = Character.getNumericValue(source[currentPosition++])) >
853 // currentPosition = temp;
857 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
858 // if (Character.digit(currentCharacter, radix) == -1) {
859 // currentPosition = temp;
863 // //need the unicode buffer
864 // if (withoutUnicodePtr == 0) {
865 // //buffer all the entries that have been left aside....
866 // withoutUnicodePtr = currentPosition - unicodeSize -
871 // withoutUnicodeBuffer,
873 // withoutUnicodePtr);
875 // //fill the buffer with the char
876 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
878 // } //-------------end unicode traitement--------------
880 if (Character.digit(currentCharacter, radix) == -1) {
881 currentPosition = temp;
884 // if (withoutUnicodePtr != 0)
885 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
888 } catch (IndexOutOfBoundsException e) {
889 currentPosition = temp;
894 public boolean getNextCharAsJavaIdentifierPart() {
896 // handle the case of unicode.
897 // when a unicode appears then we must use a buffer that holds char
899 // At the end of this method currentCharacter holds the new visited char
900 // and currentPosition points right next after it
901 // Both previous lines are true if the currentCharacter is a
902 // JavaIdentifierPart
903 // On false, no side effect has occured.
904 // ALL getNextChar.... ARE OPTIMIZED COPIES
905 int temp = currentPosition;
907 currentCharacter = source[currentPosition++];
908 // if (((currentCharacter = source[currentPosition++]) == '\\')
909 // && (source[currentPosition] == 'u')) {
910 // //-------------unicode traitement ------------
911 // int c1, c2, c3, c4;
912 // int unicodeSize = 6;
913 // currentPosition++;
914 // while (source[currentPosition] == 'u') {
915 // currentPosition++;
919 // if (((c1 = Character.getNumericValue(source[currentPosition++]))
922 // || ((c2 = Character.getNumericValue(source[currentPosition++])) >
925 // || ((c3 = Character.getNumericValue(source[currentPosition++])) >
928 // || ((c4 = Character.getNumericValue(source[currentPosition++])) >
931 // currentPosition = temp;
935 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
936 // if (!isPHPIdentifierPart(currentCharacter)) {
937 // currentPosition = temp;
941 // //need the unicode buffer
942 // if (withoutUnicodePtr == 0) {
943 // //buffer all the entries that have been left aside....
944 // withoutUnicodePtr = currentPosition - unicodeSize -
949 // withoutUnicodeBuffer,
951 // withoutUnicodePtr);
953 // //fill the buffer with the char
954 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
956 // } //-------------end unicode traitement--------------
958 if (!isPHPIdentifierPart(currentCharacter)) {
959 currentPosition = temp;
962 // if (withoutUnicodePtr != 0)
963 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
966 } catch (IndexOutOfBoundsException e) {
967 currentPosition = temp;
972 public int getCastOrParen() {
973 int tempPosition = currentPosition;
974 char tempCharacter = currentCharacter;
975 int tempToken = TokenNameLPAREN;
976 boolean found = false;
977 StringBuffer buf = new StringBuffer();
980 currentCharacter = source[currentPosition++];
981 } while (currentCharacter == ' ' || currentCharacter == '\t');
982 while (ObviousIdentCharNatures[currentCharacter] == C_LETTER) {
983 // while((currentCharacter >= 'a' && currentCharacter <= 'z') ||
984 // (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
985 buf.append(currentCharacter);
986 currentCharacter = source[currentPosition++];
988 if (buf.length() >= 3 && buf.length() <= 7) {
989 char[] data = buf.toString().toCharArray();
991 switch (data.length) {
994 if ((data[index] == 'i') && (data[++index] == 'n')
995 && (data[++index] == 't')) {
997 tempToken = TokenNameintCAST;
1002 if ((data[index] == 'b') && (data[++index] == 'o')
1003 && (data[++index] == 'o') && (data[++index] == 'l')) {
1005 tempToken = TokenNameboolCAST;
1008 if ((data[index] == 'r') && (data[++index] == 'e')
1009 && (data[++index] == 'a')
1010 && (data[++index] == 'l')) {
1012 tempToken = TokenNamedoubleCAST;
1017 // array unset float
1018 if ((data[index] == 'a') && (data[++index] == 'r')
1019 && (data[++index] == 'r') && (data[++index] == 'a')
1020 && (data[++index] == 'y')) {
1022 tempToken = TokenNamearrayCAST;
1025 if ((data[index] == 'u') && (data[++index] == 'n')
1026 && (data[++index] == 's')
1027 && (data[++index] == 'e')
1028 && (data[++index] == 't')) {
1030 tempToken = TokenNameunsetCAST;
1033 if ((data[index] == 'f') && (data[++index] == 'l')
1034 && (data[++index] == 'o')
1035 && (data[++index] == 'a')
1036 && (data[++index] == 't')) {
1038 tempToken = TokenNamedoubleCAST;
1044 // object string double
1045 if ((data[index] == 'o') && (data[++index] == 'b')
1046 && (data[++index] == 'j') && (data[++index] == 'e')
1047 && (data[++index] == 'c') && (data[++index] == 't')) {
1049 tempToken = TokenNameobjectCAST;
1052 if ((data[index] == 's') && (data[++index] == 't')
1053 && (data[++index] == 'r')
1054 && (data[++index] == 'i')
1055 && (data[++index] == 'n')
1056 && (data[++index] == 'g')) {
1058 tempToken = TokenNamestringCAST;
1061 if ((data[index] == 'd') && (data[++index] == 'o')
1062 && (data[++index] == 'u')
1063 && (data[++index] == 'b')
1064 && (data[++index] == 'l')
1065 && (data[++index] == 'e')) {
1067 tempToken = TokenNamedoubleCAST;
1074 if ((data[index] == 'b') && (data[++index] == 'o')
1075 && (data[++index] == 'o') && (data[++index] == 'l')
1076 && (data[++index] == 'e') && (data[++index] == 'a')
1077 && (data[++index] == 'n')) {
1079 tempToken = TokenNameboolCAST;
1082 if ((data[index] == 'i') && (data[++index] == 'n')
1083 && (data[++index] == 't')
1084 && (data[++index] == 'e')
1085 && (data[++index] == 'g')
1086 && (data[++index] == 'e')
1087 && (data[++index] == 'r')) {
1089 tempToken = TokenNameintCAST;
1095 while (currentCharacter == ' ' || currentCharacter == '\t') {
1096 currentCharacter = source[currentPosition++];
1098 if (currentCharacter == ')') {
1103 } catch (IndexOutOfBoundsException e) {
1105 currentCharacter = tempCharacter;
1106 currentPosition = tempPosition;
1107 return TokenNameLPAREN;
/**
 * Consumes a backtick-delimited string. Characters are read from
 * <code>source</code> until the closing '`' becomes the current character.
 * Backslash escapes are resolved through scanDoubleQuotedEscapeCharacter()
 * and line breaks inside the string are accepted.
 *
 * @throws InvalidInputException
 *             with UNTERMINATED_STRING when the end of <code>source</code>
 *             is reached before the closing backtick
 */
1110 public void consumeStringInterpolated() throws InvalidInputException {
1112 // consume next character
1113 unicodeAsBackSlash = false;
1114 currentCharacter = source[currentPosition++];
1115 // if (((currentCharacter = source[currentPosition++]) == '\\')
1116 // && (source[currentPosition] == 'u')) {
1117 // getNextUnicodeChar();
1119 // if (withoutUnicodePtr != 0) {
1120 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1121 // currentCharacter;
// main loop: runs until the closing backtick is the current character
1124 while (currentCharacter != '`') {
1125 /** ** in PHP \r and \n are valid in string literals *** */
1126 // if ((currentCharacter == '\n')
1127 // || (currentCharacter == '\r')) {
1128 // // relocate if finding another quote fairly close: thus
1130 // '/u000D' will be fully consumed
1131 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1132 // if (currentPosition + lookAhead == source.length)
1134 // if (source[currentPosition + lookAhead] == '\n')
1136 // if (source[currentPosition + lookAhead] == '\"') {
1137 // currentPosition += lookAhead + 1;
1141 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1143 if (currentCharacter == '\\') {
// remember where the escape starts so its length can be computed afterwards
1144 int escapeSize = currentPosition;
1145 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1146 // scanEscapeCharacter make a side effect on this value and
1148 // the previous value few lines down this one
1149 scanDoubleQuotedEscapeCharacter();
1150 escapeSize = currentPosition - escapeSize;
// withoutUnicodePtr == 0 means nothing has been copied into
// withoutUnicodeBuffer yet for this token
1151 if (withoutUnicodePtr == 0) {
1152 // buffer all the entries that have been left aside....
1153 withoutUnicodePtr = currentPosition - escapeSize - 1
1155 System.arraycopy(source, startPosition,
1156 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1157 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1158 } else { // overwrite the / in the buffer
1159 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1160 if (backSlashAsUnicodeInString) { // there are TWO \
1162 // where only one is correct
1163 withoutUnicodePtr--;
1166 } else if ((currentCharacter == '\r')
1167 || (currentCharacter == '\n')) {
// line breaks are legal inside the string; only the line table is updated
1168 if (recordLineSeparator) {
1169 pushLineSeparator();
1172 // consume next character
1173 unicodeAsBackSlash = false;
1174 currentCharacter = source[currentPosition++];
1175 // if (((currentCharacter = source[currentPosition++]) == '\\')
1176 // && (source[currentPosition] == 'u')) {
1177 // getNextUnicodeChar();
// once buffering has started, every consumed character is appended
1179 if (withoutUnicodePtr != 0) {
1180 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source[] means the string was never closed
1184 } catch (IndexOutOfBoundsException e) {
1185 // reset end position for error reporting
1186 currentPosition -= 2;
1187 throw new InvalidInputException(UNTERMINATED_STRING);
1188 } catch (InvalidInputException e) {
1189 if (e.getMessage().equals(INVALID_ESCAPE)) {
1190 // relocate if finding another quote fairly close: thus unicode
1191 // '/u000D' will be fully consumed
// error recovery: search at most 50 characters ahead for the closing
// backtick and move currentPosition just past it when found
1192 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1193 if (currentPosition + lookAhead == source.length)
1195 if (source[currentPosition + lookAhead] == '\n')
1197 if (source[currentPosition + lookAhead] == '`') {
1198 currentPosition += lookAhead + 1;
// NOTE(review): presumably the exception is rethrown after the recovery
// above - confirm against the complete method
1205 if (checkNonExternalizedStringLiterals) { // check for presence of NLS
1207 // //$NON-NLS-?$ where ? is an
// lazily create the NLSLine for the current line, then record this literal
1209 if (currentLine == null) {
1210 currentLine = new NLSLine();
1211 lines.add(currentLine);
1213 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1214 startPosition, currentPosition - 1));
/**
 * Consumes a single-quoted string. Characters are read from
 * <code>source</code> until the closing '\'' becomes the current character.
 * Backslash escapes are resolved through scanSingleQuotedEscapeCharacter()
 * and line breaks inside the string are accepted.
 *
 * @throws InvalidInputException
 *             with UNTERMINATED_STRING when the end of <code>source</code>
 *             is reached before the closing quote
 */
1218 public void consumeStringConstant() throws InvalidInputException {
1220 // consume next character
1221 unicodeAsBackSlash = false;
1222 currentCharacter = source[currentPosition++];
1223 // if (((currentCharacter = source[currentPosition++]) == '\\')
1224 // && (source[currentPosition] == 'u')) {
1225 // getNextUnicodeChar();
1227 // if (withoutUnicodePtr != 0) {
1228 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1229 // currentCharacter;
// main loop: runs until the closing single quote is the current character
1232 while (currentCharacter != '\'') {
1233 /** ** in PHP \r and \n are valid in string literals *** */
1234 // if ((currentCharacter == '\n')
1235 // || (currentCharacter == '\r')) {
1236 // // relocate if finding another quote fairly close: thus
1238 // '/u000D' will be fully consumed
1239 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1240 // if (currentPosition + lookAhead == source.length)
1242 // if (source[currentPosition + lookAhead] == '\n')
1244 // if (source[currentPosition + lookAhead] == '\"') {
1245 // currentPosition += lookAhead + 1;
1249 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1251 if (currentCharacter == '\\') {
// remember where the escape starts so its length can be computed afterwards
1252 int escapeSize = currentPosition;
1253 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1254 // scanEscapeCharacter make a side effect on this value and
1256 // the previous value few lines down this one
1257 scanSingleQuotedEscapeCharacter();
1258 escapeSize = currentPosition - escapeSize;
// withoutUnicodePtr == 0 means nothing has been copied into
// withoutUnicodeBuffer yet for this token
1259 if (withoutUnicodePtr == 0) {
1260 // buffer all the entries that have been left aside....
1261 withoutUnicodePtr = currentPosition - escapeSize - 1
1263 System.arraycopy(source, startPosition,
1264 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1265 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1266 } else { // overwrite the / in the buffer
1267 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1268 if (backSlashAsUnicodeInString) { // there are TWO \
1270 // where only one is correct
1271 withoutUnicodePtr--;
1274 } else if ((currentCharacter == '\r')
1275 || (currentCharacter == '\n')) {
// line breaks are legal inside the string; only the line table is updated
1276 if (recordLineSeparator) {
1277 pushLineSeparator();
1280 // consume next character
1281 unicodeAsBackSlash = false;
1282 currentCharacter = source[currentPosition++];
1283 // if (((currentCharacter = source[currentPosition++]) == '\\')
1284 // && (source[currentPosition] == 'u')) {
1285 // getNextUnicodeChar();
// once buffering has started, every consumed character is appended
1287 if (withoutUnicodePtr != 0) {
1288 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source[] means the string was never closed
1292 } catch (IndexOutOfBoundsException e) {
1293 // reset end position for error reporting
1294 currentPosition -= 2;
1295 throw new InvalidInputException(UNTERMINATED_STRING);
1296 } catch (InvalidInputException e) {
1297 if (e.getMessage().equals(INVALID_ESCAPE)) {
1298 // relocate if finding another quote fairly close: thus unicode
1299 // '/u000D' will be fully consumed
// error recovery: search at most 50 characters ahead for the closing
// single quote and move currentPosition just past it when found
1300 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1301 if (currentPosition + lookAhead == source.length)
1303 if (source[currentPosition + lookAhead] == '\n')
1305 if (source[currentPosition + lookAhead] == '\'') {
1306 currentPosition += lookAhead + 1;
// NOTE(review): presumably the exception is rethrown after the recovery
// above - confirm against the complete method
1313 if (checkNonExternalizedStringLiterals) { // check for presence of NLS
1315 // //$NON-NLS-?$ where ? is an
// lazily create the NLSLine for the current line, then record this literal
1317 if (currentLine == null) {
1318 currentLine = new NLSLine();
1319 lines.add(currentLine);
1321 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1322 startPosition, currentPosition - 1));
/**
 * Consumes a double-quoted string. Characters are read from
 * <code>source</code> until a '"' is reached while
 * <code>openDollarBrace</code> is not greater than zero. Backslash escapes
 * are resolved through scanDoubleQuotedEscapeCharacter() and line breaks
 * inside the string are accepted.
 *
 * @throws InvalidInputException
 *             with UNTERMINATED_STRING when the end of <code>source</code>
 *             is reached before the string is closed
 */
1326 public void consumeStringLiteral() throws InvalidInputException {
// counter consulted by the loop condition: a '"' does not end the string
// while this is greater than zero
1328 int openDollarBrace = 0;
1329 // consume next character
1330 unicodeAsBackSlash = false;
1331 currentCharacter = source[currentPosition++];
1332 while (currentCharacter != '"' || openDollarBrace > 0) {
1333 /** ** in PHP \r and \n are valid in string literals *** */
1334 if (currentCharacter == '\\') {
// remember where the escape starts so its length can be computed afterwards
1335 int escapeSize = currentPosition;
1336 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1337 // scanEscapeCharacter make a side effect on this value and
1339 // the previous value few lines down this one
1340 scanDoubleQuotedEscapeCharacter();
1341 escapeSize = currentPosition - escapeSize;
// withoutUnicodePtr == 0 means nothing has been copied into
// withoutUnicodeBuffer yet for this token
1342 if (withoutUnicodePtr == 0) {
1343 // buffer all the entries that have been left aside....
1344 withoutUnicodePtr = currentPosition - escapeSize - 1
1346 System.arraycopy(source, startPosition,
1347 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1348 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1349 } else { // overwrite the / in the buffer
1350 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1351 if (backSlashAsUnicodeInString) { // there are TWO \
1353 // where only one is correct
1354 withoutUnicodePtr--;
// NOTE(review): the three branches below detect "${", "{$" and "}";
// presumably they increment / decrement openDollarBrace - TODO confirm
1357 } else if (currentCharacter == '$'
1358 && source[currentPosition] == '{') {
1360 } else if (currentCharacter == '{'
1361 && source[currentPosition] == '$') {
1363 } else if (currentCharacter == '}') {
1365 } else if ((currentCharacter == '\r')
1366 || (currentCharacter == '\n')) {
// line breaks are legal inside the string; only the line table is updated
1367 if (recordLineSeparator) {
1368 pushLineSeparator();
1371 // consume next character
1372 unicodeAsBackSlash = false;
1373 currentCharacter = source[currentPosition++];
// once buffering has started, every consumed character is appended
1374 if (withoutUnicodePtr != 0) {
1375 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source[] means the string was never closed
1378 } catch (IndexOutOfBoundsException e) {
1379 // reset end position for error reporting
1380 currentPosition -= 2;
1381 throw new InvalidInputException(UNTERMINATED_STRING);
1382 } catch (InvalidInputException e) {
1383 if (e.getMessage().equals(INVALID_ESCAPE)) {
1384 // relocate if finding another quote fairly close: thus unicode
1385 // '/u000D' will be fully consumed
// error recovery: search at most 50 characters ahead for the closing
// double quote and move currentPosition just past it when found
1386 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1387 if (currentPosition + lookAhead == source.length)
1389 if (source[currentPosition + lookAhead] == '\n')
1391 if (source[currentPosition + lookAhead] == '\"') {
1392 currentPosition += lookAhead + 1;
// NOTE(review): presumably the exception is rethrown after the recovery
// above - confirm against the complete method
1399 if (checkNonExternalizedStringLiterals) { // check for presence of NLS
1401 // //$NON-NLS-?$ where ? is an
// lazily create the NLSLine for the current line, then record this literal
1403 if (currentLine == null) {
1404 currentLine = new NLSLine();
1405 lines.add(currentLine);
1407 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1408 startPosition, currentPosition - 1));
/**
 * Scans the next token starting at <code>currentPosition</code> and returns
 * its kind as one of the TokenName... constants. <code>startPosition</code>
 * is set to the start of the returned token and <code>currentPosition</code>
 * is left after its last consumed character.
 *
 * @return the kind of the scanned token; TokenNameEOF once the end of the
 *         source is reached
 * @throws InvalidInputException
 *             for malformed input, e.g. UNTERMINATED_COMMENT for a block
 *             comment that is never closed
 */
1412 public int getNextToken() throws InvalidInputException {
// NOTE(review): presumably guarded by a check that the scanner is not in
// PHP mode (guard not visible here) - text outside PHP is inline HTML
1414 return getInlinedHTMLToken(currentPosition);
// a pending filler token is handed out first and the slot is then reset
// to TokenNameEOF
1416 if (fFillerToken != TokenNameEOF) {
1418 startPosition = currentPosition;
1419 tempToken = fFillerToken;
1420 fFillerToken = TokenNameEOF;
1423 this.wasAcr = false;
1425 jumpOverMethodBody();
1427 return currentPosition > source.length ? TokenNameEOF
// start of a regular token: reset the unicode buffer pointer
1432 withoutUnicodePtr = 0;
1433 // ---------Consume white space and handles
1434 // startPosition---------
1435 int whiteStart = currentPosition;
1436 startPosition = currentPosition;
1437 currentCharacter = source[currentPosition++];
1439 while ((currentCharacter == ' ')
1440 || Character.isWhitespace(currentCharacter)) {
1441 startPosition = currentPosition;
1442 currentCharacter = source[currentPosition++];
1443 if ((currentCharacter == '\r')
1444 || (currentCharacter == '\n')) {
1445 checkNonExternalizeString();
1446 if (recordLineSeparator) {
1447 pushLineSeparator();
// when whitespace is tokenized and at least one whitespace character was
// skipped, report it as a token of its own
1453 if (tokenizeWhiteSpace
1454 && (whiteStart != currentPosition - 1)) {
1455 // reposition scanner in case we are interested by
1458 startPosition = whiteStart;
1459 return TokenNameWHITESPACE;
1461 // little trick to get out in the middle of a source
1463 if (currentPosition > eofPosition)
1464 return TokenNameEOF;
1465 // ---------Identify the next token-------------
1466 switch (currentCharacter) {
1468 return getCastOrParen();
1470 return TokenNameRPAREN;
1472 return TokenNameLBRACE;
1474 return TokenNameRBRACE;
1476 return TokenNameLBRACKET;
1478 return TokenNameRBRACKET;
1480 return TokenNameSEMICOLON;
1482 return TokenNameCOMMA;
1484 if (getNextChar('='))
1485 return TokenNameDOT_EQUAL;
1486 if (getNextCharAsDigit())
1487 return scanNumber(true);
1488 return TokenNameDOT;
1491 if ((test = getNextChar('+', '=')) == 0)
1492 return TokenNamePLUS_PLUS;
1494 return TokenNamePLUS_EQUAL;
1495 return TokenNamePLUS;
1499 if ((test = getNextChar('-', '=')) == 0)
1500 return TokenNameMINUS_MINUS;
1502 return TokenNameMINUS_EQUAL;
1503 if (getNextChar('>'))
1504 return TokenNameMINUS_GREATER;
1505 return TokenNameMINUS;
1508 if (getNextChar('='))
1509 return TokenNameTWIDDLE_EQUAL;
1510 return TokenNameTWIDDLE;
1512 if (getNextChar('=')) {
1513 if (getNextChar('=')) {
1514 return TokenNameNOT_EQUAL_EQUAL;
1516 return TokenNameNOT_EQUAL;
1518 return TokenNameNOT;
1520 if (getNextChar('='))
1521 return TokenNameMULTIPLY_EQUAL;
1522 return TokenNameMULTIPLY;
1524 if (getNextChar('='))
1525 return TokenNameREMAINDER_EQUAL;
1526 return TokenNameREMAINDER;
// tokens of the less-than family: one character of look-ahead is read and
// the position is restored when only a plain TokenNameLESS can be returned
1528 int oldPosition = currentPosition;
1530 currentCharacter = source[currentPosition++];
1531 } catch (IndexOutOfBoundsException e) {
1532 currentPosition = oldPosition;
1533 return TokenNameLESS;
1535 switch (currentCharacter) {
1537 return TokenNameLESS_EQUAL;
1539 return TokenNameNOT_EQUAL;
1541 if (getNextChar('='))
1542 return TokenNameLEFT_SHIFT_EQUAL;
// a further '<' starts a heredoc: skip whitespace, then read the identifier
// that serves as the heredoc end tag
1543 if (getNextChar('<')) {
1544 currentCharacter = source[currentPosition++];
1545 while (Character.isWhitespace(currentCharacter)) {
1546 currentCharacter = source[currentPosition++];
1548 int heredocStart = currentPosition - 1;
1549 int heredocLength = 0;
1550 if (isPHPIdentifierStart(currentCharacter)) {
1551 currentCharacter = source[currentPosition++];
1553 return TokenNameERROR;
1555 while (isPHPIdentifierPart(currentCharacter)) {
1556 currentCharacter = source[currentPosition++];
1558 heredocLength = currentPosition - heredocStart
1560 // heredoc end-tag determination
1561 boolean endTag = true;
// after each line break the following characters are compared with the
// identifier recorded at heredocStart
1564 ch = source[currentPosition++];
1565 if (ch == '\r' || ch == '\n') {
1566 if (recordLineSeparator) {
1567 pushLineSeparator();
1571 for (int i = 0; i < heredocLength; i++) {
1572 if (source[currentPosition + i] != source[heredocStart
1579 currentPosition += heredocLength - 1;
1580 currentCharacter = source[currentPosition++];
1581 break; // do...while loop
1587 return TokenNameHEREDOC;
1589 return TokenNameLEFT_SHIFT;
1591 currentPosition = oldPosition;
1592 return TokenNameLESS;
1596 if ((test = getNextChar('=', '>')) == 0)
1597 return TokenNameGREATER_EQUAL;
1599 if ((test = getNextChar('=', '>')) == 0)
1600 return TokenNameRIGHT_SHIFT_EQUAL;
1601 return TokenNameRIGHT_SHIFT;
1603 return TokenNameGREATER;
1606 if (getNextChar('=')) {
1607 if (getNextChar('=')) {
1608 return TokenNameEQUAL_EQUAL_EQUAL;
1610 return TokenNameEQUAL_EQUAL;
1612 if (getNextChar('>'))
1613 return TokenNameEQUAL_GREATER;
1614 return TokenNameEQUAL;
1617 if ((test = getNextChar('&', '=')) == 0)
1618 return TokenNameAND_AND;
1620 return TokenNameAND_EQUAL;
1621 return TokenNameAND;
1625 if ((test = getNextChar('|', '=')) == 0)
1626 return TokenNameOR_OR;
1628 return TokenNameOR_EQUAL;
1632 if (getNextChar('='))
1633 return TokenNameXOR_EQUAL;
1634 return TokenNameXOR;
// a '>' at this point switches to inline HTML; otherwise the token is
// TokenNameQUESTION
1636 if (getNextChar('>')) {
1638 if (currentPosition == source.length) {
1640 return TokenNameINLINE_HTML;
1642 return getInlinedHTMLToken(currentPosition - 2);
1644 return TokenNameQUESTION;
1646 if (getNextChar(':'))
1647 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1648 return TokenNameCOLON;
// the three string flavours are delegated to their consume... methods
1652 consumeStringConstant();
1653 return TokenNameStringSingleQuote;
1655 // if (tokenizeStrings) {
1656 consumeStringLiteral();
1657 return TokenNameStringDoubleQuote;
1659 // return TokenNameEncapsedString2;
1661 // if (tokenizeStrings) {
1662 consumeStringInterpolated();
1663 return TokenNameStringInterpolated;
1665 // return TokenNameEncapsedString0;
// '/' and '#': divide operators, line comments and block comments
1668 char startChar = currentCharacter;
1669 if (getNextChar('=') && startChar == '/') {
1670 return TokenNameDIVIDE_EQUAL;
// line comment: introduced by '#' or by a second '/'
1673 if ((startChar == '#')
1674 || (test = getNextChar('/', '*')) == 0) {
1676 this.lastCommentLinePosition = this.currentPosition;
1677 int endPositionForLineComment = 0;
1678 try { // get the next char
1679 currentCharacter = source[currentPosition++];
1680 // if (((currentCharacter =
1681 // source[currentPosition++])
1683 // && (source[currentPosition] == 'u')) {
1684 // //-------------unicode traitement
1686 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1687 // currentPosition++;
1688 // while (source[currentPosition] == 'u') {
1689 // currentPosition++;
1692 // Character.getNumericValue(source[currentPosition++]))
1696 // Character.getNumericValue(source[currentPosition++]))
1700 // Character.getNumericValue(source[currentPosition++]))
1704 // Character.getNumericValue(source[currentPosition++]))
1708 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1710 // currentCharacter =
1711 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 +
1715 // handle the \\u case manually into comment
1716 // if (currentCharacter == '\\') {
1717 // if (source[currentPosition] == '\\')
1718 // currentPosition++;
1719 // } //jump over the \\
// the unicode handling above is commented out and no visible statement
// sets this flag to true
1720 boolean isUnicode = false;
1721 while (currentCharacter != '\r'
1722 && currentCharacter != '\n') {
1723 this.lastCommentLinePosition = this.currentPosition;
1724 if (currentCharacter == '?') {
1725 if (getNextChar('>')) {
1726 // ?> breaks line comments
1727 startPosition = currentPosition - 2;
1729 return TokenNameINLINE_HTML;
1732 // get the next char
1734 currentCharacter = source[currentPosition++];
1735 // if (((currentCharacter =
1736 // source[currentPosition++])
1738 // && (source[currentPosition] == 'u')) {
1739 // isUnicode = true;
1740 // //-------------unicode traitement
1742 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1743 // currentPosition++;
1744 // while (source[currentPosition] == 'u') {
1745 // currentPosition++;
1748 // Character.getNumericValue(source[currentPosition++]))
1752 // Character.getNumericValue(
1753 // source[currentPosition++]))
1757 // Character.getNumericValue(
1758 // source[currentPosition++]))
1762 // Character.getNumericValue(
1763 // source[currentPosition++]))
1767 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1769 // currentCharacter =
1770 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 +
1774 // handle the \\u case manually into comment
1775 // if (currentCharacter == '\\') {
1776 // if (source[currentPosition] == '\\')
1777 // currentPosition++;
1778 // } //jump over the \\
// NOTE(review): the two assignments below look like the isUnicode /
// non-unicode alternatives (condition not visible) - confirm
1781 endPositionForLineComment = currentPosition - 6;
1783 endPositionForLineComment = currentPosition - 1;
1785 // recordComment(false);
1786 recordComment(TokenNameCOMMENT_LINE);
1787 if (this.taskTags != null)
1788 checkTaskTag(this.startPosition,
1789 this.currentPosition);
1790 if ((currentCharacter == '\r')
1791 || (currentCharacter == '\n')) {
1792 checkNonExternalizeString();
1793 if (recordLineSeparator) {
1795 pushUnicodeLineSeparator();
1797 pushLineSeparator();
1803 if (tokenizeComments) {
1805 currentPosition = endPositionForLineComment;
1806 // reset one character behind
1808 return TokenNameCOMMENT_LINE;
1810 } catch (IndexOutOfBoundsException e) { // an eof
1813 if (tokenizeComments) {
1815 // reset one character behind
1816 return TokenNameCOMMENT_LINE;
1822 // traditional and annotation comment
// 'star' records whether the previously consumed character was '*', so that
// the closing "*/" sequence can be recognized
1823 boolean isJavadoc = false, star = false;
1824 // consume next character
1825 unicodeAsBackSlash = false;
1826 currentCharacter = source[currentPosition++];
1827 // if (((currentCharacter =
1828 // source[currentPosition++]) ==
1830 // && (source[currentPosition] == 'u')) {
1831 // getNextUnicodeChar();
1833 // if (withoutUnicodePtr != 0) {
1834 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1835 // currentCharacter;
1838 if (currentCharacter == '*') {
1842 if ((currentCharacter == '\r')
1843 || (currentCharacter == '\n')) {
1844 checkNonExternalizeString();
1845 if (recordLineSeparator) {
1846 pushLineSeparator();
1851 try { // get the next char
1852 currentCharacter = source[currentPosition++];
1853 // if (((currentCharacter =
1854 // source[currentPosition++])
1856 // && (source[currentPosition] == 'u')) {
1857 // //-------------unicode traitement
1859 // getNextUnicodeChar();
1861 // handle the \\u case manually into comment
1862 // if (currentCharacter == '\\') {
1863 // if (source[currentPosition] == '\\')
1864 // currentPosition++;
1865 // //jump over the \\
1867 // empty comment is not a javadoc /**/
1868 if (currentCharacter == '/') {
1871 // loop until end of comment */
1872 while ((currentCharacter != '/') || (!star)) {
1873 if ((currentCharacter == '\r')
1874 || (currentCharacter == '\n')) {
1875 checkNonExternalizeString();
1876 if (recordLineSeparator) {
1877 pushLineSeparator();
1882 star = currentCharacter == '*';
1884 currentCharacter = source[currentPosition++];
1885 // if (((currentCharacter =
1886 // source[currentPosition++])
1888 // && (source[currentPosition] == 'u')) {
1889 // //-------------unicode traitement
1891 // getNextUnicodeChar();
1893 // handle the \\u case manually into comment
1894 // if (currentCharacter == '\\') {
1895 // if (source[currentPosition] == '\\')
1896 // currentPosition++;
1897 // } //jump over the \\
1899 // recordComment(isJavadoc);
// NOTE(review): presumably isJavadoc selects between the PHPDOC and BLOCK
// variants below (condition not visible) - confirm
1901 recordComment(TokenNameCOMMENT_PHPDOC);
1903 recordComment(TokenNameCOMMENT_BLOCK);
1906 if (tokenizeComments) {
1908 return TokenNameCOMMENT_PHPDOC;
1909 return TokenNameCOMMENT_BLOCK;
1912 if (this.taskTags != null) {
1913 checkTaskTag(this.startPosition,
1914 this.currentPosition);
// running off the end of source[] inside a block comment
1916 } catch (IndexOutOfBoundsException e) {
1917 // reset end position for error reporting
1918 currentPosition -= 2;
1919 throw new InvalidInputException(
1920 UNTERMINATED_COMMENT);
1924 return TokenNameDIVIDE;
1928 return TokenNameEOF;
1929 // the atEnd may not be <currentPosition ==
1930 // source.length> if
1931 // source is only some part of a real (external) stream
1932 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// '$' followed by an identifier start is scanned via
// scanIdentifierOrKeyword(true); otherwise the position is restored and a
// bare TokenNameDOLLAR is returned
1934 if (currentCharacter == '$') {
1935 int oldPosition = currentPosition;
1937 currentCharacter = source[currentPosition++];
1938 if (isPHPIdentifierStart(currentCharacter)) {
1939 return scanIdentifierOrKeyword(true);
1941 currentPosition = oldPosition;
1942 return TokenNameDOLLAR;
1944 } catch (IndexOutOfBoundsException e) {
1945 currentPosition = oldPosition;
1946 return TokenNameDOLLAR;
// remaining possibilities: identifier / keyword, number, or an error token
1949 if (isPHPIdentifierStart(currentCharacter))
1950 return scanIdentifierOrKeyword(false);
1951 if (Character.isDigit(currentCharacter))
1952 return scanNumber(false);
1953 return TokenNameERROR;
1956 } // -----------------end switch while try--------------------
// reading past the end of source[] anywhere above ends the token stream
1957 catch (IndexOutOfBoundsException e) {
1960 return TokenNameEOF;
1965 * @throws InvalidInputException
1967 private int getInlinedHTMLToken(int start) throws InvalidInputException {
// Scans text outside of PHP code, looking for an opening "<?" tag.
// 'start' becomes the startPosition of the reported token. The visible
// return values are TokenNameINLINE_HTML and, when currentPosition is
// already beyond the source, TokenNameEOF.
1968 boolean phpShortTag = false; // true, if <?= detected
// already past the end: clamp the position and report end of file
1969 if (currentPosition > source.length) {
1970 currentPosition = source.length;
1971 return TokenNameEOF;
1973 startPosition = start;
1976 currentCharacter = source[currentPosition++];
1977 if (currentCharacter == '<') {
1978 if (getNextChar('?')) {
1979 currentCharacter = source[currentPosition++];
// "<?" followed by something other than 'P' / 'p': short tag forms
1980 if ((currentCharacter != 'P')
1981 && (currentCharacter != 'p')) {
1982 if (currentCharacter != '=') { // <?=
1984 phpShortTag = false;
1989 if (ignorePHPOneLiner) { // for CodeFormatter
1990 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
// the filler token is returned by the following getNextToken() call
1993 fFillerToken = TokenNameECHO_INVISIBLE;
1995 return TokenNameINLINE_HTML;
// look for a case-insensitive "xml" after "<?"
1998 boolean foundXML = false;
1999 if (getNextChar('X', 'x') >= 0) {
2000 if (getNextChar('M', 'm') >= 0) {
2001 if (getNextChar('L', 'l') >= 0) {
2010 fFillerToken = TokenNameECHO_INVISIBLE;
2012 return TokenNameINLINE_HTML;
// "<?p" / "<?P" was read: check for the remaining "hp" (case-insensitive)
2015 if (getNextChar('H', 'h') >= 0) {
2016 if (getNextChar('P', 'p') >= 0) {
2018 if (ignorePHPOneLiner) {
2019 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
2021 return TokenNameINLINE_HTML;
2025 return TokenNameINLINE_HTML;
// keep the line table up to date while skipping HTML text
2033 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
2034 if (recordLineSeparator) {
2035 pushLineSeparator();
2040 } // -----------------while--------------------
2042 return TokenNameINLINE_HTML;
2043 } // -----------------try--------------------
// end of source reached without an opening tag: the scanned text is still
// reported as inline HTML
2044 catch (IndexOutOfBoundsException e) {
2045 startPosition = start;
2049 return TokenNameINLINE_HTML;
2053 * check if the PHP is only in this line (for CodeFormatter)
2057 private int lookAheadLinePHPTag() {
// Looks ahead from currentPosition using a private cursor, tracking whether
// the cursor is inside a single- or double-quoted string. Returns
// TokenNameEOF (used as a dummy value) when "?>" is found, after moving
// currentPosition behind it; every other visible exit returns
// TokenNameINLINE_HTML.
2058 int currentPositionInLine = currentPosition;
2059 char previousCharInLine = ' ';
2060 char currentCharInLine = ' ';
2061 boolean singleQuotedStringActive = false;
2062 boolean doubleQuotedStringActive = false;
2065 // look ahead in this line
2067 previousCharInLine = currentCharInLine;
2068 currentCharInLine = source[currentPositionInLine++];
2069 switch (currentCharInLine) {
// NOTE(review): the case labels of this switch are not visible; the
// remarks below are derived from the branch bodies only
2071 if (previousCharInLine == '?') {
2072 // update the scanner's current Position in the source
2073 currentPosition = currentPositionInLine;
2074 // use as "dummy" token
2075 return TokenNameEOF;
2079 if (doubleQuotedStringActive) {
2080 // ignore escaped characters in double quoted strings
2081 previousCharInLine = currentCharInLine;
2082 currentCharInLine = source[currentPositionInLine++];
// toggle the double-quoted state unless inside a single-quoted string
2085 if (doubleQuotedStringActive) {
2086 doubleQuotedStringActive = false;
2088 if (!singleQuotedStringActive) {
2089 doubleQuotedStringActive = true;
// a single-quoted string is only closed when the previous character is
// not a backslash
2094 if (singleQuotedStringActive) {
2095 if (previousCharInLine != '\\') {
2096 singleQuotedStringActive = false;
2099 if (!doubleQuotedStringActive) {
2100 singleQuotedStringActive = true;
2106 return TokenNameINLINE_HTML;
2108 if (!singleQuotedStringActive && !doubleQuotedStringActive) {
2110 return TokenNameINLINE_HTML;
// a character preceded by '/' outside of any string - presumably the
// start of a comment (TODO confirm against the case labels)
2114 if (previousCharInLine == '/' && !singleQuotedStringActive
2115 && !doubleQuotedStringActive) {
2117 return TokenNameINLINE_HTML;
2121 if (previousCharInLine == '/' && !singleQuotedStringActive
2122 && !doubleQuotedStringActive) {
2124 return TokenNameINLINE_HTML;
// end of source reached while looking ahead
2129 } catch (IndexOutOfBoundsException e) {
2131 currentPosition = currentPositionInLine;
2132 return TokenNameINLINE_HTML;
2136 // public final void getNextUnicodeChar()
2137 // throws IndexOutOfBoundsException, InvalidInputException {
2139 // //handle the case of unicode.
2140 // //when a unicode appears then we must use a buffer that holds char
2142 // //At the end of this method currentCharacter holds the new visited char
2143 // //and currentPosition points right next after it
2145 // //ALL getNextChar.... ARE OPTIMIZED COPIES
2147 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2148 // currentPosition++;
2149 // while (source[currentPosition] == 'u') {
2150 // currentPosition++;
2154 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2156 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2158 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2160 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2162 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2164 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2165 // //need the unicode buffer
2166 // if (withoutUnicodePtr == 0) {
2167 // //buffer all the entries that have been left aside....
2168 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2169 // System.arraycopy(
2172 // withoutUnicodeBuffer,
2174 // withoutUnicodePtr);
2176 // //fill the buffer with the char
2177 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2179 // unicodeAsBackSlash = currentCharacter == '\\';
2182 * Tokenize a method body, assuming that curly brackets are properly
2185 public final void jumpOverMethodBody() {
// Skips over source text without producing tokens: whitespace, string
// literals, comments, identifiers and numbers are consumed while line
// separators are still recorded when recordLineSeparator is set.
2186 this.wasAcr = false;
2189 while (true) { // loop for jumping over comments
2190 // ---------Consume white space and handles
2191 // startPosition---------
2192 boolean isWhiteSpace;
2194 startPosition = currentPosition;
2195 currentCharacter = source[currentPosition++];
2196 // if (((currentCharacter = source[currentPosition++]) ==
2198 // && (source[currentPosition] == 'u')) {
2199 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2201 if (recordLineSeparator
2202 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2203 pushLineSeparator();
2204 isWhiteSpace = Character.isWhitespace(currentCharacter);
2206 } while (isWhiteSpace);
2207 // -------consume token until } is found---------
2208 switch (currentCharacter) {
// NOTE(review): the case labels of this switch are not visible; the
// section remarks below are derived from the branch bodies only
2219 test = getNextChar('\\');
2222 scanDoubleQuotedEscapeCharacter();
2223 } catch (InvalidInputException ex) {
2227 // try { // consume next character
2228 unicodeAsBackSlash = false;
2229 currentCharacter = source[currentPosition++];
2230 // if (((currentCharacter = source[currentPosition++])
2232 // && (source[currentPosition] == 'u')) {
2233 // getNextUnicodeChar();
2235 if (withoutUnicodePtr != 0) {
2236 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2239 // } catch (InvalidInputException ex) {
2247 // try { // consume next character
2248 unicodeAsBackSlash = false;
2249 currentCharacter = source[currentPosition++];
2250 // if (((currentCharacter = source[currentPosition++])
2252 // && (source[currentPosition] == 'u')) {
2253 // getNextUnicodeChar();
2255 if (withoutUnicodePtr != 0) {
2256 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2259 // } catch (InvalidInputException ex) {
// skip a double-quoted string up to its closing quote
2261 while (currentCharacter != '"') {
2262 if (currentCharacter == '\r') {
2263 if (source[currentPosition] == '\n')
2266 // the string cannot go further that the line
2268 if (currentCharacter == '\n') {
2270 // the string cannot go further that the line
2272 if (currentCharacter == '\\') {
2274 scanDoubleQuotedEscapeCharacter();
2275 } catch (InvalidInputException ex) {
2279 // try { // consume next character
2280 unicodeAsBackSlash = false;
2281 currentCharacter = source[currentPosition++];
2282 // if (((currentCharacter =
2283 // source[currentPosition++]) == '\\')
2284 // && (source[currentPosition] == 'u')) {
2285 // getNextUnicodeChar();
2287 if (withoutUnicodePtr != 0) {
2288 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2291 // } catch (InvalidInputException ex) {
2294 } catch (IndexOutOfBoundsException e) {
// comments: getNextChar('/', '*') returning 0 means a second '/' follows,
// i.e. a line comment that is skipped up to the next line break
2300 if ((test = getNextChar('/', '*')) == 0) {
2303 // get the next char
2304 currentCharacter = source[currentPosition++];
2305 // if (((currentCharacter =
2306 // source[currentPosition++]) ==
2308 // && (source[currentPosition] == 'u')) {
2309 // //-------------unicode traitement ------------
2310 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2311 // currentPosition++;
2312 // while (source[currentPosition] == 'u') {
2313 // currentPosition++;
2316 // Character.getNumericValue(source[currentPosition++]))
2320 // Character.getNumericValue(source[currentPosition++]))
2324 // Character.getNumericValue(source[currentPosition++]))
2328 // Character.getNumericValue(source[currentPosition++]))
2331 // //error don't care of the value
2332 // currentCharacter = 'A';
2333 // } //something different from \n and \r
2335 // currentCharacter =
2336 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2339 while (currentCharacter != '\r'
2340 && currentCharacter != '\n') {
2341 // get the next char
2342 currentCharacter = source[currentPosition++];
2343 // if (((currentCharacter =
2344 // source[currentPosition++])
2346 // && (source[currentPosition] == 'u')) {
2347 // //-------------unicode traitement
2349 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2350 // currentPosition++;
2351 // while (source[currentPosition] == 'u') {
2352 // currentPosition++;
2355 // Character.getNumericValue(source[currentPosition++]))
2359 // Character.getNumericValue(source[currentPosition++]))
2363 // Character.getNumericValue(source[currentPosition++]))
2367 // Character.getNumericValue(source[currentPosition++]))
2370 // //error don't care of the value
2371 // currentCharacter = 'A';
2372 // } //something different from \n and \r
2374 // currentCharacter =
2375 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 +
2380 if (recordLineSeparator
2381 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2382 pushLineSeparator();
2383 } catch (IndexOutOfBoundsException e) {
2384 } // an eof will them be generated
2388 // traditional and annotation comment
// 'star' records whether the previously consumed character was '*', so that
// the closing "*/" sequence can be recognized
2389 boolean star = false;
2390 // try { // consume next character
2391 unicodeAsBackSlash = false;
2392 currentCharacter = source[currentPosition++];
2393 // if (((currentCharacter = source[currentPosition++])
2395 // && (source[currentPosition] == 'u')) {
2396 // getNextUnicodeChar();
2398 if (withoutUnicodePtr != 0) {
2399 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2402 // } catch (InvalidInputException ex) {
2404 if (currentCharacter == '*') {
2407 if (recordLineSeparator
2408 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2409 pushLineSeparator();
2410 try { // get the next char
2411 currentCharacter = source[currentPosition++];
2412 // if (((currentCharacter =
2413 // source[currentPosition++]) ==
2415 // && (source[currentPosition] == 'u')) {
2416 // //-------------unicode traitement ------------
2417 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2418 // currentPosition++;
2419 // while (source[currentPosition] == 'u') {
2420 // currentPosition++;
2423 // Character.getNumericValue(source[currentPosition++]))
2427 // Character.getNumericValue(source[currentPosition++]))
2431 // Character.getNumericValue(source[currentPosition++]))
2435 // Character.getNumericValue(source[currentPosition++]))
2438 // //error don't care of the value
2439 // currentCharacter = 'A';
2440 // } //something different from * and /
2442 // currentCharacter =
2443 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2446 // loop until end of comment */
2447 while ((currentCharacter != '/') || (!star)) {
2448 if (recordLineSeparator
2449 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2450 pushLineSeparator();
2451 star = currentCharacter == '*';
2453 currentCharacter = source[currentPosition++];
2454 // if (((currentCharacter =
2455 // source[currentPosition++])
2457 // && (source[currentPosition] == 'u')) {
2458 // //-------------unicode traitement
2460 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2461 // currentPosition++;
2462 // while (source[currentPosition] == 'u') {
2463 // currentPosition++;
2466 // Character.getNumericValue(source[currentPosition++]))
2470 // Character.getNumericValue(source[currentPosition++]))
2474 // Character.getNumericValue(source[currentPosition++]))
2478 // Character.getNumericValue(source[currentPosition++]))
2481 // //error don't care of the value
2482 // currentCharacter = 'A';
2483 // } //something different from * and /
2485 // currentCharacter =
2486 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 +
2491 } catch (IndexOutOfBoundsException e) {
// identifiers / variables and numbers are scanned only to be skipped; the
// '$' test tells scanIdentifierOrKeyword whether a variable is being read
2499 if (isPHPIdentOrVarStart(currentCharacter)) {
2501 scanIdentifierOrKeyword((currentCharacter == '$'));
2502 } catch (InvalidInputException ex) {
2507 if (ObviousIdentCharNatures[currentCharacter] == C_DIGIT) {
2508 // if (Character.isDigit(currentCharacter)) {
2511 } catch (InvalidInputException ex) {
2518 // -----------------end switch while try--------------------
// both exception kinds are caught at the end of the method
2519 } catch (IndexOutOfBoundsException e) {
2520 } catch (InvalidInputException e) {
2525 // public final boolean jumpOverUnicodeWhiteSpace()
2526 // throws InvalidInputException {
2528 // //handle the case of unicode. Jump over the next whiteSpace
2529 // //making startPosition pointing on the next available char
2530 // //On false, the currentCharacter is filled up with a potential
2534 // this.wasAcr = false;
2535 // int c1, c2, c3, c4;
2536 // int unicodeSize = 6;
2537 // currentPosition++;
2538 // while (source[currentPosition] == 'u') {
2539 // currentPosition++;
2543 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2545 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2547 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2549 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2551 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2554 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2555 // if (recordLineSeparator
2556 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2557 // pushLineSeparator();
2558 // if (Character.isWhitespace(currentCharacter))
2561 // //buffer the new char which is not a white space
2562 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2563 // //withoutUnicodePtr == 1 is true here
2565 // } catch (IndexOutOfBoundsException e) {
2566 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2569 public final int[] getLineEnds() {
2570 // return a bounded copy of this.lineEnds
2572 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0,
2577 public char[] getSource() {
2581 public static boolean isIdentifierOrKeyword(int token) {
2582 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
2585 final char[] optimizedCurrentTokenSource1() {
2586 // return always the same char[] build only once
2587 // optimization at no speed cost of 99.5 % of the singleCharIdentifier
2588 char charOne = source[startPosition];
2643 return new char[] { charOne };
2647 final char[] optimizedCurrentTokenSource2() {
2649 c0 = source[startPosition];
2650 c1 = source[startPosition + 1];
2652 // return always the same char[] build only once
2653 // optimization at no speed cost of 99.5 % of the
2654 // singleCharIdentifier
2657 return charArray_va;
2659 return charArray_vb;
2661 return charArray_vc;
2663 return charArray_vd;
2665 return charArray_ve;
2667 return charArray_vf;
2669 return charArray_vg;
2671 return charArray_vh;
2673 return charArray_vi;
2675 return charArray_vj;
2677 return charArray_vk;
2679 return charArray_vl;
2681 return charArray_vm;
2683 return charArray_vn;
2685 return charArray_vo;
2687 return charArray_vp;
2689 return charArray_vq;
2691 return charArray_vr;
2693 return charArray_vs;
2695 return charArray_vt;
2697 return charArray_vu;
2699 return charArray_vv;
2701 return charArray_vw;
2703 return charArray_vx;
2705 return charArray_vy;
2707 return charArray_vz;
2710 // try to return the same char[] build only once
2711 int hash = ((c0 << 6) + c1) % TableSize;
2712 char[][] table = charArray_length[0][hash];
2714 while (++i < InternalTableSize) {
2715 char[] charArray = table[i];
2716 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2719 // ---------other side---------
2721 int max = newEntry2;
2722 while (++i <= max) {
2723 char[] charArray = table[i];
2724 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2727 // --------add the entry-------
2728 if (++max >= InternalTableSize)
2731 table[max] = (r = new char[] { c0, c1 });
2736 final char[] optimizedCurrentTokenSource3() {
2737 // try to return the same char[] build only once
2739 int hash = (((c0 = source[startPosition]) << 12)
2740 + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2742 char[][] table = charArray_length[1][hash];
2744 while (++i < InternalTableSize) {
2745 char[] charArray = table[i];
2746 if ((c0 == charArray[0]) && (c1 == charArray[1])
2747 && (c2 == charArray[2]))
2750 // ---------other side---------
2752 int max = newEntry3;
2753 while (++i <= max) {
2754 char[] charArray = table[i];
2755 if ((c0 == charArray[0]) && (c1 == charArray[1])
2756 && (c2 == charArray[2]))
2759 // --------add the entry-------
2760 if (++max >= InternalTableSize)
2763 table[max] = (r = new char[] { c0, c1, c2 });
2768 final char[] optimizedCurrentTokenSource4() {
2769 // try to return the same char[] build only once
2770 char c0, c1, c2, c3;
2771 long hash = ((((long) (c0 = source[startPosition])) << 18)
2772 + ((c1 = source[startPosition + 1]) << 12)
2773 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2775 char[][] table = charArray_length[2][(int) hash];
2777 while (++i < InternalTableSize) {
2778 char[] charArray = table[i];
2779 if ((c0 == charArray[0]) && (c1 == charArray[1])
2780 && (c2 == charArray[2]) && (c3 == charArray[3]))
2783 // ---------other side---------
2785 int max = newEntry4;
2786 while (++i <= max) {
2787 char[] charArray = table[i];
2788 if ((c0 == charArray[0]) && (c1 == charArray[1])
2789 && (c2 == charArray[2]) && (c3 == charArray[3]))
2792 // --------add the entry-------
2793 if (++max >= InternalTableSize)
2796 table[max] = (r = new char[] { c0, c1, c2, c3 });
2801 final char[] optimizedCurrentTokenSource5() {
2802 // try to return the same char[] build only once
2803 char c0, c1, c2, c3, c4;
2804 long hash = ((((long) (c0 = source[startPosition])) << 24)
2805 + (((long) (c1 = source[startPosition + 1])) << 18)
2806 + ((c2 = source[startPosition + 2]) << 12)
2807 + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2809 char[][] table = charArray_length[3][(int) hash];
2811 while (++i < InternalTableSize) {
2812 char[] charArray = table[i];
2813 if ((c0 == charArray[0]) && (c1 == charArray[1])
2814 && (c2 == charArray[2]) && (c3 == charArray[3])
2815 && (c4 == charArray[4]))
2818 // ---------other side---------
2820 int max = newEntry5;
2821 while (++i <= max) {
2822 char[] charArray = table[i];
2823 if ((c0 == charArray[0]) && (c1 == charArray[1])
2824 && (c2 == charArray[2]) && (c3 == charArray[3])
2825 && (c4 == charArray[4]))
2828 // --------add the entry-------
2829 if (++max >= InternalTableSize)
2832 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2837 final char[] optimizedCurrentTokenSource6() {
2838 // try to return the same char[] build only once
2839 char c0, c1, c2, c3, c4, c5;
2840 long hash = ((((long) (c0 = source[startPosition])) << 32)
2841 + (((long) (c1 = source[startPosition + 1])) << 24)
2842 + (((long) (c2 = source[startPosition + 2])) << 18)
2843 + ((c3 = source[startPosition + 3]) << 12)
2844 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2846 char[][] table = charArray_length[4][(int) hash];
2848 while (++i < InternalTableSize) {
2849 char[] charArray = table[i];
2850 if ((c0 == charArray[0]) && (c1 == charArray[1])
2851 && (c2 == charArray[2]) && (c3 == charArray[3])
2852 && (c4 == charArray[4]) && (c5 == charArray[5]))
2855 // ---------other side---------
2857 int max = newEntry6;
2858 while (++i <= max) {
2859 char[] charArray = table[i];
2860 if ((c0 == charArray[0]) && (c1 == charArray[1])
2861 && (c2 == charArray[2]) && (c3 == charArray[3])
2862 && (c4 == charArray[4]) && (c5 == charArray[5]))
2865 // --------add the entry-------
2866 if (++max >= InternalTableSize)
2869 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2874 public final void pushLineSeparator() throws InvalidInputException {
2875 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2876 final int INCREMENT = 250;
2877 if (this.checkNonExternalizedStringLiterals) {
2878 // reinitialize the current line for non externalize strings purpose
2881 // currentCharacter is at position currentPosition-1
2883 if (currentCharacter == '\r') {
2884 int separatorPos = currentPosition - 1;
2885 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2887 // System.out.println("CR-" + separatorPos);
2889 lineEnds[++linePtr] = separatorPos;
2890 } catch (IndexOutOfBoundsException e) {
2891 // linePtr value is correct
2892 int oldLength = lineEnds.length;
2893 int[] old = lineEnds;
2894 lineEnds = new int[oldLength + INCREMENT];
2895 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2896 lineEnds[linePtr] = separatorPos;
2898 // look-ahead for merged cr+lf
2900 if (source[currentPosition] == '\n') {
2901 // System.out.println("look-ahead LF-" + currentPosition);
2902 lineEnds[linePtr] = currentPosition;
2908 } catch (IndexOutOfBoundsException e) {
2913 if (currentCharacter == '\n') {
2914 // must merge eventual cr followed by lf
2915 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2916 // System.out.println("merge LF-" + (currentPosition - 1));
2917 lineEnds[linePtr] = currentPosition - 1;
2919 int separatorPos = currentPosition - 1;
2920 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2922 // System.out.println("LF-" + separatorPos);
2924 lineEnds[++linePtr] = separatorPos;
2925 } catch (IndexOutOfBoundsException e) {
2926 // linePtr value is correct
2927 int oldLength = lineEnds.length;
2928 int[] old = lineEnds;
2929 lineEnds = new int[oldLength + INCREMENT];
2930 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2931 lineEnds[linePtr] = separatorPos;
2939 public final void pushUnicodeLineSeparator() {
2940 // isUnicode means that the \r or \n has been read as a unicode
2942 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2943 final int INCREMENT = 250;
2944 // currentCharacter is at position currentPosition-1
2945 if (this.checkNonExternalizedStringLiterals) {
2946 // reinitialize the current line for non externalize strings purpose
2950 if (currentCharacter == '\r') {
2951 int separatorPos = currentPosition - 6;
2952 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2954 // System.out.println("CR-" + separatorPos);
2956 lineEnds[++linePtr] = separatorPos;
2957 } catch (IndexOutOfBoundsException e) {
2958 // linePtr value is correct
2959 int oldLength = lineEnds.length;
2960 int[] old = lineEnds;
2961 lineEnds = new int[oldLength + INCREMENT];
2962 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2963 lineEnds[linePtr] = separatorPos;
2965 // look-ahead for merged cr+lf
2966 if (source[currentPosition] == '\n') {
2967 // System.out.println("look-ahead LF-" + currentPosition);
2968 lineEnds[linePtr] = currentPosition;
2976 if (currentCharacter == '\n') {
2977 // must merge eventual cr followed by lf
2978 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2979 // System.out.println("merge LF-" + (currentPosition - 1));
2980 lineEnds[linePtr] = currentPosition - 6;
2982 int separatorPos = currentPosition - 6;
2983 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2985 // System.out.println("LF-" + separatorPos);
2987 lineEnds[++linePtr] = separatorPos;
2988 } catch (IndexOutOfBoundsException e) {
2989 // linePtr value is correct
2990 int oldLength = lineEnds.length;
2991 int[] old = lineEnds;
2992 lineEnds = new int[oldLength + INCREMENT];
2993 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2994 lineEnds[linePtr] = separatorPos;
3002 public void recordComment(int token) {
3004 int stopPosition = this.currentPosition;
3006 case TokenNameCOMMENT_LINE:
3007 stopPosition = -this.lastCommentLinePosition;
3009 case TokenNameCOMMENT_BLOCK:
3010 stopPosition = -this.currentPosition;
3014 // a new comment is recorded
3015 int length = this.commentStops.length;
3016 if (++this.commentPtr >= length) {
3017 System.arraycopy(this.commentStops, 0,
3018 this.commentStops = new int[length + 30], 0, length);
3019 // grows the positions buffers too
3020 System.arraycopy(this.commentStarts, 0,
3021 this.commentStarts = new int[length + 30], 0, length);
3023 this.commentStops[this.commentPtr] = stopPosition;
3024 this.commentStarts[this.commentPtr] = this.startPosition;
3027 // public final void recordComment(boolean isJavadoc) {
3028 // // a new annotation comment is recorded
3030 // commentStops[++commentPtr] = isJavadoc
3031 // ? currentPosition
3032 // : -currentPosition;
3033 // } catch (IndexOutOfBoundsException e) {
3034 // int oldStackLength = commentStops.length;
3035 // int[] oldStack = commentStops;
3036 // commentStops = new int[oldStackLength + 30];
3037 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
3038 // commentStops[commentPtr] = isJavadoc ? currentPosition :
3039 // -currentPosition;
3040 // //grows the positions buffers too
3041 // int[] old = commentStarts;
3042 // commentStarts = new int[oldStackLength + 30];
3043 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
3045 // //the buffer is of a correct size here
3046 // commentStarts[commentPtr] = startPosition;
3048 public void resetTo(int begin, int end) {
3049 // reset the scanner to a given position where it may rescan again
3051 initialPosition = startPosition = currentPosition = begin;
3052 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
3053 commentPtr = -1; // reset comment stack
3056 public final void scanSingleQuotedEscapeCharacter()
3057 throws InvalidInputException {
3058 // the string with "\\u" is a legal string of two chars \ and u
3059 // thus we use a direct access to the source (for regular cases).
3060 // if (unicodeAsBackSlash) {
3061 // // consume next character
3062 // unicodeAsBackSlash = false;
3063 // if (((currentCharacter = source[currentPosition++]) == '\\')
3064 // && (source[currentPosition] == 'u')) {
3065 // getNextUnicodeChar();
3067 // if (withoutUnicodePtr != 0) {
3068 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3072 currentCharacter = source[currentPosition++];
3073 switch (currentCharacter) {
3075 currentCharacter = '\'';
3078 currentCharacter = '\\';
3081 currentCharacter = '\\';
3086 public final void scanDoubleQuotedEscapeCharacter()
3087 throws InvalidInputException {
3088 currentCharacter = source[currentPosition++];
3089 switch (currentCharacter) {
3091 // currentCharacter = '\b';
3094 currentCharacter = '\t';
3097 currentCharacter = '\n';
3100 // currentCharacter = '\f';
3103 currentCharacter = '\r';
3106 currentCharacter = '\"';
3109 currentCharacter = '\'';
3112 currentCharacter = '\\';
3115 currentCharacter = '$';
3118 // -----------octal escape--------------
3120 // OctalDigit OctalDigit
3121 // ZeroToThree OctalDigit OctalDigit
3122 int number = Character.getNumericValue(currentCharacter);
3123 if (number >= 0 && number <= 7) {
3124 boolean zeroToThreeNot = number > 3;
3126 .isDigit(currentCharacter = source[currentPosition++])) {
3127 int digit = Character.getNumericValue(currentCharacter);
3128 if (digit >= 0 && digit <= 7) {
3129 number = (number * 8) + digit;
3131 .isDigit(currentCharacter = source[currentPosition++])) {
3132 if (zeroToThreeNot) { // has read \NotZeroToThree
3134 // Digit --> ignore last character
3138 .getNumericValue(currentCharacter);
3139 if (digit >= 0 && digit <= 7) {
3140 // has read \ZeroToThree OctalDigit
3142 number = (number * 8) + digit;
3143 } else { // has read \ZeroToThree OctalDigit
3145 // --> ignore last character
3149 } else { // has read \OctalDigit NonDigit--> ignore
3154 } else { // has read \OctalDigit NonOctalDigit--> ignore
3159 } else { // has read \OctalDigit --> ignore last character
3163 throw new InvalidInputException(INVALID_ESCAPE);
3164 currentCharacter = (char) number;
3167 // throw new InvalidInputException(INVALID_ESCAPE);
3171 // public int scanIdentifierOrKeyword() throws InvalidInputException {
3172 // return scanIdentifierOrKeyword( false );
3174 public int scanIdentifierOrKeyword(boolean isVariable)
3175 throws InvalidInputException {
3177 // first dispatch on the first char.
3178 // then the length. If there are several
3179 // keywors with the same length AND the same first char, then do another
3180 // disptach on the second char :-)...cool....but fast !
3181 useAssertAsAnIndentifier = false;
3182 while (getNextCharAsJavaIdentifierPart()) {
3186 // if (new String(getCurrentTokenSource()).equals("$this")) {
3187 // return TokenNamethis;
3189 return TokenNameVariable;
3194 // if (withoutUnicodePtr == 0)
3195 // quick test on length == 1 but not on length > 12 while most
3197 // have a length which is <= 12...but there are lots of identifier with
3198 // only one char....
3200 if ((length = currentPosition - startPosition) == 1)
3201 return TokenNameIdentifier;
3203 data = new char[length];
3204 index = startPosition;
3205 for (int i = 0; i < length; i++) {
3206 data[i] = Character.toLowerCase(source[index + i]);
3210 // if ((length = withoutUnicodePtr) == 1)
3211 // return TokenNameIdentifier;
3212 // // data = withoutUnicodeBuffer;
3213 // data = new char[withoutUnicodeBuffer.length];
3214 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3215 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3219 firstLetter = data[index];
3220 switch (firstLetter) {
3225 if ((data[++index] == '_') && (data[++index] == 'f')
3226 && (data[++index] == 'i') && (data[++index] == 'l')
3227 && (data[++index] == 'e') && (data[++index] == '_')
3228 && (data[++index] == '_'))
3229 return TokenNameFILE;
3230 index = 0; // __LINE__
3231 if ((data[++index] == '_') && (data[++index] == 'l')
3232 && (data[++index] == 'i') && (data[++index] == 'n')
3233 && (data[++index] == 'e') && (data[++index] == '_')
3234 && (data[++index] == '_'))
3235 return TokenNameLINE;
3239 if ((data[++index] == '_') && (data[++index] == 'c')
3240 && (data[++index] == 'l') && (data[++index] == 'a')
3241 && (data[++index] == 's') && (data[++index] == 's')
3242 && (data[++index] == '_') && (data[++index] == '_'))
3243 return TokenNameCLASS_C;
3247 if ((data[++index] == '_') && (data[++index] == 'm')
3248 && (data[++index] == 'e') && (data[++index] == 't')
3249 && (data[++index] == 'h') && (data[++index] == 'o')
3250 && (data[++index] == 'd') && (data[++index] == '_')
3251 && (data[++index] == '_'))
3252 return TokenNameMETHOD_C;
3256 if ((data[++index] == '_') && (data[++index] == 'f')
3257 && (data[++index] == 'u') && (data[++index] == 'n')
3258 && (data[++index] == 'c') && (data[++index] == 't')
3259 && (data[++index] == 'i') && (data[++index] == 'o')
3260 && (data[++index] == 'n') && (data[++index] == '_')
3261 && (data[++index] == '_'))
3262 return TokenNameFUNC_C;
3265 return TokenNameIdentifier;
3267 // as and array abstract
3271 if ((data[++index] == 's')) {
3274 return TokenNameIdentifier;
3277 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3278 return TokenNameand;
3280 return TokenNameIdentifier;
3283 if ((data[++index] == 'r') && (data[++index] == 'r')
3284 && (data[++index] == 'a') && (data[++index] == 'y'))
3285 return TokenNamearray;
3286 return TokenNameIdentifier;
3288 if ((data[++index] == 'b') && (data[++index] == 's')
3289 && (data[++index] == 't') && (data[++index] == 'r')
3290 && (data[++index] == 'a') && (data[++index] == 'c')
3291 && (data[++index] == 't'))
3292 return TokenNameabstract;
3293 return TokenNameIdentifier;
3295 return TokenNameIdentifier;
3300 if ((data[++index] == 'r') && (data[++index] == 'e')
3301 && (data[++index] == 'a') && (data[++index] == 'k'))
3302 return TokenNamebreak;
3303 return TokenNameIdentifier;
3305 return TokenNameIdentifier;
3307 // case catch class clone const continue
3310 if ((data[++index] == 'a') && (data[++index] == 's')
3311 && (data[++index] == 'e'))
3312 return TokenNamecase;
3313 return TokenNameIdentifier;
3315 if ((data[++index] == 'a') && (data[++index] == 't')
3316 && (data[++index] == 'c') && (data[++index] == 'h'))
3317 return TokenNamecatch;
3319 if ((data[++index] == 'l') && (data[++index] == 'a')
3320 && (data[++index] == 's') && (data[++index] == 's'))
3321 return TokenNameclass;
3323 if ((data[++index] == 'l') && (data[++index] == 'o')
3324 && (data[++index] == 'n') && (data[++index] == 'e'))
3325 return TokenNameclone;
3327 if ((data[++index] == 'o') && (data[++index] == 'n')
3328 && (data[++index] == 's') && (data[++index] == 't'))
3329 return TokenNameconst;
3330 return TokenNameIdentifier;
3332 if ((data[++index] == 'o') && (data[++index] == 'n')
3333 && (data[++index] == 't') && (data[++index] == 'i')
3334 && (data[++index] == 'n') && (data[++index] == 'u')
3335 && (data[++index] == 'e'))
3336 return TokenNamecontinue;
3337 return TokenNameIdentifier;
3339 return TokenNameIdentifier;
3341 // declare default do die
3342 // TODO delete define ==> no keyword !
3345 if ((data[++index] == 'o'))
3347 return TokenNameIdentifier;
3349 // if ((data[++index] == 'e')
3350 // && (data[++index] == 'f')
3351 // && (data[++index] == 'i')
3352 // && (data[++index] == 'n')
3353 // && (data[++index] == 'e'))
3354 // return TokenNamedefine;
3356 // return TokenNameIdentifier;
3358 if ((data[++index] == 'e') && (data[++index] == 'c')
3359 && (data[++index] == 'l') && (data[++index] == 'a')
3360 && (data[++index] == 'r') && (data[++index] == 'e'))
3361 return TokenNamedeclare;
3363 if ((data[++index] == 'e') && (data[++index] == 'f')
3364 && (data[++index] == 'a') && (data[++index] == 'u')
3365 && (data[++index] == 'l') && (data[++index] == 't'))
3366 return TokenNamedefault;
3367 return TokenNameIdentifier;
3369 return TokenNameIdentifier;
3371 // echo else exit elseif extends eval
3374 if ((data[++index] == 'c') && (data[++index] == 'h')
3375 && (data[++index] == 'o'))
3376 return TokenNameecho;
3377 else if ((data[index] == 'l') && (data[++index] == 's')
3378 && (data[++index] == 'e'))
3379 return TokenNameelse;
3380 else if ((data[index] == 'x') && (data[++index] == 'i')
3381 && (data[++index] == 't'))
3382 return TokenNameexit;
3383 else if ((data[index] == 'v') && (data[++index] == 'a')
3384 && (data[++index] == 'l'))
3385 return TokenNameeval;
3386 return TokenNameIdentifier;
3389 if ((data[++index] == 'n') && (data[++index] == 'd')
3390 && (data[++index] == 'i') && (data[++index] == 'f'))
3391 return TokenNameendif;
3392 if ((data[index] == 'm') && (data[++index] == 'p')
3393 && (data[++index] == 't') && (data[++index] == 'y'))
3394 return TokenNameempty;
3395 return TokenNameIdentifier;
3398 if ((data[++index] == 'n') && (data[++index] == 'd')
3399 && (data[++index] == 'f') && (data[++index] == 'o')
3400 && (data[++index] == 'r'))
3401 return TokenNameendfor;
3402 else if ((data[index] == 'l') && (data[++index] == 's')
3403 && (data[++index] == 'e') && (data[++index] == 'i')
3404 && (data[++index] == 'f'))
3405 return TokenNameelseif;
3406 return TokenNameIdentifier;
3408 if ((data[++index] == 'x') && (data[++index] == 't')
3409 && (data[++index] == 'e') && (data[++index] == 'n')
3410 && (data[++index] == 'd') && (data[++index] == 's'))
3411 return TokenNameextends;
3412 return TokenNameIdentifier;
3415 if ((data[++index] == 'n') && (data[++index] == 'd')
3416 && (data[++index] == 'w') && (data[++index] == 'h')
3417 && (data[++index] == 'i') && (data[++index] == 'l')
3418 && (data[++index] == 'e'))
3419 return TokenNameendwhile;
3420 return TokenNameIdentifier;
3423 if ((data[++index] == 'n') && (data[++index] == 'd')
3424 && (data[++index] == 's') && (data[++index] == 'w')
3425 && (data[++index] == 'i') && (data[++index] == 't')
3426 && (data[++index] == 'c') && (data[++index] == 'h'))
3427 return TokenNameendswitch;
3428 return TokenNameIdentifier;
3431 if ((data[++index] == 'n') && (data[++index] == 'd')
3432 && (data[++index] == 'd') && (data[++index] == 'e')
3433 && (data[++index] == 'c') && (data[++index] == 'l')
3434 && (data[++index] == 'a') && (data[++index] == 'r')
3435 && (data[++index] == 'e'))
3436 return TokenNameenddeclare;
3438 if ((data[++index] == 'n') // endforeach
3439 && (data[++index] == 'd')
3440 && (data[++index] == 'f')
3441 && (data[++index] == 'o')
3442 && (data[++index] == 'r')
3443 && (data[++index] == 'e')
3444 && (data[++index] == 'a')
3445 && (data[++index] == 'c') && (data[++index] == 'h'))
3446 return TokenNameendforeach;
3447 return TokenNameIdentifier;
3449 return TokenNameIdentifier;
3451 // for false final function
3454 if ((data[++index] == 'o') && (data[++index] == 'r'))
3455 return TokenNamefor;
3456 return TokenNameIdentifier;
3458 // if ((data[++index] == 'a') && (data[++index] == 'l')
3459 // && (data[++index] == 's') && (data[++index] == 'e'))
3460 // return TokenNamefalse;
3461 if ((data[++index] == 'i') && (data[++index] == 'n')
3462 && (data[++index] == 'a') && (data[++index] == 'l'))
3463 return TokenNamefinal;
3464 return TokenNameIdentifier;
3467 if ((data[++index] == 'o') && (data[++index] == 'r')
3468 && (data[++index] == 'e') && (data[++index] == 'a')
3469 && (data[++index] == 'c') && (data[++index] == 'h'))
3470 return TokenNameforeach;
3471 return TokenNameIdentifier;
3474 if ((data[++index] == 'u') && (data[++index] == 'n')
3475 && (data[++index] == 'c') && (data[++index] == 't')
3476 && (data[++index] == 'i') && (data[++index] == 'o')
3477 && (data[++index] == 'n'))
3478 return TokenNamefunction;
3479 return TokenNameIdentifier;
3481 return TokenNameIdentifier;
3485 if ((data[++index] == 'l') && (data[++index] == 'o')
3486 && (data[++index] == 'b') && (data[++index] == 'a')
3487 && (data[++index] == 'l')) {
3488 return TokenNameglobal;
3491 return TokenNameIdentifier;
3493 // if int isset include include_once instanceof interface implements
3496 if (data[++index] == 'f')
3498 return TokenNameIdentifier;
3500 // if ((data[++index] == 'n') && (data[++index] == 't'))
3501 // return TokenNameint;
3503 // return TokenNameIdentifier;
3505 if ((data[++index] == 's') && (data[++index] == 's')
3506 && (data[++index] == 'e') && (data[++index] == 't'))
3507 return TokenNameisset;
3508 return TokenNameIdentifier;
3510 if ((data[++index] == 'n') && (data[++index] == 'c')
3511 && (data[++index] == 'l') && (data[++index] == 'u')
3512 && (data[++index] == 'd') && (data[++index] == 'e'))
3513 return TokenNameinclude;
3514 return TokenNameIdentifier;
3517 if ((data[++index] == 'n') && (data[++index] == 't')
3518 && (data[++index] == 'e') && (data[++index] == 'r')
3519 && (data[++index] == 'f') && (data[++index] == 'a')
3520 && (data[++index] == 'c') && (data[++index] == 'e'))
3521 return TokenNameinterface;
3522 return TokenNameIdentifier;
3525 if ((data[++index] == 'n') && (data[++index] == 's')
3526 && (data[++index] == 't') && (data[++index] == 'a')
3527 && (data[++index] == 'n') && (data[++index] == 'c')
3528 && (data[++index] == 'e') && (data[++index] == 'o')
3529 && (data[++index] == 'f'))
3530 return TokenNameinstanceof;
3531 if ((data[index] == 'm') && (data[++index] == 'p')
3532 && (data[++index] == 'l') && (data[++index] == 'e')
3533 && (data[++index] == 'm') && (data[++index] == 'e')
3534 && (data[++index] == 'n') && (data[++index] == 't')
3535 && (data[++index] == 's'))
3536 return TokenNameimplements;
3537 return TokenNameIdentifier;
3539 if ((data[++index] == 'n') && (data[++index] == 'c')
3540 && (data[++index] == 'l') && (data[++index] == 'u')
3541 && (data[++index] == 'd') && (data[++index] == 'e')
3542 && (data[++index] == '_') && (data[++index] == 'o')
3543 && (data[++index] == 'n') && (data[++index] == 'c')
3544 && (data[++index] == 'e'))
3545 return TokenNameinclude_once;
3546 return TokenNameIdentifier;
3548 return TokenNameIdentifier;
3552 if ((data[++index] == 'i') && (data[++index] == 's')
3553 && (data[++index] == 't')) {
3554 return TokenNamelist;
3557 return TokenNameIdentifier;
3562 if ((data[++index] == 'e') && (data[++index] == 'w'))
3563 return TokenNamenew;
3564 return TokenNameIdentifier;
3566 // if ((data[++index] == 'u') && (data[++index] == 'l')
3567 // && (data[++index] == 'l'))
3568 // return TokenNamenull;
3570 // return TokenNameIdentifier;
3572 return TokenNameIdentifier;
3576 if (data[++index] == 'r') {
3580 // if (length == 12) {
3581 // if ((data[++index] == 'l')
3582 // && (data[++index] == 'd')
3583 // && (data[++index] == '_')
3584 // && (data[++index] == 'f')
3585 // && (data[++index] == 'u')
3586 // && (data[++index] == 'n')
3587 // && (data[++index] == 'c')
3588 // && (data[++index] == 't')
3589 // && (data[++index] == 'i')
3590 // && (data[++index] == 'o')
3591 // && (data[++index] == 'n')) {
3592 // return TokenNameold_function;
3595 return TokenNameIdentifier;
3597 // print public private protected
3600 if ((data[++index] == 'r') && (data[++index] == 'i')
3601 && (data[++index] == 'n') && (data[++index] == 't')) {
3602 return TokenNameprint;
3604 return TokenNameIdentifier;
3606 if ((data[++index] == 'u') && (data[++index] == 'b')
3607 && (data[++index] == 'l') && (data[++index] == 'i')
3608 && (data[++index] == 'c')) {
3609 return TokenNamepublic;
3611 return TokenNameIdentifier;
3613 if ((data[++index] == 'r') && (data[++index] == 'i')
3614 && (data[++index] == 'v') && (data[++index] == 'a')
3615 && (data[++index] == 't') && (data[++index] == 'e')) {
3616 return TokenNameprivate;
3618 return TokenNameIdentifier;
3620 if ((data[++index] == 'r') && (data[++index] == 'o')
3621 && (data[++index] == 't') && (data[++index] == 'e')
3622 && (data[++index] == 'c') && (data[++index] == 't')
3623 && (data[++index] == 'e') && (data[++index] == 'd')) {
3624 return TokenNameprotected;
3626 return TokenNameIdentifier;
3628 return TokenNameIdentifier;
3630 // return require require_once
3632 if ((data[++index] == 'e') && (data[++index] == 't')
3633 && (data[++index] == 'u') && (data[++index] == 'r')
3634 && (data[++index] == 'n')) {
3635 return TokenNamereturn;
3637 } else if (length == 7) {
3638 if ((data[++index] == 'e') && (data[++index] == 'q')
3639 && (data[++index] == 'u') && (data[++index] == 'i')
3640 && (data[++index] == 'r') && (data[++index] == 'e')) {
3641 return TokenNamerequire;
3643 } else if (length == 12) {
3644 if ((data[++index] == 'e') && (data[++index] == 'q')
3645 && (data[++index] == 'u') && (data[++index] == 'i')
3646 && (data[++index] == 'r') && (data[++index] == 'e')
3647 && (data[++index] == '_') && (data[++index] == 'o')
3648 && (data[++index] == 'n') && (data[++index] == 'c')
3649 && (data[++index] == 'e')) {
3650 return TokenNamerequire_once;
3653 return TokenNameIdentifier;
3655 // self static switch
3658 // if ((data[++index] == 'e') && (data[++index] == 'l') &&
3661 // return TokenNameself;
3663 // return TokenNameIdentifier;
3665 if (data[++index] == 't')
3666 if ((data[++index] == 'a') && (data[++index] == 't')
3667 && (data[++index] == 'i') && (data[++index] == 'c')) {
3668 return TokenNamestatic;
3670 return TokenNameIdentifier;
3671 else if ((data[index] == 'w') && (data[++index] == 'i')
3672 && (data[++index] == 't') && (data[++index] == 'c')
3673 && (data[++index] == 'h'))
3674 return TokenNameswitch;
3676 return TokenNameIdentifier;
3681 if ((data[++index] == 'r') && (data[++index] == 'y'))
3682 return TokenNametry;
3684 // if ((data[++index] == 'r') && (data[++index] == 'u')
3685 // && (data[++index] == 'e'))
3686 // return TokenNametrue;
3688 return TokenNameIdentifier;
3690 if ((data[++index] == 'h') && (data[++index] == 'r')
3691 && (data[++index] == 'o') && (data[++index] == 'w'))
3692 return TokenNamethrow;
3694 return TokenNameIdentifier;
3699 if ((data[++index] == 's') && (data[++index] == 'e'))
3700 return TokenNameuse;
3702 if ((data[++index] == 'n') && (data[++index] == 's')
3703 && (data[++index] == 'e') && (data[++index] == 't'))
3704 return TokenNameunset;
3706 return TokenNameIdentifier;
3711 if ((data[++index] == 'a') && (data[++index] == 'r'))
3712 return TokenNamevar;
3714 return TokenNameIdentifier;
3719 if ((data[++index] == 'h') && (data[++index] == 'i')
3720 && (data[++index] == 'l') && (data[++index] == 'e'))
3721 return TokenNamewhile;
3722 // case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3723 // (data[++index]=='e') && (data[++index]=='f')&&
3724 // (data[++index]=='p'))
3725 // return TokenNamewidefp ;
3727 // return TokenNameIdentifier;
3729 return TokenNameIdentifier;
3734 if ((data[++index] == 'o') && (data[++index] == 'r'))
3735 return TokenNamexor;
3737 return TokenNameIdentifier;
3739 return TokenNameIdentifier;
3741 return TokenNameIdentifier;
// Scans a numeric literal whose first character is already in currentCharacter and
// returns its token kind: TokenNameIntegerLiteral or TokenNameDoubleLiteral.
// dotPrefix - true when the literal was introduced by a '.'; it seeds the 'floating'
//             flag and switches off the leading-zero (hex/octal) branch and the
//             decimal-point check further down.
// Throws InvalidInputException(INVALID_HEXA) when the character after 0x/0X is not a
// hexadecimal digit, and InvalidInputException(INVALID_FLOAT) when an exponent marker
// (optionally signed) is not followed by a digit.
3744 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3745 // when entering this method the currentCharacter is the first
3746 // digit of the number , i.e. it may be preceded by a . when
3747 // dotPrefix is true
3748 boolean floating = dotPrefix;
3749 if ((!dotPrefix) && (currentCharacter == '0')) {
3750 if (getNextChar('x', 'X') >= 0) { // ----------hexa-----------------
3751 // force the first char of the hexa number do exist...
3752 // consume next character
3753 unicodeAsBackSlash = false;
3754 currentCharacter = source[currentPosition++];
3755 // if (((currentCharacter = source[currentPosition++]) == '\\')
3756 // && (source[currentPosition] == 'u')) {
3757 // getNextUnicodeChar();
3759 // if (withoutUnicodePtr != 0) {
3760 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Character.digit(c, 16) yields -1 when c is not a valid base-16 digit
3763 if (Character.digit(currentCharacter, 16) == -1)
3764 throw new InvalidInputException(INVALID_HEXA);
// presumably consumes the remaining hex digits; the loop body is not shown here
3766 while (getNextCharAsDigit(16)) {
3769 // if (getNextChar('l', 'L') >= 0)
3770 // return TokenNameLongLiteral;
3772 return TokenNameIntegerLiteral;
3774 // the hex branch above always returns or throws, so here the leading 0 was not followed by x or X
3775 // potential octal ! ... some one may write 000099.0 ! thus 00100 <
3776 // 00078.0 is true !!!!! crazy language
3777 if (getNextCharAsDigit()) {
3778 // -------------potential octal-----------------
3779 while (getNextCharAsDigit()) {
3782 // if (getNextChar('l', 'L') >= 0) {
3783 // return TokenNameLongLiteral;
3786 // if (getNextChar('f', 'F') >= 0) {
3787 // return TokenNameFloatingPointLiteral;
// a d/D suffix directly after the digits makes the literal a double
3789 if (getNextChar('d', 'D') >= 0) {
3790 return TokenNameDoubleLiteral;
3791 } else { // make the distinction between octal and float ....
3792 if (getNextChar('.')) { // bingo ! ....
3793 while (getNextCharAsDigit()) {
3796 if (getNextChar('e', 'E') >= 0) {
3797 // consume next character
3798 unicodeAsBackSlash = false;
3799 currentCharacter = source[currentPosition++];
3800 // if (((currentCharacter =
3801 // source[currentPosition++]) == '\\')
3802 // && (source[currentPosition] == 'u')) {
3803 // getNextUnicodeChar();
3805 // if (withoutUnicodePtr != 0) {
3806 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3807 // currentCharacter;
// optional sign of the exponent
3810 if ((currentCharacter == '-')
3811 || (currentCharacter == '+')) {
3812 // consume next character
3813 unicodeAsBackSlash = false;
3814 currentCharacter = source[currentPosition++];
3815 // if (((currentCharacter =
3816 // source[currentPosition++]) == '\\')
3817 // && (source[currentPosition] == 'u')) {
3818 // getNextUnicodeChar();
3820 // if (withoutUnicodePtr != 0) {
3821 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3822 // currentCharacter;
// the exponent needs at least one digit
3826 if (!Character.isDigit(currentCharacter))
3827 throw new InvalidInputException(INVALID_FLOAT);
3828 while (getNextCharAsDigit()) {
3832 // if (getNextChar('f', 'F') >= 0)
3833 // return TokenNameFloatingPointLiteral;
3834 getNextChar('d', 'D'); // jump over potential d or D
3835 return TokenNameDoubleLiteral;
3837 return TokenNameIntegerLiteral;
// NOTE(review): the closing lines of the leading-zero branch are not shown here;
// the code below appears to be the general decimal path - confirm against the full file
3844 while (getNextCharAsDigit()) {
3847 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3848 // return TokenNameLongLiteral;
3849 if ((!dotPrefix) && (getNextChar('.'))) { // decimal part that can be
3851 while (getNextCharAsDigit()) {
3856 // if floating is true both exponent and suffix may be optional
3857 if (getNextChar('e', 'E') >= 0) {
3859 // consume next character
3860 unicodeAsBackSlash = false;
3861 currentCharacter = source[currentPosition++];
3862 // if (((currentCharacter = source[currentPosition++]) == '\\')
3863 // && (source[currentPosition] == 'u')) {
3864 // getNextUnicodeChar();
3866 // if (withoutUnicodePtr != 0) {
3867 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3870 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3873 unicodeAsBackSlash = false;
3874 currentCharacter = source[currentPosition++];
3875 // if (((currentCharacter = source[currentPosition++]) == '\\')
3876 // && (source[currentPosition] == 'u')) {
3877 // getNextUnicodeChar();
3879 // if (withoutUnicodePtr != 0) {
3880 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// the exponent needs at least one digit
3884 if (!Character.isDigit(currentCharacter))
3885 throw new InvalidInputException(INVALID_FLOAT);
3886 while (getNextCharAsDigit()) {
3890 if (getNextChar('d', 'D') >= 0)
3891 return TokenNameDoubleLiteral;
3892 // if (getNextChar('f', 'F') >= 0)
3893 // return TokenNameFloatingPointLiteral;
3894 // the long flag has been tested before
// without a d/D suffix the 'floating' flag decides the token kind
3895 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3899 * Search the line number corresponding to a specific position
// Looks up 'position' in the lineEnds table, considering only its first linePtr + 1
// entries ('length' below).
// NOTE(review): the search bounds g/d and the comparisons against lineEnds[m] look like
// a binary search over line-end offsets, but the loop header, the midpoint computation
// and every return statement are not shown in this excerpt - confirm against the full file.
3902 public final int getLineNumber(int position) {
// no line-end table recorded; the statement guarded by this check is not shown here
3903 if (lineEnds == null)
3905 int length = linePtr + 1;
// g and d are the lower and upper search bounds
3908 int g = 0, d = length - 1;
3912 if (position < lineEnds[m]) {
3914 } else if (position > lineEnds[m]) {
3920 if (position < lineEnds[m]) {
// Setter taking the desired PHP-mode flag.
// NOTE(review): the body is not shown in this excerpt; presumably it assigns 'mode'
// to the public phpMode field declared at the top of the class - confirm.
3926 public void setPHPMode(boolean mode) {
// Convenience overload: installs 'source' as the buffer to scan with no associated
// compilation unit (delegates with compilationUnit == null).
3930 public final void setSource(char[] source) {
3931 setSource(null, source);
// Installs a new source buffer and resets the scanner state that depends on it.
// compilationUnit - stored as-is in this.compilationUnit (may be null, see the
//                   single-argument overload)
// source          - characters to scan; null is replaced by an empty array so that
//                   this.source is never null afterwards
3934 public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3935 // the source-buffer is set to sourceString
3936 this.compilationUnit = compilationUnit;
3937 if (source == null) {
3938 this.source = new char[0];
3940 this.source = source;
// NOTE(review): one statement between the null check and this line is not shown here
3943 initialPosition = currentPosition = 0;
3944 containsAssertKeyword = false;
// scratch buffer sized to the new source (uses this.source, which is non-null here)
3945 withoutUnicodeBuffer = new char[this.source.length];
3946 fFillerToken = TokenNameEOF;
3947 // encapsedStringStack = new Stack();
3950 public String toString() {
3951 if (startPosition == source.length)
3952 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3953 if (currentPosition > source.length)
3954 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3955 char front[] = new char[startPosition];
3956 System.arraycopy(source, 0, front, 0, startPosition);
3957 int middleLength = (currentPosition - 1) - startPosition + 1;
3959 if (middleLength > -1) {
3960 middle = new char[middleLength];
3961 System.arraycopy(source, startPosition, middle, 0, middleLength);
3963 middle = new char[0];
3965 char end[] = new char[source.length - (currentPosition - 1)];
3966 System.arraycopy(source, (currentPosition - 1) + 1, end, 0,
3967 source.length - (currentPosition - 1) - 1);
3968 return new String(front)
3969 + "\n===============================\nStarts here -->" //$NON-NLS-1$
3970 + new String(middle)
3971 + "<-- Ends here\n===============================\n" //$NON-NLS-1$
// Returns a display string for the token kind 'act'.
// Keywords and operators map to fixed text; token kinds that carry text (identifier,
// variable, literals, strings, heredoc, whitespace, comments, inline HTML) embed
// new String(getCurrentTokenSource()); the last return below renders any other value as
// "not-a-token(<act>) <current token source>".
// NOTE(review): the switch header and a number of case labels / return statements are
// not shown in this excerpt.
3975 public final String toStringAction(int act) {
3977 case TokenNameERROR:
3978 return "ScannerError"; // + new String(getCurrentTokenSource()) +
3981 case TokenNameINLINE_HTML:
3982 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3983 case TokenNameECHO_INVISIBLE:
3986 case TokenNameIdentifier:
3987 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3988 case TokenNameVariable:
3989 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3990 case TokenNameabstract:
3991 return "abstract"; //$NON-NLS-1$
3993 return "AND"; //$NON-NLS-1$
3994 case TokenNamearray:
3995 return "array"; //$NON-NLS-1$
3997 return "as"; //$NON-NLS-1$
3998 case TokenNamebreak:
3999 return "break"; //$NON-NLS-1$
4001 return "case"; //$NON-NLS-1$
4002 case TokenNameclass:
4003 return "class"; //$NON-NLS-1$
4004 case TokenNamecatch:
4005 return "catch"; //$NON-NLS-1$
4006 case TokenNameclone:
4009 case TokenNameconst:
4012 case TokenNamecontinue:
4013 return "continue"; //$NON-NLS-1$
4014 case TokenNamedefault:
4015 return "default"; //$NON-NLS-1$
4016 // case TokenNamedefine :
4017 // return "define"; //$NON-NLS-1$
4019 return "do"; //$NON-NLS-1$
4021 return "echo"; //$NON-NLS-1$
4023 return "else"; //$NON-NLS-1$
4024 case TokenNameelseif:
4025 return "elseif"; //$NON-NLS-1$
4026 case TokenNameendfor:
4027 return "endfor"; //$NON-NLS-1$
4028 case TokenNameendforeach:
4029 return "endforeach"; //$NON-NLS-1$
4030 case TokenNameendif:
4031 return "endif"; //$NON-NLS-1$
4032 case TokenNameendswitch:
4033 return "endswitch"; //$NON-NLS-1$
4034 case TokenNameendwhile:
4035 return "endwhile"; //$NON-NLS-1$
4038 case TokenNameextends:
4039 return "extends"; //$NON-NLS-1$
4040 // case TokenNamefalse :
4041 // return "false"; //$NON-NLS-1$
4042 case TokenNamefinal:
4043 return "final"; //$NON-NLS-1$
4045 return "for"; //$NON-NLS-1$
4046 case TokenNameforeach:
4047 return "foreach"; //$NON-NLS-1$
4048 case TokenNamefunction:
4049 return "function"; //$NON-NLS-1$
4050 case TokenNameglobal:
4051 return "global"; //$NON-NLS-1$
4053 return "if"; //$NON-NLS-1$
4054 case TokenNameimplements:
4055 return "implements"; //$NON-NLS-1$
4056 case TokenNameinclude:
4057 return "include"; //$NON-NLS-1$
4058 case TokenNameinclude_once:
4059 return "include_once"; //$NON-NLS-1$
4060 case TokenNameinstanceof:
4061 return "instanceof"; //$NON-NLS-1$
4062 case TokenNameinterface:
4063 return "interface"; //$NON-NLS-1$
4064 case TokenNameisset:
4065 return "isset"; //$NON-NLS-1$
4067 return "list"; //$NON-NLS-1$
4069 return "new"; //$NON-NLS-1$
4070 // case TokenNamenull :
4071 // return "null"; //$NON-NLS-1$
4073 return "OR"; //$NON-NLS-1$
4074 case TokenNameprint:
4075 return "print"; //$NON-NLS-1$
4076 case TokenNameprivate:
4077 return "private"; //$NON-NLS-1$
4078 case TokenNameprotected:
4079 return "protected"; //$NON-NLS-1$
4080 case TokenNamepublic:
4081 return "public"; //$NON-NLS-1$
4082 case TokenNamerequire:
4083 return "require"; //$NON-NLS-1$
4084 case TokenNamerequire_once:
4085 return "require_once"; //$NON-NLS-1$
4086 case TokenNamereturn:
4087 return "return"; //$NON-NLS-1$
4088 // case TokenNameself:
4089 // return "self"; //$NON-NLS-1$
4090 case TokenNamestatic:
4091 return "static"; //$NON-NLS-1$
4092 case TokenNameswitch:
4093 return "switch"; //$NON-NLS-1$
4094 // case TokenNametrue :
4095 // return "true"; //$NON-NLS-1$
4096 case TokenNameunset:
4097 return "unset"; //$NON-NLS-1$
4099 return "var"; //$NON-NLS-1$
4100 case TokenNamewhile:
4101 return "while"; //$NON-NLS-1$
4103 return "XOR"; //$NON-NLS-1$
4104 // case TokenNamethis :
4105 // return "$this"; //$NON-NLS-1$
4106 case TokenNameIntegerLiteral:
4107 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4108 case TokenNameDoubleLiteral:
4109 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4110 case TokenNameStringDoubleQuote:
4111 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4112 case TokenNameStringSingleQuote:
4113 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4114 case TokenNameStringInterpolated:
4115 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4116 case TokenNameEncapsedString0:
4117 return "`"; //$NON-NLS-1$
4118 // case TokenNameEncapsedString1:
4119 // return "\'"; //$NON-NLS-1$
4120 // case TokenNameEncapsedString2:
4121 // return "\""; //$NON-NLS-1$
4122 case TokenNameSTRING:
4123 return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4124 case TokenNameHEREDOC:
4125 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4126 case TokenNamePLUS_PLUS:
4127 return "++"; //$NON-NLS-1$
4128 case TokenNameMINUS_MINUS:
4129 return "--"; //$NON-NLS-1$
4130 case TokenNameEQUAL_EQUAL:
4131 return "=="; //$NON-NLS-1$
4132 case TokenNameEQUAL_EQUAL_EQUAL:
4133 return "==="; //$NON-NLS-1$
4134 case TokenNameEQUAL_GREATER:
4135 return "=>"; //$NON-NLS-1$
4136 case TokenNameLESS_EQUAL:
4137 return "<="; //$NON-NLS-1$
4138 case TokenNameGREATER_EQUAL:
4139 return ">="; //$NON-NLS-1$
4140 case TokenNameNOT_EQUAL:
4141 return "!="; //$NON-NLS-1$
4142 case TokenNameNOT_EQUAL_EQUAL:
4143 return "!=="; //$NON-NLS-1$
4144 case TokenNameLEFT_SHIFT:
4145 return "<<"; //$NON-NLS-1$
4146 case TokenNameRIGHT_SHIFT:
4147 return ">>"; //$NON-NLS-1$
4148 case TokenNamePLUS_EQUAL:
4149 return "+="; //$NON-NLS-1$
4150 case TokenNameMINUS_EQUAL:
4151 return "-="; //$NON-NLS-1$
4152 case TokenNameMULTIPLY_EQUAL:
4153 return "*="; //$NON-NLS-1$
4154 case TokenNameDIVIDE_EQUAL:
4155 return "/="; //$NON-NLS-1$
4156 case TokenNameAND_EQUAL:
4157 return "&="; //$NON-NLS-1$
4158 case TokenNameOR_EQUAL:
4159 return "|="; //$NON-NLS-1$
4160 case TokenNameXOR_EQUAL:
4161 return "^="; //$NON-NLS-1$
4162 case TokenNameREMAINDER_EQUAL:
4163 return "%="; //$NON-NLS-1$
4164 case TokenNameDOT_EQUAL:
4165 return ".="; //$NON-NLS-1$
4166 case TokenNameLEFT_SHIFT_EQUAL:
4167 return "<<="; //$NON-NLS-1$
4168 case TokenNameRIGHT_SHIFT_EQUAL:
4169 return ">>="; //$NON-NLS-1$
4170 case TokenNameOR_OR:
4171 return "||"; //$NON-NLS-1$
4172 case TokenNameAND_AND:
4173 return "&&"; //$NON-NLS-1$
4175 return "+"; //$NON-NLS-1$
4176 case TokenNameMINUS:
4177 return "-"; //$NON-NLS-1$
4178 case TokenNameMINUS_GREATER:
4181 return "!"; //$NON-NLS-1$
4182 case TokenNameREMAINDER:
4183 return "%"; //$NON-NLS-1$
4185 return "^"; //$NON-NLS-1$
4187 return "&"; //$NON-NLS-1$
4188 case TokenNameMULTIPLY:
4189 return "*"; //$NON-NLS-1$
4191 return "|"; //$NON-NLS-1$
4192 case TokenNameTWIDDLE:
4193 return "~"; //$NON-NLS-1$
4194 case TokenNameTWIDDLE_EQUAL:
4195 return "~="; //$NON-NLS-1$
4196 case TokenNameDIVIDE:
4197 return "/"; //$NON-NLS-1$
4198 case TokenNameGREATER:
4199 return ">"; //$NON-NLS-1$
4201 return "<"; //$NON-NLS-1$
4202 case TokenNameLPAREN:
4203 return "("; //$NON-NLS-1$
4204 case TokenNameRPAREN:
4205 return ")"; //$NON-NLS-1$
4206 case TokenNameLBRACE:
4207 return "{"; //$NON-NLS-1$
4208 case TokenNameRBRACE:
4209 return "}"; //$NON-NLS-1$
4210 case TokenNameLBRACKET:
4211 return "["; //$NON-NLS-1$
4212 case TokenNameRBRACKET:
4213 return "]"; //$NON-NLS-1$
4214 case TokenNameSEMICOLON:
4215 return ";"; //$NON-NLS-1$
4216 case TokenNameQUESTION:
4217 return "?"; //$NON-NLS-1$
4218 case TokenNameCOLON:
4219 return ":"; //$NON-NLS-1$
4220 case TokenNameCOMMA:
4221 return ","; //$NON-NLS-1$
4223 return "."; //$NON-NLS-1$
4224 case TokenNameEQUAL:
4225 return "="; //$NON-NLS-1$
4228 case TokenNameDOLLAR:
4230 case TokenNameDOLLAR_LBRACE:
4232 case TokenNameLBRACE_DOLLAR:
4235 return "EOF"; //$NON-NLS-1$
4236 case TokenNameWHITESPACE:
4237 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4238 case TokenNameCOMMENT_LINE:
4239 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4240 case TokenNameCOMMENT_BLOCK:
4241 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4242 case TokenNameCOMMENT_PHPDOC:
4243 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4244 // case TokenNameHTML :
4245 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
4248 return "__FILE__"; //$NON-NLS-1$
4250 return "__LINE__"; //$NON-NLS-1$
4251 case TokenNameCLASS_C:
4252 return "__CLASS__"; //$NON-NLS-1$
4253 case TokenNameMETHOD_C:
4254 return "__METHOD__"; //$NON-NLS-1$
4255 case TokenNameFUNC_C:
4256 return "__FUNCTION__"; //$NON-NLS-1$
4257 case TokenNameboolCAST:
4258 return "( bool )"; //$NON-NLS-1$
4259 case TokenNameintCAST:
4260 return "( int )"; //$NON-NLS-1$
4261 case TokenNamedoubleCAST:
4262 return "( double )"; //$NON-NLS-1$
4263 case TokenNameobjectCAST:
4264 return "( object )"; //$NON-NLS-1$
4265 case TokenNamestringCAST:
4266 return "( string )"; //$NON-NLS-1$
// Fallback for token kinds without a dedicated case (presumably the default branch).
// NOTE(review): new Integer(int) is deprecated; concatenating 'act' directly yields the same text.
4268 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
// Convenience constructor: delegates to the three-argument constructor with
// checkNonExternalizedStringLiterals set to false.
4276 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
4277 this(tokenizeComments, tokenizeWhiteSpace, false);
// Convenience constructor: delegates to the four-argument constructor with
// assertMode set to false.
4280 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
4281 boolean checkNonExternalizedStringLiterals) {
4282 this(tokenizeComments, tokenizeWhiteSpace,
4283 checkNonExternalizedStringLiterals, false);
// Convenience constructor: delegates to the eight-argument constructor with
// tokenizeStrings = false and taskTags = null.
// NOTE(review): the remaining arguments (taskPriorities, isTaskCaseSensitive) are on a
// line not shown in this excerpt.
4286 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
4287 boolean checkNonExternalizedStringLiterals, boolean assertMode) {
4288 this(tokenizeComments, tokenizeWhiteSpace,
4289 checkNonExternalizedStringLiterals, assertMode, false, null,
/**
 * Creates a scanner with every option given explicitly; the other
 * constructors delegate here.
 *
 * @param tokenizeComments stored in {@code this.tokenizeComments}
 * @param tokenizeWhiteSpace stored in {@code this.tokenizeWhiteSpace}
 * @param checkNonExternalizedStringLiterals stored in
 *            {@code this.checkNonExternalizedStringLiterals}
 * @param assertMode currently ignored (its assignment is disabled below);
 *            kept so existing callers keep compiling
 * @param tokenizeStrings stored in {@code this.tokenizeStrings}
 * @param taskTags task tags looked for by {@code checkTaskTag}; may be
 *            {@code null}
 * @param taskPriorities priorities matched to {@code taskTags} by index; may
 *            be {@code null}
 * @param isTaskCaseSensitive whether task tags are matched case-sensitively
 *            by {@code checkTaskTag}
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
        boolean checkNonExternalizedStringLiterals, boolean assertMode,
        boolean tokenizeStrings, char[][] taskTags,
        char[][] taskPriorities, boolean isTaskCaseSensitive) {
    this.eofPosition = Integer.MAX_VALUE;
    this.tokenizeComments = tokenizeComments;
    this.tokenizeWhiteSpace = tokenizeWhiteSpace;
    this.tokenizeStrings = tokenizeStrings;
    this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
    // this.assertMode = assertMode;
    // this.encapsedStringStack = null;
    this.taskTags = taskTags;
    this.taskPriorities = taskPriorities;
    // checkTaskTag reads this flag; without the assignment the caller's
    // choice would be silently dropped and the field default would apply.
    this.isTaskCaseSensitive = isTaskCaseSensitive;
}
// Runs parseTags on currentLine when one is pending.
// NOTE(review): the statement guarded by the null check is not shown in this excerpt -
// presumably an early return; confirm against the full file.
4308 private void checkNonExternalizeString() throws InvalidInputException {
4309 if (currentLine == null)
4311 parseTags(currentLine);
4314 private void parseTags(NLSLine line) throws InvalidInputException {
4315 String s = new String(getCurrentTokenSource());
4316 int pos = s.indexOf(TAG_PREFIX);
4317 int lineLength = line.size();
4319 int start = pos + TAG_PREFIX_LENGTH;
4320 int end = s.indexOf(TAG_POSTFIX, start);
4321 String index = s.substring(start, end);
4324 i = Integer.parseInt(index) - 1;
4325 // Tags are one based not zero based.
4326 } catch (NumberFormatException e) {
4327 i = -1; // we don't want to consider this as a valid NLS tag
4329 if (line.exists(i)) {
4332 pos = s.indexOf(TAG_PREFIX, start);
4334 this.nonNLSStrings = new StringLiteral[lineLength];
4335 int nonNLSCounter = 0;
4336 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4337 StringLiteral literal = (StringLiteral) iterator.next();
4338 if (literal != null) {
4339 this.nonNLSStrings[nonNLSCounter++] = literal;
4342 if (nonNLSCounter == 0) {
4343 this.nonNLSStrings = null;
4347 this.wasNonExternalizedStringLiteral = true;
4348 if (nonNLSCounter != lineLength) {
4349 System.arraycopy(this.nonNLSStrings, 0,
4350 (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
// Decodes the escape sequence that follows a backslash: reads the next character from
// 'source' and replaces currentCharacter with the character it denotes
// (\b \t \n \f \r \" \' \\ or an octal escape of up to three digits).
// Throws InvalidInputException(INVALID_ESCAPE) for sequences it does not accept.
// NOTE(review): the case labels, break statements and several branch bodies of the
// switch are not shown in this excerpt.
4356 public final void scanEscapeCharacter() throws InvalidInputException {
4357 // the string with "\\u" is a legal string of two chars \ and u
4358 // thus we use a direct access to the source (for regular cases).
4359 if (unicodeAsBackSlash) {
4360 // consume next character
4361 unicodeAsBackSlash = false;
4362 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4363 // (source[currentPosition] == 'u')) {
4364 // getNextUnicodeChar();
4366 if (withoutUnicodePtr != 0) {
4367 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// direct read of the character following the backslash
4371 currentCharacter = source[currentPosition++];
4372 switch (currentCharacter) {
4374 currentCharacter = '\b';
4377 currentCharacter = '\t';
4380 currentCharacter = '\n';
4383 currentCharacter = '\f';
4386 currentCharacter = '\r';
4389 currentCharacter = '\"';
4392 currentCharacter = '\'';
4395 currentCharacter = '\\';
4398 // -----------octal escape--------------
4400 // OctalDigit OctalDigit
4401 // ZeroToThree OctalDigit OctalDigit
// NOTE(review): Character.getNumericValue also maps non-ASCII digits to 0..9, so such a
// digit with value 0..7 passes the range check below.
4402 int number = Character.getNumericValue(currentCharacter);
4403 if (number >= 0 && number <= 7) {
// true when the first digit is 4..7, i.e. a third octal digit is not allowed
4404 boolean zeroToThreeNot = number > 3;
4406 .isDigit(currentCharacter = source[currentPosition++])) {
4407 int digit = Character.getNumericValue(currentCharacter);
4408 if (digit >= 0 && digit <= 7) {
4409 number = (number * 8) + digit;
4411 .isDigit(currentCharacter = source[currentPosition++])) {
4412 if (zeroToThreeNot) { // has read \NotZeroToThree
4414 // Digit --> ignore last character
4418 .getNumericValue(currentCharacter);
4419 if (digit >= 0 && digit <= 7) { // has read
4421 // OctalDigit OctalDigit
4422 number = (number * 8) + digit;
4423 } else { // has read \ZeroToThree OctalDigit
4425 // --> ignore last character
4429 } else { // has read \OctalDigit NonDigit--> ignore
4434 } else { // has read \OctalDigit NonOctalDigit--> ignore
4439 } else { // has read \OctalDigit --> ignore last character
// NOTE(review): the condition guarding this throw is not shown in this excerpt
4443 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the decoded character
4444 currentCharacter = (char) number;
4446 throw new InvalidInputException(INVALID_ESCAPE);
4450 // check presence of task: tags
4451 // TODO (frederic) see if we need to take unicode characters into account...
// Searches the comment spanning commentStart..commentEnd for the configured task tags
// and records each hit in the parallel arrays foundTaskTags, foundTaskPriorities,
// foundTaskPositions and foundTaskMessages (foundTaskCount entries are in use).
// A second pass fills in the message text and end position of every task found by
// this call.
// commentStart - index in 'source' of the first comment character
// commentEnd   - index used as exclusive upper bound of the tag search
// NOTE(review): many statements of this method are not shown in this excerpt. No null
// check on this.taskTags is visible before it is dereferenced; presumably callers
// guard against a null tag list - confirm.
4452 public void checkTaskTag(int commentStart, int commentEnd) {
4453 char[] src = this.source;
4455 // only look for newer task: tags
4456 if (this.foundTaskCount > 0
4457 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// tasks recorded from this index on belong to the current call
4460 int foundTaskIndex = this.foundTaskCount;
4461 char previous = src[commentStart + 1]; // should be '*' or '/'
4462 nextChar: for (int i = commentStart + 2; i < commentEnd
4463 && i < this.eofPosition; i++) {
4465 char[] priority = null;
4466 // check for tag occurrence only if not ambiguous with javadoc tag
4467 if (previous != '@') {
4468 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4469 tag = this.taskTags[itag];
4470 int tagLength = tag.length;
4474 // ensure tag is not preceded by a letter if tag starts with a
4476 if (Scanner.isPHPIdentifierStart(tag[0])) {
4477 if (Scanner.isPHPIdentifierPart(previous)) {
// compare the tag character by character against the source at position i
4482 for (int t = 0; t < tagLength; t++) {
4485 if (x >= this.eofPosition || x >= commentEnd)
4487 if ((sc = src[i + t]) != (tc = tag[t])) { // case
// a mismatch is final when matching is case sensitive or the characters also differ in lower case
4490 if (this.isTaskCaseSensitive
4491 || (Character.toLowerCase(sc) != Character
4492 .toLowerCase(tc))) { // case
4499 // ensure tag is not followed with letter if tag finishes
4502 if (i + tagLength < commentEnd
4503 && Scanner.isPHPIdentifierPart(src[i + tagLength
4505 if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
// first hit: allocate the result arrays; afterwards double them when full
4508 if (this.foundTaskTags == null) {
4509 this.foundTaskTags = new char[5][];
4510 this.foundTaskMessages = new char[5][];
4511 this.foundTaskPriorities = new char[5][];
4512 this.foundTaskPositions = new int[5][];
4513 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4518 this.foundTaskTags = new char[this.foundTaskCount * 2][],
4519 0, this.foundTaskCount);
4522 this.foundTaskMessages,
4524 this.foundTaskMessages = new char[this.foundTaskCount * 2][],
4525 0, this.foundTaskCount);
4528 this.foundTaskPriorities,
4530 this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
4531 0, this.foundTaskCount);
4534 this.foundTaskPositions,
4536 this.foundTaskPositions = new int[this.foundTaskCount * 2][],
4537 0, this.foundTaskCount);
// priority with the same index as the tag, when such an entry exists
4540 priority = this.taskPriorities != null
4541 && itag < this.taskPriorities.length ? this.taskPriorities[itag]
4544 this.foundTaskTags[this.foundTaskCount] = tag;
4545 this.foundTaskPriorities[this.foundTaskCount] = priority;
4546 this.foundTaskPositions[this.foundTaskCount] = new int[] {
4547 i, i + tagLength - 1 };
// message is filled in by the second pass below
4548 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4549 this.foundTaskCount++;
4550 i += tagLength - 1; // will be incremented when looping
// second pass: determine message text and end position for the tasks found above
4556 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4557 // retrieve message start and end positions
4558 int msgStart = this.foundTaskPositions[i][0]
4559 + this.foundTaskTags[i].length;
4560 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1
4562 // at most beginning of next task
4563 if (max_value < msgStart) {
4564 max_value = msgStart; // would only occur if tag is before
// forward scan for a line break after the tag
4569 for (int j = msgStart; j < max_value; j++) {
4570 if ((c = src[j]) == '\n' || c == '\r') {
// backward scan for a '*'
4576 for (int j = max_value; j > msgStart; j--) {
4577 if ((c = src[j]) == '*') {
4585 if (msgStart == end)
// NOTE(review): both trim loops index src before testing msgStart <= end, so the
// bounds condition cannot prevent an out-of-range access.
4588 while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4590 while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4592 // update the end position of the task
4593 this.foundTaskPositions[i][1] = end;
4594 // get the message source
4595 final int messageLength = end - msgStart + 1;
4596 char[] message = new char[messageLength];
4597 System.arraycopy(src, msgStart, message, 0, messageLength);
4598 this.foundTaskMessages[i] = message;
4602 // check presence of task: tags
4603 // public void checkTaskTag(int commentStart, int commentEnd) {
4604 // // only look for newer task: tags
4605 // if (this.foundTaskCount > 0 &&
4606 // this.foundTaskPositions[this.foundTaskCount
4607 // - 1][0] >= commentStart) {
4610 // int foundTaskIndex = this.foundTaskCount;
4611 // nextChar: for (int i = commentStart; i < commentEnd && i <
4612 // this.eofPosition; i++) {
4613 // char[] tag = null;
4614 // char[] priority = null;
4615 // // check for tag occurrence
4616 // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4617 // tag = this.taskTags[itag];
4618 // priority = this.taskPriorities != null && itag <
4619 // this.taskPriorities.length
4620 // ? this.taskPriorities[itag] : null;
4621 // int tagLength = tag.length;
4622 // for (int t = 0; t < tagLength; t++) {
4623 // if (this.source[i + t] != tag[t])
4624 // continue nextTag;
4626 // if (this.foundTaskTags == null) {
4627 // this.foundTaskTags = new char[5][];
4628 // this.foundTaskMessages = new char[5][];
4629 // this.foundTaskPriorities = new char[5][];
4630 // this.foundTaskPositions = new int[5][];
4631 // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4632 // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new
4633 // char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4634 // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new
4635 // char[this.foundTaskCount * 2][], 0,
4636 // this.foundTaskCount);
4637 // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities =
4638 // new char[this.foundTaskCount * 2][], 0,
4639 // this.foundTaskCount);
4640 // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions =
4642 // int[this.foundTaskCount * 2][], 0,
4643 // this.foundTaskCount);
4645 // this.foundTaskTags[this.foundTaskCount] = tag;
4646 // this.foundTaskPriorities[this.foundTaskCount] = priority;
4647 // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i +
4650 // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4651 // this.foundTaskCount++;
4652 // i += tagLength - 1; // will be incremented when looping
4655 // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4656 // // retrieve message start and end positions
4657 // int msgStart = this.foundTaskPositions[i][0] +
4658 // this.foundTaskTags[i].length;
4659 // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i +
4660 // 1][0] - 1 : commentEnd - 1;
4661 // // at most beginning of next task
4662 // if (max_value < msgStart)
4663 // max_value = msgStart; // would only occur if tag is before EOF.
4666 // for (int j = msgStart; j < max_value; j++) {
4667 // if ((c = this.source[j]) == '\n' || c == '\r') {
4673 // for (int j = max_value; j > msgStart; j--) {
4674 // if ((c = this.source[j]) == '*') {
4682 // if (msgStart == end)
4683 // continue; // empty
4684 // // trim the message
4685 // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4687 // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4689 // // update the end position of the task
4690 // this.foundTaskPositions[i][1] = end;
4691 // // get the message source
4692 // final int messageLength = end - msgStart + 1;
4693 // char[] message = new char[messageLength];
4694 // System.arraycopy(source, msgStart, message, 0, messageLength);
4695 // this.foundTaskMessages[i] = message;