1 /***********************************************************************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3 * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4 * and is available at http://www.eclipse.org/legal/cpl-v05.html
6 * Contributors: IBM Corporation - initial API and implementation
7 **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
14 import net.sourceforge.phpdt.core.compiler.CharOperation;
15 import net.sourceforge.phpdt.core.compiler.IScanner;
16 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
17 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
21 public class Scanner implements IScanner, ITerminalSymbols {
23 * APIs are - getNextToken() which returns the current type of the token
24 * (this value is not memorized by the scanner) - getCurrentTokenSource()
25 * which provides with the token "REAL" source (aka all unicode have been
26 * transformed into a correct char) - sourceStart gives the position into
27 * the stream - currentPosition-1 gives the sourceEnd position into the
31 // private boolean assertMode;
32 public boolean useAssertAsAnIndentifier = false;
34 // flag indicating if processed source contains occurrences of keyword
36 public boolean containsAssertKeyword = false;
38 public boolean recordLineSeparator;
40 public boolean ignorePHPOneLiner = false;
42 public boolean phpMode = false;
45 * This token is set to TokenName.echo if a short tag block begins (i.e.
46 * <?= ... ?>). Directly after the "=" character the
47 * getNextToken() method returns TokenName.INLINE_HTML In the next call to
48 * the getNextToken() method the value of fFillerToken (==TokenName.echo) is
52 TokenName fFillerToken = TokenName.EOF;
54 public char currentCharacter;
56 public int startPosition;
58 public int currentPosition;
60 public int initialPosition, eofPosition;
62 // after this position eof are generated instead of real token from the
64 public boolean tokenizeComments;
66 public boolean tokenizeWhiteSpace;
68 public boolean tokenizeStrings;
70 // source should be viewed as a window (aka a part)
71 // of an entire very large stream
75 public char[] withoutUnicodeBuffer;
77 public int withoutUnicodePtr;
79 // when == 0 ==> no unicode in the current token
80 public boolean unicodeAsBackSlash = false;
82 public boolean scanningFloatLiteral = false;
84 // support for /** comments
85 public int[] commentStops = new int[10];
87 public int[] commentStarts = new int[10];
89 public int commentPtr = -1; // no comment test with commentPtr value -1
91 protected int lastCommentLinePosition = -1;
93 // diet parsing support - jump over some method body when requested
94 public boolean diet = false;
96 // support for the poor-line-debuggers ....
97 // remember the position of the cr/lf
98 public int[] lineEnds = new int[250];
100 public int linePtr = -1;
102 public boolean wasAcr = false;
104 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
106 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
108 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
110 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
112 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
114 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
116 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
118 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
120 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
122 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
124 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
126 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
128 // ----------------optimized identifier management------------------
129 static final char[] charArray_a = new char[] { 'a' },
130 charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
131 charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' },
132 charArray_f = new char[] { 'f' }, charArray_g = new char[] { 'g' },
133 charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
134 charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' },
135 charArray_l = new char[] { 'l' }, charArray_m = new char[] { 'm' },
136 charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
137 charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' },
138 charArray_r = new char[] { 'r' }, charArray_s = new char[] { 's' },
139 charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
140 charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' },
141 charArray_x = new char[] { 'x' }, charArray_y = new char[] { 'y' },
142 charArray_z = new char[] { 'z' };
144 static final char[] charArray_va = new char[] { '$', 'a' },
145 charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
146 '$', 'c' }, charArray_vd = new char[] { '$', 'd' },
147 charArray_ve = new char[] { '$', 'e' }, charArray_vf = new char[] {
148 '$', 'f' }, charArray_vg = new char[] { '$', 'g' },
149 charArray_vh = new char[] { '$', 'h' }, charArray_vi = new char[] {
150 '$', 'i' }, charArray_vj = new char[] { '$', 'j' },
151 charArray_vk = new char[] { '$', 'k' }, charArray_vl = new char[] {
152 '$', 'l' }, charArray_vm = new char[] { '$', 'm' },
153 charArray_vn = new char[] { '$', 'n' }, charArray_vo = new char[] {
154 '$', 'o' }, charArray_vp = new char[] { '$', 'p' },
155 charArray_vq = new char[] { '$', 'q' }, charArray_vr = new char[] {
156 '$', 'r' }, charArray_vs = new char[] { '$', 's' },
157 charArray_vt = new char[] { '$', 't' }, charArray_vu = new char[] {
158 '$', 'u' }, charArray_vv = new char[] { '$', 'v' },
159 charArray_vw = new char[] { '$', 'w' }, charArray_vx = new char[] {
160 '$', 'x' }, charArray_vy = new char[] { '$', 'y' },
161 charArray_vz = new char[] { '$', 'z' };
// Character-class lookup table. ObviousIdentCharNatures maps every char value
// below MAX_OBVIOUS (256) to one of the C_* codes declared here. Entries that
// are never assigned keep the Java default of 0, which equals none of the codes.
163 public final static int MAX_OBVIOUS = 256;
165 static final int[] ObviousIdentCharNatures = new int[MAX_OBVIOUS];
// Class codes stored in the table.
167 public final static int C_DOLLAR = 8;
169 public final static int C_LETTER = 4;
171 public final static int C_DIGIT = 3;
173 public final static int C_SEPARATOR = 2;
175 public final static int C_SPACE = 1;
// Table initialisation (the enclosing initializer header is not shown here).
// Decimal digits.
177 for (int i = '0'; i <= '9'; i++)
178 ObviousIdentCharNatures[i] = C_DIGIT;
// ASCII letters, the underscore and the whole 127..255 range count as letters.
180 for (int i = 'a'; i <= 'z'; i++)
181 ObviousIdentCharNatures[i] = C_LETTER;
182 for (int i = 'A'; i <= 'Z'; i++)
183 ObviousIdentCharNatures[i] = C_LETTER;
184 ObviousIdentCharNatures['_'] = C_LETTER;
185 for (int i = 127; i <= 255; i++)
186 ObviousIdentCharNatures[i] = C_LETTER;
// '$' gets its own class so variable names can be told apart from identifiers.
188 ObviousIdentCharNatures['$'] = C_DOLLAR;
// Whitespace characters.
190 ObviousIdentCharNatures[10] = C_SPACE; // \ u000a: LINE FEED
191 ObviousIdentCharNatures[12] = C_SPACE; // \ u000c: FORM FEED
192 ObviousIdentCharNatures[13] = C_SPACE; // \ u000d: CARRIAGE RETURN
193 ObviousIdentCharNatures[32] = C_SPACE; // \ u0020: SPACE
194 ObviousIdentCharNatures[9] = C_SPACE; // \ u0009: HORIZONTAL
// Punctuation, operators and quote characters.
197 ObviousIdentCharNatures['.'] = C_SEPARATOR;
198 ObviousIdentCharNatures[':'] = C_SEPARATOR;
199 ObviousIdentCharNatures[';'] = C_SEPARATOR;
200 ObviousIdentCharNatures[','] = C_SEPARATOR;
201 ObviousIdentCharNatures['['] = C_SEPARATOR;
202 ObviousIdentCharNatures[']'] = C_SEPARATOR;
203 ObviousIdentCharNatures['('] = C_SEPARATOR;
204 ObviousIdentCharNatures[')'] = C_SEPARATOR;
205 ObviousIdentCharNatures['{'] = C_SEPARATOR;
206 ObviousIdentCharNatures['}'] = C_SEPARATOR;
207 ObviousIdentCharNatures['+'] = C_SEPARATOR;
208 ObviousIdentCharNatures['-'] = C_SEPARATOR;
209 ObviousIdentCharNatures['*'] = C_SEPARATOR;
210 ObviousIdentCharNatures['/'] = C_SEPARATOR;
211 ObviousIdentCharNatures['='] = C_SEPARATOR;
212 ObviousIdentCharNatures['&'] = C_SEPARATOR;
213 ObviousIdentCharNatures['|'] = C_SEPARATOR;
214 ObviousIdentCharNatures['?'] = C_SEPARATOR;
215 ObviousIdentCharNatures['<'] = C_SEPARATOR;
216 ObviousIdentCharNatures['>'] = C_SEPARATOR;
217 ObviousIdentCharNatures['!'] = C_SEPARATOR;
218 ObviousIdentCharNatures['%'] = C_SEPARATOR;
219 ObviousIdentCharNatures['^'] = C_SEPARATOR;
220 ObviousIdentCharNatures['~'] = C_SEPARATOR;
221 ObviousIdentCharNatures['"'] = C_SEPARATOR;
222 ObviousIdentCharNatures['\''] = C_SEPARATOR;
225 static final char[] initCharArray = new char[] { '\u0000', '\u0000',
226 '\u0000', '\u0000', '\u0000', '\u0000' };
228 static final int TableSize = 30, InternalTableSize = 6;
230 // 30*6 = 180 entries
231 public static final int OptimizedLength = 6;
234 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
236 // support for detecting non-externalized string literals
237 int currentLineNr = -1;
239 int previousLineNr = -1;
241 NLSLine currentLine = null;
243 List lines = new ArrayList();
245 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
247 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
249 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
251 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
253 public StringLiteral[] nonNLSStrings = null;
255 public boolean checkNonExternalizedStringLiterals = true;
257 public boolean wasNonExternalizedStringLiteral = false;
260 for (int i = 0; i < 6; i++) {
261 for (int j = 0; j < TableSize; j++) {
262 for (int k = 0; k < InternalTableSize; k++) {
263 charArray_length[i][j][k] = initCharArray;
269 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
272 public static final int RoundBracket = 0;
274 public static final int SquareBracket = 1;
276 public static final int CurlyBracket = 2;
278 public static final int BracketKinds = 3;
281 public char[][] foundTaskTags = null;
283 public char[][] foundTaskMessages;
285 public char[][] foundTaskPriorities = null;
287 public int[][] foundTaskPositions;
289 public int foundTaskCount = 0;
291 public char[][] taskTags = null;
293 public char[][] taskPriorities = null;
295 public boolean isTaskCaseSensitive = true;
297 public static final boolean DEBUG = false;
299 public static final boolean TRACE = false;
301 public ICompilationUnit compilationUnit = null;
304 * Determines if the specified character is permissible as the first
305 * character in a PHP identifier or variable
307 * The '$' character for PHP variables is regarded as a correct first
// Tests whether ch may begin a PHP identifier or a PHP variable name.
// For ch below MAX_OBVIOUS the answer is read from ObviousIdentCharNatures:
// true when the entry is C_LETTER or C_DOLLAR.
// NOTE(review): the result for ch >= MAX_OBVIOUS is not visible in this
// listing - confirm against the full source.
311 public static boolean isPHPIdentOrVarStart(char ch) {
312 if (ch < MAX_OBVIOUS) {
313 return ObviousIdentCharNatures[ch] == C_LETTER
314 || ObviousIdentCharNatures[ch] == C_DOLLAR;
// Earlier, non-table-based formulation kept for reference:
317 // return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F
319 // ch && ch <= 0xFF);
323 * Determines if the specified character is permissible as the first
324 * character in a PHP identifier.
326 * The '$' character for PHP variables isn't regarded as the first character !
// Tests whether ch may begin a PHP identifier; '$' is not accepted here.
// For ch below MAX_OBVIOUS the answer is read from ObviousIdentCharNatures:
// true only when the entry is C_LETTER.
// NOTE(review): the result for ch >= MAX_OBVIOUS is not visible in this
// listing - confirm against the full source.
328 public static boolean isPHPIdentifierStart(char ch) {
329 if (ch < MAX_OBVIOUS) {
330 return ObviousIdentCharNatures[ch] == C_LETTER;
// Earlier, non-table-based formulation kept for reference:
333 // return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <=
338 * Determines if the specified character may be part of a PHP identifier as
339 * other than the first character
// Tests whether ch may appear inside a PHP identifier after its first character.
// For ch below MAX_OBVIOUS the answer is read from ObviousIdentCharNatures:
// true when the entry is C_LETTER or C_DIGIT.
// NOTE(review): the result for ch >= MAX_OBVIOUS is not visible in this
// listing - confirm against the full source.
341 public static boolean isPHPIdentifierPart(char ch) {
342 if (ch < MAX_OBVIOUS) {
343 return ObviousIdentCharNatures[ch] == C_LETTER
344 || ObviousIdentCharNatures[ch] == C_DIGIT;
// Earlier, non-table-based formulation kept for reference:
347 // return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch &&
352 // public static boolean isSQLIdentifierPart(char ch) {
353 // if (ch < MAX_OBVIOUS) {
354 // return ObviousIdentCharNatures[ch] == C_LETTER
355 // || ObviousIdentCharNatures[ch] == C_DIGIT;
// Answers true when currentPosition equals the length of the source array,
// i.e. the read position has reached the end of the buffer.
360 public final boolean atEnd() {
// The check uses source.length, not eofPosition, so it is only meaningful
// when source is the whole input rather than a window onto a larger stream.
361 // This code is not relevant if source is
362 // Only a part of the real stream input
363 return source.length == currentPosition;
// Returns the characters of the current token, i.e. the
// (currentPosition - startPosition) chars of source beginning at startPosition.
// Six branches of the switch delegate to optimizedCurrentTokenSource1..6.
// NOTE(review): the case labels are not visible in this listing; presumably
// they are lengths 1..6, matching OptimizedLength - confirm.
// Any other length is copied into a freshly allocated array.
366 public char[] getCurrentIdentifierSource() {
367 // return the token REAL source (aka unicodes are precomputed)
// Disabled unicode-buffer path:
369 // if (withoutUnicodePtr != 0)
370 // //0 is used as a fast test flag so the real first char is in position
373 // withoutUnicodeBuffer,
375 // result = new char[withoutUnicodePtr],
377 // withoutUnicodePtr);
379 int length = currentPosition - startPosition;
380 switch (length) { // see OptimizedLength
382 return optimizedCurrentTokenSource1();
384 return optimizedCurrentTokenSource2();
386 return optimizedCurrentTokenSource3();
388 return optimizedCurrentTokenSource4();
390 return optimizedCurrentTokenSource5();
392 return optimizedCurrentTokenSource6();
// Fallback: plain copy of the token into a new char[length].
395 System.arraycopy(source, startPosition, result = new char[length], 0,
// Returns the index of the last character of the current token.
// currentPosition points one past that character, hence the -1.
401 public int getCurrentTokenEndPosition() {
402 return this.currentPosition - 1;
// Returns a newly allocated copy of the current token: the
// (currentPosition - startPosition) chars of source starting at startPosition.
405 public final char[] getCurrentTokenSource() {
406 // Return the token REAL source (aka unicodes are precomputed)
// Disabled unicode-buffer path:
408 // if (withoutUnicodePtr != 0)
409 // // 0 is used as a fast test flag so the real first char is in
412 // withoutUnicodeBuffer,
414 // result = new char[withoutUnicodePtr],
416 // withoutUnicodePtr);
// The result array is allocated and length is computed inside the arraycopy call.
419 System.arraycopy(source, startPosition,
420 result = new char[length = currentPosition - startPosition], 0,
// Variant of getCurrentTokenSource() with a caller-supplied start: returns a
// newly allocated copy of the (currentPosition - startPos) chars of source
// beginning at startPos.
// NOTE(review): no bounds check on startPos is visible here; a value greater
// than currentPosition would request a negative array size - confirm callers.
426 public final char[] getCurrentTokenSource(int startPos) {
427 // Return the token REAL source (aka unicodes are precomputed)
// Disabled unicode-buffer path:
429 // if (withoutUnicodePtr != 0)
430 // // 0 is used as a fast test flag so the real first char is in
433 // withoutUnicodeBuffer,
435 // result = new char[withoutUnicodePtr],
437 // withoutUnicodePtr);
// The result array is allocated and length is computed inside the arraycopy call.
440 System.arraycopy(source, startPos,
441 result = new char[length = currentPosition - startPos], 0,
// Returns the content of the current string token without its delimiters.
// When withoutUnicodePtr is non-zero the content is taken from
// withoutUnicodeBuffer; the second copy below reads from source instead.
// NOTE(review): the branch keyword between the two copies and the length
// expression of the second copy are cut off in this listing.
447 public final char[] getCurrentTokenSourceString() {
448 // return the token REAL source (aka unicodes are precomputed).
449 // REMOVE the two " that are at the beginning and the end.
451 if (withoutUnicodePtr != 0)
452 // 0 is used as a fast test flag so the real first char is in
// Buffered path: start at index 2 and copy withoutUnicodePtr - 2 chars.
454 System.arraycopy(withoutUnicodeBuffer, 2,
455 // 2 is 1 (real start) + 1 (to jump over the ")
456 result = new char[withoutUnicodePtr - 2], 0,
457 withoutUnicodePtr - 2);
// Direct path: start one char after startPosition to skip the opening delimiter.
460 System.arraycopy(source, startPosition + 1,
461 result = new char[length = currentPosition - startPosition
// Compares word with the current token without allocating a copy.
// First the lengths are compared (word.length against
// currentPosition - startPosition), then each char of word is compared with
// the char at the same offset from startPosition in source.
// NOTE(review): the return statements are not visible in this listing;
// presumably false on the first difference and true otherwise.
467 public final boolean equalsCurrentTokenSource(char[] word) {
468 if (word.length != currentPosition - startPosition) {
471 for (int i = 0; i < word.length; i++) {
472 if (word[i] != source[startPosition + i]) {
479 // public final char[] getRawTokenSourceEnd() {
480 // int length = this.eofPosition - this.currentPosition - 1;
481 // char[] sourceEnd = new char[length];
482 // System.arraycopy(this.source, this.currentPosition, sourceEnd, 0,
// Returns startPosition, the index in source where the current token begins.
487 public int getCurrentTokenStartPosition() {
488 return this.startPosition;
491 // public final String getCurrentStringLiteral() {
492 // char[] result = getCurrentStringLiteralSource();
493 // return new String(result);
// Returns the characters between the delimiters of the current string literal:
// currentPosition - startPosition - 2 chars of source, starting one char
// after startPosition.
496 public final char[] getCurrentStringLiteralSource() {
497 // Return the token REAL source (aka unicodes are precomputed)
// Guard for a token of at most one character, which has no content to copy.
// NOTE(review): the body of this guard is not visible in this listing.
498 if (startPosition + 1 >= currentPosition) {
// NOTE(review): for a two-character token the computed length is 0.
504 .arraycopy(source, startPosition + 1,
505 result = new char[length = currentPosition
506 - startPosition - 2], 0, length);
511 // public final char[] getCurrentStringLiteralSource(int startPos) {
512 // // Return the token REAL source (aka unicodes are precomputed)
515 // System.arraycopy(source, startPos + 1,
516 // result = new char[length = currentPosition - startPos - 2], 0,
523 * Search the source position corresponding to the end of a given line
526 * Line numbers are 1-based, and relative to the scanner initialPosition.
527 * Character positions are 0-based.
529 * In case the given line number is inconsistent, answers -1.
// Looks up the end position of the given line in lineEnds.
// The general case returns lineEnds[lineNumber - 1], which matches the
// 1-based line numbers described in the preceding Javadoc.
// NOTE(review): the statements guarded by the three checks below are not
// visible in this listing, and no lower-bound check on lineNumber is visible.
// The Javadoc says inconsistent line numbers answer -1 - confirm.
531 public final int getLineEnd(int lineNumber) {
// No line information recorded.
532 if (lineEnds == null)
// Line number beyond the capacity of the lineEnds array.
534 if (lineNumber >= lineEnds.length)
// Special case for the last slot of the array.
538 if (lineNumber == lineEnds.length - 1)
540 return lineEnds[lineNumber - 1];
541 // next line start one character behind the lineEnd of the previous line
545 * Search the source position corresponding to the beginning of a given line
548 * Line numbers are 1-based, and relative to the scanner initialPosition.
549 * Character positions are 0-based.
551 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
553 * In case the given line number is inconsistent, answers -1.
// Looks up the start position of the given line.
// One path returns initialPosition; the general case returns
// lineEnds[lineNumber - 2] + 1, one character after the end of the line before.
// NOTE(review): the statements guarded by the two checks below, and the
// condition selecting the initialPosition path (presumably the first line),
// are not visible in this listing.
// The Javadoc says inconsistent line numbers answer -1 - confirm.
555 public final int getLineStart(int lineNumber) {
// No line information recorded.
556 if (lineEnds == null)
// Line number beyond the capacity of the lineEnds array.
558 if (lineNumber >= lineEnds.length)
563 return initialPosition;
564 return lineEnds[lineNumber - 2] + 1;
565 // next line start one character behind the lineEnd of the previous line
// Tries to consume the next source character if it equals testedChar.
// Reads source[currentPosition] into currentCharacter and advances
// currentPosition. On a mismatch, or when the read runs past the end of
// source (IndexOutOfBoundsException), currentPosition is reset to its value
// on entry. unicodeAsBackSlash is cleared on the match and exception paths.
// NOTE(review): the return statements are not visible in this listing;
// presumably true on a match and false otherwise.
// NOTE(review): on a mismatch only currentPosition is rolled back, so
// currentCharacter keeps the character that was read.
568 public final boolean getNextChar(char testedChar) {
570 // handle the case of unicode.
571 // when a unicode appears then we must use a buffer that holds char
573 // At the end of this method currentCharacter holds the new visited char
574 // and currentPosition points right next after it
575 // Both previous lines are true if the currentCharacter is == to the
577 // On false, no side effect has occurred.
578 // ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored on failure.
579 int temp = currentPosition;
581 currentCharacter = source[currentPosition++];
// Disabled unicode-escape handling, kept for reference:
582 // if (((currentCharacter = source[currentPosition++]) == '\\')
583 // && (source[currentPosition] == 'u')) {
584 // //-------------unicode processing ------------
585 // int c1, c2, c3, c4;
586 // int unicodeSize = 6;
587 // currentPosition++;
588 // while (source[currentPosition] == 'u') {
589 // currentPosition++;
593 // if (((c1 = Character.getNumericValue(source[currentPosition++]))
596 // || ((c2 = Character.getNumericValue(source[currentPosition++])) >
599 // || ((c3 = Character.getNumericValue(source[currentPosition++])) >
602 // || ((c4 = Character.getNumericValue(source[currentPosition++])) >
605 // currentPosition = temp;
609 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
610 // if (currentCharacter != testedChar) {
611 // currentPosition = temp;
614 // unicodeAsBackSlash = currentCharacter == '\\';
616 // //need the unicode buffer
617 // if (withoutUnicodePtr == 0) {
618 // //buffer all the entries that have been left aside....
619 // withoutUnicodePtr = currentPosition - unicodeSize -
624 // withoutUnicodeBuffer,
626 // withoutUnicodePtr);
628 // //fill the buffer with the char
629 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
632 // } //-------------end unicode processing--------------
// Mismatch: un-read the character.
634 if (currentCharacter != testedChar) {
635 currentPosition = temp;
638 unicodeAsBackSlash = false;
639 // if (withoutUnicodePtr != 0)
640 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source counts as "no match".
643 } catch (IndexOutOfBoundsException e) {
644 unicodeAsBackSlash = false;
645 currentPosition = temp;
// Tries to consume the next source character if it equals testedChar1 or
// testedChar2. Reads source[currentPosition] into currentCharacter and
// advances currentPosition. If neither character matches, or the read runs
// past the end of source (IndexOutOfBoundsException), currentPosition is
// reset to its value on entry.
// Result per the comment below: 0 for testedChar1, 1 for testedChar2,
// -1 otherwise (the return statements are not visible in this listing).
// NOTE(review): when neither matches only currentPosition is rolled back, so
// currentCharacter keeps the character that was read.
650 public final int getNextChar(char testedChar1, char testedChar2) {
651 // INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
652 // test can be done with (x==0) for the first and (x>0) for the second
653 // handle the case of unicode.
654 // when a unicode appears then we must use a buffer that holds char
656 // At the end of this method currentCharacter holds the new visited char
657 // and currentPosition points right next after it
658 // Both previous lines are true if the currentCharacter is == to the
660 // On false, no side effect has occurred.
661 // ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored on failure.
662 int temp = currentPosition;
665 currentCharacter = source[currentPosition++];
// Disabled unicode-escape handling, kept for reference:
666 // if (((currentCharacter = source[currentPosition++]) == '\\')
667 // && (source[currentPosition] == 'u')) {
668 // //-------------unicode processing ------------
669 // int c1, c2, c3, c4;
670 // int unicodeSize = 6;
671 // currentPosition++;
672 // while (source[currentPosition] == 'u') {
673 // currentPosition++;
677 // if (((c1 = Character.getNumericValue(source[currentPosition++]))
680 // || ((c2 = Character.getNumericValue(source[currentPosition++])) >
683 // || ((c3 = Character.getNumericValue(source[currentPosition++])) >
686 // || ((c4 = Character.getNumericValue(source[currentPosition++])) >
689 // currentPosition = temp;
693 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
694 // if (currentCharacter == testedChar1)
696 // else if (currentCharacter == testedChar2)
699 // currentPosition = temp;
703 // //need the unicode buffer
704 // if (withoutUnicodePtr == 0) {
705 // //buffer all the entries that have been left aside....
706 // withoutUnicodePtr = currentPosition - unicodeSize -
711 // withoutUnicodeBuffer,
713 // withoutUnicodePtr);
715 // //fill the buffer with the char
716 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
718 // } //-------------end unicode processing--------------
// Classify the character that was read; testedChar1 is checked first.
720 if (currentCharacter == testedChar1)
722 else if (currentCharacter == testedChar2)
// Neither matched: un-read the character.
725 currentPosition = temp;
728 // if (withoutUnicodePtr != 0)
729 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source counts as "no match".
732 } catch (IndexOutOfBoundsException e) {
733 currentPosition = temp;
// Tries to consume the next source character if Character.isDigit accepts it.
// Reads source[currentPosition] into currentCharacter and advances
// currentPosition. If the character is not a digit, or the read runs past the
// end of source (IndexOutOfBoundsException), currentPosition is reset to its
// value on entry.
// NOTE(review): the return statements are not visible in this listing;
// presumably true for a digit and false otherwise.
// NOTE(review): on a non-digit only currentPosition is rolled back, so
// currentCharacter keeps the character that was read.
738 public final boolean getNextCharAsDigit() {
740 // handle the case of unicode.
741 // when a unicode appears then we must use a buffer that holds char
743 // At the end of this method currentCharacter holds the new visited char
744 // and currentPosition points right next after it
745 // Both previous lines are true if the currentCharacter is a digit
746 // On false, no side effect has occurred.
747 // ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored on failure.
748 int temp = currentPosition;
750 currentCharacter = source[currentPosition++];
// Disabled unicode-escape handling, kept for reference:
751 // if (((currentCharacter = source[currentPosition++]) == '\\')
752 // && (source[currentPosition] == 'u')) {
753 // //-------------unicode processing ------------
754 // int c1, c2, c3, c4;
755 // int unicodeSize = 6;
756 // currentPosition++;
757 // while (source[currentPosition] == 'u') {
758 // currentPosition++;
762 // if (((c1 = Character.getNumericValue(source[currentPosition++]))
765 // || ((c2 = Character.getNumericValue(source[currentPosition++])) >
768 // || ((c3 = Character.getNumericValue(source[currentPosition++])) >
771 // || ((c4 = Character.getNumericValue(source[currentPosition++])) >
774 // currentPosition = temp;
778 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
779 // if (!Character.isDigit(currentCharacter)) {
780 // currentPosition = temp;
784 // //need the unicode buffer
785 // if (withoutUnicodePtr == 0) {
786 // //buffer all the entries that have been left aside....
787 // withoutUnicodePtr = currentPosition - unicodeSize -
792 // withoutUnicodeBuffer,
794 // withoutUnicodePtr);
796 // //fill the buffer with the char
797 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
799 // } //-------------end unicode processing--------------
// Not a digit: un-read the character.
801 if (!Character.isDigit(currentCharacter)) {
802 currentPosition = temp;
805 // if (withoutUnicodePtr != 0)
806 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source counts as "no digit".
809 } catch (IndexOutOfBoundsException e) {
810 currentPosition = temp;
// Tries to consume the next source character if it is a digit in the given
// radix, i.e. Character.digit(currentCharacter, radix) is not -1.
// Reads source[currentPosition] into currentCharacter and advances
// currentPosition. If the character is not such a digit, or the read runs
// past the end of source (IndexOutOfBoundsException), currentPosition is
// reset to its value on entry.
// NOTE(review): the return statements are not visible in this listing;
// presumably true for a digit and false otherwise.
// NOTE(review): on a non-digit only currentPosition is rolled back, so
// currentCharacter keeps the character that was read.
815 public final boolean getNextCharAsDigit(int radix) {
817 // handle the case of unicode.
818 // when a unicode appears then we must use a buffer that holds char
820 // At the end of this method currentCharacter holds the new visited char
821 // and currentPosition points right next after it
822 // Both previous lines are true if the currentCharacter is a digit base
825 // On false, no side effect has occurred.
826 // ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored on failure.
827 int temp = currentPosition;
829 currentCharacter = source[currentPosition++];
// Disabled unicode-escape handling, kept for reference:
830 // if (((currentCharacter = source[currentPosition++]) == '\\')
831 // && (source[currentPosition] == 'u')) {
832 // //-------------unicode processing ------------
833 // int c1, c2, c3, c4;
834 // int unicodeSize = 6;
835 // currentPosition++;
836 // while (source[currentPosition] == 'u') {
837 // currentPosition++;
841 // if (((c1 = Character.getNumericValue(source[currentPosition++]))
844 // || ((c2 = Character.getNumericValue(source[currentPosition++])) >
847 // || ((c3 = Character.getNumericValue(source[currentPosition++])) >
850 // || ((c4 = Character.getNumericValue(source[currentPosition++])) >
853 // currentPosition = temp;
857 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
858 // if (Character.digit(currentCharacter, radix) == -1) {
859 // currentPosition = temp;
863 // //need the unicode buffer
864 // if (withoutUnicodePtr == 0) {
865 // //buffer all the entries that have been left aside....
866 // withoutUnicodePtr = currentPosition - unicodeSize -
871 // withoutUnicodeBuffer,
873 // withoutUnicodePtr);
875 // //fill the buffer with the char
876 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
878 // } //-------------end unicode processing--------------
// Not a digit in this radix: un-read the character.
880 if (Character.digit(currentCharacter, radix) == -1) {
881 currentPosition = temp;
884 // if (withoutUnicodePtr != 0)
885 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source counts as "no digit".
888 } catch (IndexOutOfBoundsException e) {
889 currentPosition = temp;
// Tries to consume the next source character if it can be part of an
// identifier. Despite the method name, the test used is isPHPIdentifierPart.
// Reads source[currentPosition] into currentCharacter and advances
// currentPosition. If the character is rejected, or the read runs past the
// end of source (IndexOutOfBoundsException), currentPosition is reset to its
// value on entry.
// NOTE(review): the return statements are not visible in this listing;
// presumably true when the character is accepted and false otherwise.
// NOTE(review): on rejection only currentPosition is rolled back, so
// currentCharacter keeps the character that was read.
894 public boolean getNextCharAsJavaIdentifierPart() {
896 // handle the case of unicode.
897 // when a unicode appears then we must use a buffer that holds char
899 // At the end of this method currentCharacter holds the new visited char
900 // and currentPosition points right next after it
901 // Both previous lines are true if the currentCharacter is a
902 // JavaIdentifierPart
903 // On false, no side effect has occurred.
904 // ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored on failure.
905 int temp = currentPosition;
907 currentCharacter = source[currentPosition++];
// Disabled unicode-escape handling, kept for reference:
908 // if (((currentCharacter = source[currentPosition++]) == '\\')
909 // && (source[currentPosition] == 'u')) {
910 // //-------------unicode processing ------------
911 // int c1, c2, c3, c4;
912 // int unicodeSize = 6;
913 // currentPosition++;
914 // while (source[currentPosition] == 'u') {
915 // currentPosition++;
919 // if (((c1 = Character.getNumericValue(source[currentPosition++]))
922 // || ((c2 = Character.getNumericValue(source[currentPosition++])) >
925 // || ((c3 = Character.getNumericValue(source[currentPosition++])) >
928 // || ((c4 = Character.getNumericValue(source[currentPosition++])) >
931 // currentPosition = temp;
935 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
936 // if (!isPHPIdentifierPart(currentCharacter)) {
937 // currentPosition = temp;
941 // //need the unicode buffer
942 // if (withoutUnicodePtr == 0) {
943 // //buffer all the entries that have been left aside....
944 // withoutUnicodePtr = currentPosition - unicodeSize -
949 // withoutUnicodeBuffer,
951 // withoutUnicodePtr);
953 // //fill the buffer with the char
954 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
956 // } //-------------end unicode processing--------------
// Not an identifier character: un-read it.
958 if (!isPHPIdentifierPart(currentCharacter)) {
959 currentPosition = temp;
962 // if (withoutUnicodePtr != 0)
963 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source counts as "not an identifier character".
966 } catch (IndexOutOfBoundsException e) {
967 currentPosition = temp;
// Looks ahead from the current position to decide between a plain "(" and a
// PHP cast such as "(int)".
// Steps visible here: skip blanks and tabs, collect a run of C_LETTER
// characters into buf, compare a 3..7 character word against the cast names
// int, bool, real, array, unset, float, object, string, double, boolean and
// integer, skip blanks and tabs again, then test for ')'.
// The tail of the method restores currentCharacter and currentPosition and
// returns TokenName.LPAREN.
// NOTE(review): the switch case labels, the assignments to found and the
// return of the cast token are not visible in this listing; presumably the
// cast token is returned only when a name matched and ')' followed.
// NOTE(review): the names are compared against lowercase literals and no
// lowercasing is visible, so "(INT)" would not match; PHP accepts cast names
// case-insensitively - confirm whether this is intended.
972 public TokenName getCastOrParen() {
// Saved so the scanner can be rewound when no cast is recognised.
973 int tempPosition = currentPosition;
974 char tempCharacter = currentCharacter;
975 TokenName tempToken = TokenName.LPAREN;
976 boolean found = false;
977 StringBuffer buf = new StringBuffer();
// Read the next character, skipping leading blanks and tabs.
980 currentCharacter = source[currentPosition++];
981 } while (currentCharacter == ' ' || currentCharacter == '\t');
// Collect the candidate type name.
// NOTE(review): the table has MAX_OBVIOUS entries, so a character >= 256
// raises an index exception here, which the catch below handles.
982 while (ObviousIdentCharNatures[currentCharacter] == C_LETTER) {
983 // while((currentCharacter >= 'a' && currentCharacter <= 'z') ||
984 // (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
985 buf.append(currentCharacter);
986 currentCharacter = source[currentPosition++];
// All recognised cast names are 3 to 7 characters long.
988 if (buf.length() >= 3 && buf.length() <= 7) {
989 char[] data = buf.toString().toCharArray();
991 switch (data.length) {
// int
994 if ((data[index] == 'i') && (data[++index] == 'n')
995 && (data[++index] == 't')) {
997 tempToken = TokenName.INTCAST;
// bool
1002 if ((data[index] == 'b') && (data[++index] == 'o')
1003 && (data[++index] == 'o') && (data[++index] == 'l')) {
1005 tempToken = TokenName.BOOLCAST;
// real maps to the double cast
1008 if ((data[index] == 'r') && (data[++index] == 'e')
1009 && (data[++index] == 'a')
1010 && (data[++index] == 'l')) {
1012 tempToken = TokenName.DOUBLECAST;
1017 // array unset float
1018 if ((data[index] == 'a') && (data[++index] == 'r')
1019 && (data[++index] == 'r') && (data[++index] == 'a')
1020 && (data[++index] == 'y')) {
1022 tempToken = TokenName.ARRAYCAST;
1025 if ((data[index] == 'u') && (data[++index] == 'n')
1026 && (data[++index] == 's')
1027 && (data[++index] == 'e')
1028 && (data[++index] == 't')) {
1030 tempToken = TokenName.UNSETCAST;
// float maps to the double cast
1033 if ((data[index] == 'f') && (data[++index] == 'l')
1034 && (data[++index] == 'o')
1035 && (data[++index] == 'a')
1036 && (data[++index] == 't')) {
1038 tempToken = TokenName.DOUBLECAST;
1044 // object string double
1045 if ((data[index] == 'o') && (data[++index] == 'b')
1046 && (data[++index] == 'j') && (data[++index] == 'e')
1047 && (data[++index] == 'c') && (data[++index] == 't')) {
1049 tempToken = TokenName.OBJECTCAST;
1052 if ((data[index] == 's') && (data[++index] == 't')
1053 && (data[++index] == 'r')
1054 && (data[++index] == 'i')
1055 && (data[++index] == 'n')
1056 && (data[++index] == 'g')) {
1058 tempToken = TokenName.STRINGCAST;
1061 if ((data[index] == 'd') && (data[++index] == 'o')
1062 && (data[++index] == 'u')
1063 && (data[++index] == 'b')
1064 && (data[++index] == 'l')
1065 && (data[++index] == 'e')) {
1067 tempToken = TokenName.DOUBLECAST;
// boolean
1074 if ((data[index] == 'b') && (data[++index] == 'o')
1075 && (data[++index] == 'o') && (data[++index] == 'l')
1076 && (data[++index] == 'e') && (data[++index] == 'a')
1077 && (data[++index] == 'n')) {
1079 tempToken = TokenName.BOOLCAST;
// integer
1082 if ((data[index] == 'i') && (data[++index] == 'n')
1083 && (data[++index] == 't')
1084 && (data[++index] == 'e')
1085 && (data[++index] == 'g')
1086 && (data[++index] == 'e')
1087 && (data[++index] == 'r')) {
1089 tempToken = TokenName.INTCAST;
// Skip blanks and tabs between the name and the closing parenthesis.
1095 while (currentCharacter == ' ' || currentCharacter == '\t') {
1096 currentCharacter = source[currentPosition++];
1098 if (currentCharacter == ')') {
// Reached when a read or table lookup above goes out of bounds.
// NOTE(review): the handler body is not visible in this listing.
1103 } catch (IndexOutOfBoundsException e) {
// No cast recognised: rewind to the saved state and report a plain "(".
1105 currentCharacter = tempCharacter;
1106 currentPosition = tempPosition;
1107 return TokenName.LPAREN;
// Scans a backquote-delimited string: characters are read until the closing
// '`' has been consumed. getNextToken() calls this method right before it
// returns TokenName.STRINGINTERPOLATED.
//
// Side effects visible here: currentCharacter/currentPosition are advanced,
// withoutUnicodeBuffer is filled once an escape sequence has been met, line
// separators are recorded when recordLineSeparator is set, and the literal is
// added to the current NLSLine when checkNonExternalizedStringLiterals is set.
//
// Throws InvalidInputException(UNTERMINATED_STRING) when the source ends
// before the closing backquote is found.
//
// NOTE(review): a few short lines (lone braces, "try {", "break;", ...) are
// absent from this listing; the comments describe the visible statements only.
1114 public void consumeStringInterpolated() throws InvalidInputException {
1116 // consume next character
1117 unicodeAsBackSlash = false;
1118 currentCharacter = source[currentPosition++];
1119 // if (((currentCharacter = source[currentPosition++]) == '\\')
1120 // && (source[currentPosition] == 'u')) {
1121 // getNextUnicodeChar();
1123 // if (withoutUnicodePtr != 0) {
1124 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1125 // currentCharacter;
// scan until the closing backquote is the current character
1128 while (currentCharacter != '`') {
1129 /** ** in PHP \r and \n are valid in string literals *** */
1130 // if ((currentCharacter == '\n')
1131 // || (currentCharacter == '\r')) {
1132 // // relocate if finding another quote fairly close: thus
1134 // '/u000D' will be fully consumed
1135 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1136 // if (currentPosition + lookAhead == source.length)
1138 // if (source[currentPosition + lookAhead] == '\n')
1140 // if (source[currentPosition + lookAhead] == '\"') {
1141 // currentPosition += lookAhead + 1;
1145 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
// backslash: delegate to scanDoubleQuotedEscapeCharacter(); escapeSize ends
// up as the number of positions that call advanced currentPosition
1147 if (currentCharacter == '\\') {
1148 int escapeSize = currentPosition;
1149 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1150 // scanEscapeCharacter make a side effect on this value and
1152 // the previous value few lines down this one
1153 scanDoubleQuotedEscapeCharacter();
1154 escapeSize = currentPosition - escapeSize;
1155 if (withoutUnicodePtr == 0) {
1156 // buffer all the entries that have been left aside....
1157 withoutUnicodePtr = currentPosition - escapeSize - 1
1159 System.arraycopy(source, startPosition,
1160 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1161 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1162 } else { // overwrite the / in the buffer
1163 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1164 if (backSlashAsUnicodeInString) { // there are TWO \
1166 // where only one is correct
1167 withoutUnicodePtr--;
// line breaks are legal inside the string; only the line table is updated
1170 } else if ((currentCharacter == '\r')
1171 || (currentCharacter == '\n')) {
1172 if (recordLineSeparator) {
1173 pushLineSeparator();
1176 // consume next character
1177 unicodeAsBackSlash = false;
1178 currentCharacter = source[currentPosition++];
1179 // if (((currentCharacter = source[currentPosition++]) == '\\')
1180 // && (source[currentPosition] == 'u')) {
1181 // getNextUnicodeChar();
// once buffering has started, every further character is appended as well
1183 if (withoutUnicodePtr != 0) {
1184 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// running off the end of source means the string was never closed
1188 } catch (IndexOutOfBoundsException e) {
1189 // reset end position for error reporting
1190 currentPosition -= 2;
1191 throw new InvalidInputException(UNTERMINATED_STRING);
1192 } catch (InvalidInputException e) {
// NOTE(review): e.getMessage() is dereferenced without a null check.
1193 if (e.getMessage().equals(INVALID_ESCAPE)) {
1194 // relocate if finding another quote fairly close: thus unicode
1195 // '/u000D' will be fully consumed
// inspects at most 50 characters ahead; the checks are end of source, a
// newline, and the closing backquote (which repositions the scanner after it)
1196 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1197 if (currentPosition + lookAhead == source.length)
1199 if (source[currentPosition + lookAhead] == '\n')
1201 if (source[currentPosition + lookAhead] == '`') {
1202 currentPosition += lookAhead + 1;
// NOTE(review): presumably the exception is rethrown after the relocation;
// that statement is not visible in this listing - confirm.
1209 if (checkNonExternalizedStringLiterals) { // check for presence of NLS
1211 // //$NON-NLS-?$ where ? is an
// lazily create the NLS line for the current source line and register it
1213 if (currentLine == null) {
1214 currentLine = new NLSLine();
1215 lines.add(currentLine);
// the literal spans startPosition .. currentPosition - 1 (inclusive end)
1217 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1218 startPosition, currentPosition - 1));
// Scans a single-quoted string: characters are read until the closing '\''
// has been consumed. getNextToken() calls this method right before it
// returns TokenName.STRINGSINGLEQUOTE.
//
// The structure mirrors consumeStringInterpolated(); the differences visible
// here are the terminating quote character and the escape handler
// (scanSingleQuotedEscapeCharacter() instead of the double-quoted variant).
//
// Throws InvalidInputException(UNTERMINATED_STRING) when the source ends
// before the closing quote is found.
//
// NOTE(review): a few short lines (lone braces, "try {", "break;", ...) are
// absent from this listing; the comments describe the visible statements only.
1222 public void consumeStringConstant() throws InvalidInputException {
1224 // consume next character
1225 unicodeAsBackSlash = false;
1226 currentCharacter = source[currentPosition++];
1227 // if (((currentCharacter = source[currentPosition++]) == '\\')
1228 // && (source[currentPosition] == 'u')) {
1229 // getNextUnicodeChar();
1231 // if (withoutUnicodePtr != 0) {
1232 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1233 // currentCharacter;
// scan until the closing single quote is the current character
1236 while (currentCharacter != '\'') {
1237 /** ** in PHP \r and \n are valid in string literals *** */
1238 // if ((currentCharacter == '\n')
1239 // || (currentCharacter == '\r')) {
1240 // // relocate if finding another quote fairly close: thus
1242 // '/u000D' will be fully consumed
1243 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1244 // if (currentPosition + lookAhead == source.length)
1246 // if (source[currentPosition + lookAhead] == '\n')
1248 // if (source[currentPosition + lookAhead] == '\"') {
1249 // currentPosition += lookAhead + 1;
1253 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
// backslash: delegate to scanSingleQuotedEscapeCharacter(); escapeSize ends
// up as the number of positions that call advanced currentPosition
1255 if (currentCharacter == '\\') {
1256 int escapeSize = currentPosition;
1257 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1258 // scanEscapeCharacter make a side effect on this value and
1260 // the previous value few lines down this one
1261 scanSingleQuotedEscapeCharacter();
1262 escapeSize = currentPosition - escapeSize;
1263 if (withoutUnicodePtr == 0) {
1264 // buffer all the entries that have been left aside....
1265 withoutUnicodePtr = currentPosition - escapeSize - 1
1267 System.arraycopy(source, startPosition,
1268 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1269 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1270 } else { // overwrite the / in the buffer
1271 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1272 if (backSlashAsUnicodeInString) { // there are TWO \
1274 // where only one is correct
1275 withoutUnicodePtr--;
// line breaks are legal inside the string; only the line table is updated
1278 } else if ((currentCharacter == '\r')
1279 || (currentCharacter == '\n')) {
1280 if (recordLineSeparator) {
1281 pushLineSeparator();
1284 // consume next character
1285 unicodeAsBackSlash = false;
1286 currentCharacter = source[currentPosition++];
1287 // if (((currentCharacter = source[currentPosition++]) == '\\')
1288 // && (source[currentPosition] == 'u')) {
1289 // getNextUnicodeChar();
// once buffering has started, every further character is appended as well
1291 if (withoutUnicodePtr != 0) {
1292 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// running off the end of source means the string was never closed
1296 } catch (IndexOutOfBoundsException e) {
1297 // reset end position for error reporting
1298 currentPosition -= 2;
1299 throw new InvalidInputException(UNTERMINATED_STRING);
1300 } catch (InvalidInputException e) {
// NOTE(review): e.getMessage() is dereferenced without a null check.
1301 if (e.getMessage().equals(INVALID_ESCAPE)) {
1302 // relocate if finding another quote fairly close: thus unicode
1303 // '/u000D' will be fully consumed
// inspects at most 50 characters ahead; the checks are end of source, a
// newline, and the closing quote (which repositions the scanner after it)
1304 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1305 if (currentPosition + lookAhead == source.length)
1307 if (source[currentPosition + lookAhead] == '\n')
1309 if (source[currentPosition + lookAhead] == '\'') {
1310 currentPosition += lookAhead + 1;
// NOTE(review): presumably the exception is rethrown after the relocation;
// that statement is not visible in this listing - confirm.
1317 if (checkNonExternalizedStringLiterals) { // check for presence of NLS
1319 // //$NON-NLS-?$ where ? is an
// lazily create the NLS line for the current source line and register it
1321 if (currentLine == null) {
1322 currentLine = new NLSLine();
1323 lines.add(currentLine);
// the literal spans startPosition .. currentPosition - 1 (inclusive end)
1325 currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1326 startPosition, currentPosition - 1));
// Scans a double-quoted string. getNextToken() calls this method right
// before it returns TokenName.STRINGDOUBLEQUOTE.
//
// Unlike the single-quote and backquote scanners, the loop only stops on a
// '"' while openDollarBrace is not positive, so a quote met while the
// counter is above zero does not end the string.
//
// Throws InvalidInputException(UNTERMINATED_STRING) when the source ends
// before the loop condition is satisfied.
//
// NOTE(review): a few short lines (lone braces, "try {", "break;", the
// counter updates, ...) are absent from this listing; the comments describe
// the visible statements only.
1334 public void consumeStringLiteral() throws InvalidInputException {
// counter consulted by the loop condition below
1336 int openDollarBrace = 0;
1338 unicodeAsBackSlash = false;
1339 currentCharacter = source[currentPosition++]; // consume next character
1341 while (currentCharacter != '"' || // As long as the ending '"' isn't found, or
1342 openDollarBrace > 0) { // the last '}' isn't found
// backslash: delegate to scanDoubleQuotedEscapeCharacter(); escapeSize ends
// up as the number of positions that call advanced currentPosition
1343 if (currentCharacter == '\\') {
1344 int escapeSize = currentPosition;
1345 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1347 // scanEscapeCharacter make a side effect on this value and we need
1348 // the previous value few lines down this one
1349 scanDoubleQuotedEscapeCharacter ();
1350 escapeSize = currentPosition - escapeSize;
1352 if (withoutUnicodePtr == 0) { // buffer all the entries that have been left aside....
1353 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1354 System.arraycopy (source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1355 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1357 else { // overwrite the / in the buffer
1358 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1360 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream where only one is correct
1361 withoutUnicodePtr--;
// NOTE(review): the '${', '{$' and '}' branches below presumably increment /
// decrement openDollarBrace; those statements are not visible in this
// listing - confirm. Both look-ahead tests read source[currentPosition]
// without consuming it.
1365 else if (currentCharacter == '$' && source[currentPosition] == '{') { // If found '${'
1367 currentCharacter = source[currentPosition++]; // consume next character, or we count one open brace too many!
1369 else if (currentCharacter == '{' && source[currentPosition] == '$') { // If found '{$'
1372 else if (currentCharacter == '}') { // If found '}'
1375 else if ((currentCharacter == '\r') || (currentCharacter == '\n')) { // In PHP \r and \n are valid in string literals
1376 if (recordLineSeparator) {
1377 pushLineSeparator ();
1381 unicodeAsBackSlash = false;
1382 currentCharacter = source[currentPosition++]; // consume next character
// once buffering has started, every further character is appended as well
1384 if (withoutUnicodePtr != 0) {
1385 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// running off the end of source means the string was never closed
1388 } catch (IndexOutOfBoundsException e) {
1389 // reset end position for error reporting
1390 currentPosition -= 2;
1391 throw new InvalidInputException(UNTERMINATED_STRING);
1392 } catch (InvalidInputException e) {
// NOTE(review): e.getMessage() is dereferenced without a null check.
1393 if (e.getMessage().equals(INVALID_ESCAPE)) {
1394 // relocate if finding another quote fairly close: thus unicode
1395 // '/u000D' will be fully consumed
// inspects at most 50 characters ahead; the checks are end of source, a
// newline, and the closing quote (which repositions the scanner after it)
1396 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1397 if (currentPosition + lookAhead == source.length)
1399 if (source[currentPosition + lookAhead] == '\n')
1401 if (source[currentPosition + lookAhead] == '\"') {
1402 currentPosition += lookAhead + 1;
// NOTE(review): presumably the exception is rethrown after the relocation;
// that statement is not visible in this listing - confirm.
1410 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1411 // $NON-NLS-?$ where ? is an int.
// lazily create the NLS line for the current source line and register it
1412 if (currentLine == null) {
1413 currentLine = new NLSLine ();
1414 lines.add (currentLine);
// the literal spans startPosition .. currentPosition - 1 (inclusive end)
1416 currentLine.add (new StringLiteral (getCurrentTokenSourceString (), startPosition, currentPosition - 1));
// Returns the next token found at currentPosition.
//
// Flow visible in this listing:
//  - a call to getInlinedHTMLToken(currentPosition) near the top (its guard
//    is not visible - presumably a phpMode check, TODO confirm);
//  - a pending fFillerToken is copied out and the field reset to EOF;
//  - white space is skipped (line separators recorded on the way) and reported
//    as TokenName.WHITESPACE when tokenizeWhiteSpace is set;
//  - TokenName.EOF once currentPosition has passed eofPosition;
//  - otherwise the token is selected by a switch on currentCharacter.
//
// Throws InvalidInputException, e.g. UNTERMINATED_COMMENT for an unclosed
// block comment, "Ctrl-Z", or whatever the consumeString*() helpers throw.
// The outermost IndexOutOfBoundsException handler returns TokenName.EOF.
//
// NOTE(review): the "case" labels and other short lines (lone braces,
// "int test;", "try {", "break;", ...) are absent from this listing; the
// comments describe the visible statements only.
1423 public TokenName getNextToken() throws InvalidInputException {
1425 return getInlinedHTMLToken(currentPosition);
// a filler token left behind by an earlier call is taken first; the field is
// reset to EOF so that this happens only once
1427 if (fFillerToken != TokenName.EOF) {
1428 TokenName tempToken;
1429 startPosition = currentPosition;
1430 tempToken = fFillerToken;
1431 fFillerToken = TokenName.EOF;
1434 this.wasAcr = false;
1436 jumpOverMethodBody();
1438 return currentPosition > source.length ? TokenName.EOF
1443 withoutUnicodePtr = 0;
1444 // ---------Consume white space and handles
1445 // startPosition---------
// whiteStart remembers where the white space run began
1446 int whiteStart = currentPosition;
1447 startPosition = currentPosition;
1448 currentCharacter = source[currentPosition++];
1450 while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1451 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1452 checkNonExternalizeString();
1454 if (recordLineSeparator) {
1455 pushLineSeparator();
1460 startPosition = currentPosition;
1461 currentCharacter = source[currentPosition++];
// at least one white space character was consumed and the client asked for
// white space tokens
1464 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1465 // reposition scanner in case we are interested by
1468 startPosition = whiteStart;
1469 return TokenName.WHITESPACE;
1471 // little trick to get out in the middle of a source
1473 if (currentPosition > eofPosition)
1474 return TokenName.EOF;
1475 // ---------Identify the next token-------------
1476 switch (currentCharacter) {
// an opening parenthesis may start a cast such as "(int)"; the helper decides
1478 return getCastOrParen();
1480 return TokenName.RPAREN;
1482 return TokenName.LBRACE;
1484 return TokenName.RBRACE;
1486 return TokenName.LBRACKET;
1488 return TokenName.RBRACKET;
1490 return TokenName.SEMICOLON;
1492 return TokenName.COMMA;
// ".=", a number that starts with a dot, or a plain dot
1494 if (getNextChar('='))
1495 return TokenName.DOT_EQUAL;
1496 if (getNextCharAsDigit())
1497 return scanNumber(true);
1498 return TokenName.DOT;
1500 return TokenName.BACKSLASH;
// getNextChar(a, b) is compared against 0 for a match on the first argument;
// the test for the second argument is not visible in this listing
1503 if ((test = getNextChar('+', '=')) == 0)
1504 return TokenName.PLUS_PLUS;
1506 return TokenName.PLUS_EQUAL;
1507 return TokenName.PLUS;
1511 if ((test = getNextChar('-', '=')) == 0)
1512 return TokenName.MINUS_MINUS;
1514 return TokenName.MINUS_EQUAL;
1515 if (getNextChar('>'))
1516 return TokenName.MINUS_GREATER;
1517 return TokenName.MINUS;
1520 if (getNextChar('='))
1521 return TokenName.TWIDDLE_EQUAL;
1522 return TokenName.TWIDDLE;
// "!==" is tried before "!="
1524 if (getNextChar('=')) {
1525 if (getNextChar('=')) {
1526 return TokenName.NOT_EQUAL_EQUAL;
1528 return TokenName.NOT_EQUAL;
1530 return TokenName.NOT;
1532 if (getNextChar('='))
1533 return TokenName.MULTIPLY_EQUAL;
1534 return TokenName.MULTIPLY;
1536 if (getNextChar('='))
1537 return TokenName.REMAINDER_EQUAL;
1538 return TokenName.REMAINDER;
// '<' family: read one character ahead; oldPosition allows backing up to a
// plain LESS when nothing longer matches or the source ends
1541 int oldPosition = currentPosition;
1544 currentCharacter = source[currentPosition++];
1545 } catch (IndexOutOfBoundsException e) {
1546 currentPosition = oldPosition;
1547 return TokenName.LESS;
1550 switch (currentCharacter) {
1552 return TokenName.LESS_EQUAL;
1555 return TokenName.NOT_EQUAL;
1558 if (getNextChar ('=')) {
1559 return TokenName.LEFT_SHIFT_EQUAL;
// a further '<' starts a heredoc: skip white space, then read the heredoc
// identifier that begins at heredocStart
1562 if (getNextChar('<')) {
1563 currentCharacter = source[currentPosition++];
1565 while (Character.isWhitespace(currentCharacter)) {
1566 currentCharacter = source[currentPosition++];
1569 int heredocStart = currentPosition - 1;
1570 int heredocLength = 0;
1572 if (isPHPIdentifierStart (currentCharacter)) {
1573 currentCharacter = source[currentPosition++];
1576 return TokenName.ERROR;
1579 while (isPHPIdentifierPart(currentCharacter)) {
1580 currentCharacter = source[currentPosition++];
// number of characters in the heredoc identifier
1583 heredocLength = currentPosition - heredocStart - 1;
1585 // heredoc end-tag determination
1586 boolean endTag = true;
1589 ch = source[currentPosition++];
1591 if (ch == '\r' || ch == '\n') {
1592 if (recordLineSeparator) {
1593 pushLineSeparator();
// compare the text at currentPosition with the heredoc identifier, character
// by character
1599 for (int i = 0; i < heredocLength; i++) {
1600 if (source[currentPosition + i] != source[heredocStart + i]) {
// step over the matched end identifier
1607 currentPosition += heredocLength - 1;
1608 currentCharacter = source[currentPosition++];
1609 break; // do...while loop
1617 return TokenName.HEREDOC;
1619 return TokenName.LEFT_SHIFT;
1621 currentPosition = oldPosition;
1622 return TokenName.LESS;
// '>' family: ">=", ">>=", ">>" or a plain GREATER
1627 if ((test = getNextChar('=', '>')) == 0)
1628 return TokenName.GREATER_EQUAL;
1630 if ((test = getNextChar('=', '>')) == 0)
1631 return TokenName.RIGHT_SHIFT_EQUAL;
1632 return TokenName.RIGHT_SHIFT;
1634 return TokenName.GREATER;
// "===" is tried before "==", then "=>", then a plain '='
1637 if (getNextChar('=')) {
1638 if (getNextChar('=')) {
1639 return TokenName.EQUAL_EQUAL_EQUAL;
1641 return TokenName.EQUAL_EQUAL;
1643 if (getNextChar('>'))
1644 return TokenName.EQUAL_GREATER;
1645 return TokenName.EQUAL;
1648 if ((test = getNextChar('&', '=')) == 0)
1649 return TokenName.AND_AND;
1651 return TokenName.AND_EQUAL;
1652 return TokenName.OP_AND;
1656 if ((test = getNextChar('|', '=')) == 0)
1657 return TokenName.OR_OR;
1659 return TokenName.OR_EQUAL;
1660 return TokenName.OP_OR;
1663 if (getNextChar('='))
1664 return TokenName.XOR_EQUAL;
1665 return TokenName.OP_XOR;
// "?>": the inline HTML scanner takes over, with the token starting two
// characters back (at the '?'); at the very end of source INLINE_HTML is
// returned directly
1667 if (getNextChar('>')) {
1669 if (currentPosition == source.length) {
1671 return TokenName.INLINE_HTML;
1673 return getInlinedHTMLToken(currentPosition - 2);
1675 else if (getNextChar(':')) {
1676 return TokenName.TERNARY_SHORT;
1679 return TokenName.QUESTION;
1682 if (getNextChar(':'))
1683 return TokenName.PAAMAYIM_NEKUDOTAYIM;
1684 return TokenName.COLON;
1686 return TokenName.OP_AT;
// the three string flavours are scanned by their dedicated helpers
1688 consumeStringConstant();
1689 return TokenName.STRINGSINGLEQUOTE;
1691 // if (tokenizeStrings) {
1692 consumeStringLiteral();
1693 return TokenName.STRINGDOUBLEQUOTE;
1695 // return TokenName.EncapsedString2;
1697 // if (tokenizeStrings) {
1698 consumeStringInterpolated();
1699 return TokenName.STRINGINTERPOLATED;
1701 // return TokenName.EncapsedString0;
// startChar is compared with '/' and '#' below: "/=" is DIVIDE_EQUAL, while
// '#' or "//" starts a line comment
1704 char startChar = currentCharacter;
1705 if (getNextChar('=') && startChar == '/') {
1706 return TokenName.DIVIDE_EQUAL;
1709 if ((startChar == '#')
1710 || (test = getNextChar('/', '*')) == 0) {
1712 this.lastCommentLinePosition = this.currentPosition;
1713 int endPositionForLineComment = 0;
1714 try { // get the next char
1715 currentCharacter = source[currentPosition++];
1716 // if (((currentCharacter =
1717 // source[currentPosition++])
1719 // && (source[currentPosition] == 'u')) {
1720 // //-------------unicode traitement
1722 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1723 // currentPosition++;
1724 // while (source[currentPosition] == 'u') {
1725 // currentPosition++;
1728 // Character.getNumericValue(source[currentPosition++]))
1732 // Character.getNumericValue(source[currentPosition++]))
1736 // Character.getNumericValue(source[currentPosition++]))
1740 // Character.getNumericValue(source[currentPosition++]))
1744 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1746 // currentCharacter =
1747 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 +
1751 // handle the \\u case manually into comment
1752 // if (currentCharacter == '\\') {
1753 // if (source[currentPosition] == '\\')
1754 // currentPosition++;
1755 // } //jump over the \\
// isUnicode is only ever assigned false in the visible code (the assignment
// to true sits in the commented-out block below)
1756 boolean isUnicode = false;
// the line comment runs up to the next line break ...
1757 while (currentCharacter != '\r'
1758 && currentCharacter != '\n') {
1759 this.lastCommentLinePosition = this.currentPosition;
1760 if (currentCharacter == '?') {
1761 if (getNextChar('>')) {
1762 // ?> breaks line comments
1763 startPosition = currentPosition - 2;
1765 return TokenName.INLINE_HTML;
1768 // get the next char
1770 currentCharacter = source[currentPosition++];
1771 // if (((currentCharacter =
1772 // source[currentPosition++])
1774 // && (source[currentPosition] == 'u')) {
1775 // isUnicode = true;
1776 // //-------------unicode traitement
1778 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1779 // currentPosition++;
1780 // while (source[currentPosition] == 'u') {
1781 // currentPosition++;
1784 // Character.getNumericValue(source[currentPosition++]))
1788 // Character.getNumericValue(
1789 // source[currentPosition++]))
1793 // Character.getNumericValue(
1794 // source[currentPosition++]))
1798 // Character.getNumericValue(
1799 // source[currentPosition++]))
1803 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1805 // currentCharacter =
1806 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 +
1810 // handle the \\u case manually into comment
1811 // if (currentCharacter == '\\') {
1812 // if (source[currentPosition] == '\\')
1813 // currentPosition++;
1814 // } //jump over the \\
// two alternative end positions: 6 characters back or 1 character back
// (the selecting condition is not visible - presumably isUnicode)
1817 endPositionForLineComment = currentPosition - 6;
1819 endPositionForLineComment = currentPosition - 1;
1821 // recordComment(false);
1822 recordComment(TokenName.COMMENT_LINE);
1823 if (this.taskTags != null)
1824 checkTaskTag(this.startPosition,
1825 this.currentPosition);
1826 if ((currentCharacter == '\r')
1827 || (currentCharacter == '\n')) {
1828 checkNonExternalizeString();
1829 if (recordLineSeparator) {
1831 pushUnicodeLineSeparator();
1833 pushLineSeparator();
1839 if (tokenizeComments) {
1841 currentPosition = endPositionForLineComment;
1842 // reset one character behind
1844 return TokenName.COMMENT_LINE;
1846 } catch (IndexOutOfBoundsException e) { // an eof
1849 if (tokenizeComments) {
1851 // reset one character behind
1852 return TokenName.COMMENT_LINE;
1858 // traditional and annotation comment
// isJavadoc selects between COMMENT_PHPDOC and COMMENT_BLOCK further down
// (presumably; the selecting conditions are not visible). star is true when
// the previously read character was '*'.
1859 boolean isJavadoc = false, star = false;
1860 // consume next character
1861 unicodeAsBackSlash = false;
1862 currentCharacter = source[currentPosition++];
1863 // if (((currentCharacter =
1864 // source[currentPosition++]) ==
1866 // && (source[currentPosition] == 'u')) {
1867 // getNextUnicodeChar();
1869 // if (withoutUnicodePtr != 0) {
1870 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1871 // currentCharacter;
1874 if (currentCharacter == '*') {
1878 if ((currentCharacter == '\r')
1879 || (currentCharacter == '\n')) {
1880 checkNonExternalizeString();
1881 if (recordLineSeparator) {
1882 pushLineSeparator();
1887 try { // get the next char
1888 currentCharacter = source[currentPosition++];
1889 // if (((currentCharacter =
1890 // source[currentPosition++])
1892 // && (source[currentPosition] == 'u')) {
1893 // //-------------unicode traitement
1895 // getNextUnicodeChar();
1897 // handle the \\u case manually into comment
1898 // if (currentCharacter == '\\') {
1899 // if (source[currentPosition] == '\\')
1900 // currentPosition++;
1901 // //jump over the \\
1903 // empty comment is not a javadoc /**/
1904 if (currentCharacter == '/') {
1907 // loop until end of comment */
// i.e. until a '/' is read immediately after a '*'
1908 while ((currentCharacter != '/') || (!star)) {
1909 if ((currentCharacter == '\r')
1910 || (currentCharacter == '\n')) {
1911 checkNonExternalizeString();
1912 if (recordLineSeparator) {
1913 pushLineSeparator();
1918 star = currentCharacter == '*';
1920 currentCharacter = source[currentPosition++];
1921 // if (((currentCharacter =
1922 // source[currentPosition++])
1924 // && (source[currentPosition] == 'u')) {
1925 // //-------------unicode traitement
1927 // getNextUnicodeChar();
1929 // handle the \\u case manually into comment
1930 // if (currentCharacter == '\\') {
1931 // if (source[currentPosition] == '\\')
1932 // currentPosition++;
1933 // } //jump over the \\
1935 // recordComment(isJavadoc);
1937 recordComment(TokenName.COMMENT_PHPDOC);
1939 recordComment(TokenName.COMMENT_BLOCK);
1942 if (tokenizeComments) {
1944 return TokenName.COMMENT_PHPDOC;
1945 return TokenName.COMMENT_BLOCK;
1948 if (this.taskTags != null) {
1949 checkTaskTag(this.startPosition,
1950 this.currentPosition);
// the source ended inside the block comment
1952 } catch (IndexOutOfBoundsException e) {
1953 // reset end position for error reporting
1954 currentPosition -= 2;
1955 throw new InvalidInputException(
1956 UNTERMINATED_COMMENT);
1960 return TokenName.DIVIDE;
1964 return TokenName.EOF;
1965 // the atEnd may not be <currentPosition ==
1966 // source.length> if
1967 // source is only some part of a real (external) stream
1968 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// '$' followed by an identifier start is scanned via
// scanIdentifierOrKeyword(true); otherwise back up and report a bare DOLLAR
1970 if (currentCharacter == '$') {
1971 int oldPosition = currentPosition;
1973 currentCharacter = source[currentPosition++];
1974 if (isPHPIdentifierStart(currentCharacter)) {
1975 return scanIdentifierOrKeyword(true);
1977 currentPosition = oldPosition;
1978 return TokenName.DOLLAR;
1980 } catch (IndexOutOfBoundsException e) {
1981 currentPosition = oldPosition;
1982 return TokenName.DOLLAR;
1986 if (isPHPIdentifierStart(currentCharacter)) {
1987 return scanIdentifierOrKeyword(false);
1990 if (Character.isDigit(currentCharacter)) {
1991 return scanNumber(false);
// nothing matched
1994 return TokenName.ERROR;
1997 } // -----------------end switch while try--------------------
// reading past the end of source anywhere above ends up here
1998 catch (IndexOutOfBoundsException e) {
2001 return TokenName.EOF;
2006 * @throws InvalidInputException
// Scans inline (non-PHP) content. startPosition is set to the given start
// offset and characters are read from currentPosition, looking for "<?".
//
// start - offset stored in startPosition for the token being produced.
// Returns TokenName.EOF when currentPosition is already past the end of
// source (currentPosition is then clamped to source.length); every other
// visible return yields TokenName.INLINE_HTML, including the one after the
// IndexOutOfBoundsException handler at the end of source.
//
// Side effect: fFillerToken is set to TokenName.ECHO_INVISIBLE on some paths,
// which the next getNextToken() call picks up.
//
// NOTE(review): several short lines (lone braces, "try {", "while (true) {",
// assignments such as the phpMode/phpShortTag updates, ...) are absent from
// this listing; the comments describe the visible statements only.
2008 private TokenName getInlinedHTMLToken(int start) throws InvalidInputException {
2009 boolean phpShortTag = false; // true, if <?= detected
2010 if (currentPosition > source.length) {
2011 currentPosition = source.length;
2012 return TokenName.EOF;
2014 startPosition = start;
2017 currentCharacter = source[currentPosition++];
// candidate open tag: '<' immediately followed by '?'
2018 if (currentCharacter == '<') {
2019 if (getNextChar('?')) {
2020 currentCharacter = source[currentPosition++];
// the character after "<?" is neither 'P' nor 'p': short tag forms
2021 if ((currentCharacter != 'P')
2022 && (currentCharacter != 'p')) {
2023 if (currentCharacter != '=') { // <?=
2025 phpShortTag = false;
// when ignorePHPOneLiner is set, lookAheadLinePHPTag() is consulted; its
// INLINE_HTML result is tested here (the branch body is not visible)
2030 if (ignorePHPOneLiner) { // for CodeFormatter
2031 if (lookAheadLinePHPTag() == TokenName.INLINE_HTML) {
2034 fFillerToken = TokenName.ECHO_INVISIBLE;
2036 return TokenName.INLINE_HTML;
// look for the letters X, M, L in either case after "<?"
// (getNextChar(a, b) returns a non-negative value on a match)
2039 boolean foundXML = false;
2040 if (getNextChar('X', 'x') >= 0) {
2041 if (getNextChar('M', 'm') >= 0) {
2042 if (getNextChar('L', 'l') >= 0) {
2051 fFillerToken = TokenName.ECHO_INVISIBLE;
2053 return TokenName.INLINE_HTML;
// the remaining letters of "php", in either case
2056 if (getNextChar('H', 'h') >= 0) {
2057 if (getNextChar('P', 'p') >= 0) {
2059 if (ignorePHPOneLiner) {
2060 if (lookAheadLinePHPTag() == TokenName.INLINE_HTML) {
2062 return TokenName.INLINE_HTML;
2066 return TokenName.INLINE_HTML;
// keep the line table up to date while skipping over the inline content
2074 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
2075 if (recordLineSeparator) {
2076 pushLineSeparator();
2081 } // -----------------while--------------------
2083 return TokenName.INLINE_HTML;
2084 } // -----------------try--------------------
// end of source reached while scanning
2085 catch (IndexOutOfBoundsException e) {
2086 startPosition = start;
2090 return TokenName.INLINE_HTML;
2094 * check if the PHP is only in this line (for CodeFormatter)
// Looks ahead from currentPosition, using a private cursor
// (currentPositionInLine), while tracking whether the cursor is inside a
// single- or double-quoted string.
//
// Returns TokenName.EOF (used as a "dummy" marker) when a character preceded
// by '?' is found on the branch below; in that case currentPosition is moved
// to the cursor. All other visible returns yield TokenName.INLINE_HTML; when
// the end of source is hit, currentPosition is set to the cursor minus one
// first.
//
// NOTE(review): the "case" labels and other short lines (lone braces,
// "try {", "while (true) {", "break;", ...) are absent from this listing, so
// which character triggers each branch is not visible here; the comments
// describe the visible statements only.
2098 private TokenName lookAheadLinePHPTag() {
2099 int currentPositionInLine = currentPosition;
2100 char previousCharInLine = ' ';
2101 char currentCharInLine = ' ';
2102 boolean singleQuotedStringActive = false;
2103 boolean doubleQuotedStringActive = false;
2106 // look ahead in this line
// slide the two-character window (previous, current) one position forward
2108 previousCharInLine = currentCharInLine;
2109 currentCharInLine = source[currentPositionInLine++];
2110 switch (currentCharInLine) {
// presumably the '>' case: together with a preceding '?' this is "?>" - confirm
2112 if (previousCharInLine == '?') {
2113 // update the scanner's current Position in the source
2114 currentPosition = currentPositionInLine;
2115 // use as "dummy" token
2116 return TokenName.EOF;
2120 if (doubleQuotedStringActive) {
2121 // ignore escaped characters in double quoted strings
// i.e. the character after the escape is read and skipped right here
2122 previousCharInLine = currentCharInLine;
2123 currentCharInLine = source[currentPositionInLine++];
// toggle the double-quote state; it is only entered while no single-quoted
// string is active
2126 if (doubleQuotedStringActive) {
2127 doubleQuotedStringActive = false;
2129 if (!singleQuotedStringActive) {
2130 doubleQuotedStringActive = true;
// toggle the single-quote state; a quote preceded by a backslash does not
// close the string, and the state is only entered while no double-quoted
// string is active
2135 if (singleQuotedStringActive) {
2136 if (previousCharInLine != '\\') {
2137 singleQuotedStringActive = false;
2140 if (!doubleQuotedStringActive) {
2141 singleQuotedStringActive = true;
2147 return TokenName.INLINE_HTML;
// the remaining branches give up with INLINE_HTML only outside of strings
2149 if (!singleQuotedStringActive && !doubleQuotedStringActive) {
2151 return TokenName.INLINE_HTML;
2155 if (previousCharInLine == '/' && !singleQuotedStringActive
2156 && !doubleQuotedStringActive) {
2158 return TokenName.INLINE_HTML;
2162 if (previousCharInLine == '/' && !singleQuotedStringActive
2163 && !doubleQuotedStringActive) {
2165 return TokenName.INLINE_HTML;
// end of source reached before any of the branches above returned
2170 } catch (IndexOutOfBoundsException e) {
2172 currentPosition = currentPositionInLine - 1;
2173 return TokenName.INLINE_HTML;
2177 // public final void getNextUnicodeChar()
2178 // throws IndexOutOfBoundsException, InvalidInputException {
2180 // //handle the case of unicode.
2181 // //when a unicode appears then we must use a buffer that holds char
2183 // //At the end of this method currentCharacter holds the new visited char
2184 // //and currentPosition points right next after it
2186 // //ALL getNextChar.... ARE OPTIMIZED COPIES
2188 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2189 // currentPosition++;
2190 // while (source[currentPosition] == 'u') {
2191 // currentPosition++;
2195 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2197 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2199 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2201 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2203 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2205 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2206 // //need the unicode buffer
2207 // if (withoutUnicodePtr == 0) {
2208 // //buffer all the entries that have been left aside....
2209 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2210 // System.arraycopy(
2213 // withoutUnicodeBuffer,
2215 // withoutUnicodePtr);
2217 // //fill the buffer with the char
2218 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2220 // unicodeAsBackSlash = currentCharacter == '\\';
2223 * Tokenize a method body, assuming that curly brackets are properly
// Advances the scanner over source text without returning tokens: white
// space is skipped (line separators are recorded when recordLineSeparator is
// set), then a switch on currentCharacter steps over string literals, line
// and block comments, identifiers/variables and numbers.
//
// Nothing is returned and nothing is thrown to the caller: the outermost
// handlers visible at the end catch IndexOutOfBoundsException and
// InvalidInputException.
//
// NOTE(review): the "case" labels and other short lines (lone braces,
// "try {", "do {", "break;", "return;", counters, ...) are absent from this
// listing, so the exact exit condition is not visible here; the comments
// describe the visible statements only.
2226 public final void jumpOverMethodBody() {
2227 this.wasAcr = false;
2230 while (true) { // loop for jumping over comments
2231 // ---------Consume white space and handles
2232 // startPosition---------
2233 boolean isWhiteSpace;
2235 startPosition = currentPosition;
2236 currentCharacter = source[currentPosition++];
2237 // if (((currentCharacter = source[currentPosition++]) ==
2239 // && (source[currentPosition] == 'u')) {
2240 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2242 if (recordLineSeparator
2243 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2244 pushLineSeparator();
2245 isWhiteSpace = Character.isWhitespace(currentCharacter);
2247 } while (isWhiteSpace);
2248 // -------consume token until } is found---------
2249 switch (currentCharacter) {
// getNextChar('\\') tests for a backslash as the next character; the escape
// helper is then invoked (its InvalidInputException is caught right here)
2260 test = getNextChar('\\');
2263 scanDoubleQuotedEscapeCharacter();
2264 } catch (InvalidInputException ex) {
2268 // try { // consume next character
2269 unicodeAsBackSlash = false;
2270 currentCharacter = source[currentPosition++];
2271 // if (((currentCharacter = source[currentPosition++])
2273 // && (source[currentPosition] == 'u')) {
2274 // getNextUnicodeChar();
2276 if (withoutUnicodePtr != 0) {
2277 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2280 // } catch (InvalidInputException ex) {
2288 // try { // consume next character
2289 unicodeAsBackSlash = false;
2290 currentCharacter = source[currentPosition++];
2291 // if (((currentCharacter = source[currentPosition++])
2293 // && (source[currentPosition] == 'u')) {
2294 // getNextUnicodeChar();
2296 if (withoutUnicodePtr != 0) {
2297 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2300 // } catch (InvalidInputException ex) {
// double-quoted string: scan up to the closing quote
2302 while (currentCharacter != '"') {
2303 if (currentCharacter == '\r') {
2304 if (source[currentPosition] == '\n')
2307 // the string cannot go further than the line
2309 if (currentCharacter == '\n') {
2311 // the string cannot go further than the line
2313 if (currentCharacter == '\\') {
2315 scanDoubleQuotedEscapeCharacter();
2316 } catch (InvalidInputException ex) {
2320 // try { // consume next character
2321 unicodeAsBackSlash = false;
2322 currentCharacter = source[currentPosition++];
2323 // if (((currentCharacter =
2324 // source[currentPosition++]) == '\\')
2325 // && (source[currentPosition] == 'u')) {
2326 // getNextUnicodeChar();
2328 if (withoutUnicodePtr != 0) {
2329 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2332 // } catch (InvalidInputException ex) {
2335 } catch (IndexOutOfBoundsException e) {
// getNextChar('/', '*') == 0 means a second '/' followed: line comment
2341 if ((test = getNextChar('/', '*')) == 0) {
2344 // get the next char
2345 currentCharacter = source[currentPosition++];
2346 // if (((currentCharacter =
2347 // source[currentPosition++]) ==
2349 // && (source[currentPosition] == 'u')) {
2350 // //-------------unicode traitement ------------
2351 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2352 // currentPosition++;
2353 // while (source[currentPosition] == 'u') {
2354 // currentPosition++;
2357 // Character.getNumericValue(source[currentPosition++]))
2361 // Character.getNumericValue(source[currentPosition++]))
2365 // Character.getNumericValue(source[currentPosition++]))
2369 // Character.getNumericValue(source[currentPosition++]))
2372 // //error don't care of the value
2373 // currentCharacter = 'A';
2374 // } //something different from \n and \r
2376 // currentCharacter =
2377 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// skip the rest of the line
2380 while (currentCharacter != '\r'
2381 && currentCharacter != '\n') {
2382 // get the next char
2383 currentCharacter = source[currentPosition++];
2384 // if (((currentCharacter =
2385 // source[currentPosition++])
2387 // && (source[currentPosition] == 'u')) {
2388 // //-------------unicode traitement
2390 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2391 // currentPosition++;
2392 // while (source[currentPosition] == 'u') {
2393 // currentPosition++;
2396 // Character.getNumericValue(source[currentPosition++]))
2400 // Character.getNumericValue(source[currentPosition++]))
2404 // Character.getNumericValue(source[currentPosition++]))
2408 // Character.getNumericValue(source[currentPosition++]))
2411 // //error don't care of the value
2412 // currentCharacter = 'A';
2413 // } //something different from \n and \r
2415 // currentCharacter =
2416 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 +
2421 if (recordLineSeparator
2422 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2423 pushLineSeparator();
2424 } catch (IndexOutOfBoundsException e) {
2425 } // an eof will then be generated
2429 // traditional and annotation comment
// star is true when the previously read character was '*'
2430 boolean star = false;
2431 // try { // consume next character
2432 unicodeAsBackSlash = false;
2433 currentCharacter = source[currentPosition++];
2434 // if (((currentCharacter = source[currentPosition++])
2436 // && (source[currentPosition] == 'u')) {
2437 // getNextUnicodeChar();
2439 if (withoutUnicodePtr != 0) {
2440 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2443 // } catch (InvalidInputException ex) {
2445 if (currentCharacter == '*') {
2448 if (recordLineSeparator
2449 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2450 pushLineSeparator();
2451 try { // get the next char
2452 currentCharacter = source[currentPosition++];
2453 // if (((currentCharacter =
2454 // source[currentPosition++]) ==
2456 // && (source[currentPosition] == 'u')) {
2457 // //-------------unicode traitement ------------
2458 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2459 // currentPosition++;
2460 // while (source[currentPosition] == 'u') {
2461 // currentPosition++;
2464 // Character.getNumericValue(source[currentPosition++]))
2468 // Character.getNumericValue(source[currentPosition++]))
2472 // Character.getNumericValue(source[currentPosition++]))
2476 // Character.getNumericValue(source[currentPosition++]))
2479 // //error don't care of the value
2480 // currentCharacter = 'A';
2481 // } //something different from * and /
2483 // currentCharacter =
2484 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2487 // loop until end of comment */
// i.e. until a '/' is read immediately after a '*'
2488 while ((currentCharacter != '/') || (!star)) {
2489 if (recordLineSeparator
2490 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2491 pushLineSeparator();
2492 star = currentCharacter == '*';
2494 currentCharacter = source[currentPosition++];
2495 // if (((currentCharacter =
2496 // source[currentPosition++])
2498 // && (source[currentPosition] == 'u')) {
2499 // //-------------unicode traitement
2501 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2502 // currentPosition++;
2503 // while (source[currentPosition] == 'u') {
2504 // currentPosition++;
2507 // Character.getNumericValue(source[currentPosition++]))
2511 // Character.getNumericValue(source[currentPosition++]))
2515 // Character.getNumericValue(source[currentPosition++]))
2519 // Character.getNumericValue(source[currentPosition++]))
2522 // //error don't care of the value
2523 // currentCharacter = 'A';
2524 // } //something different from * and /
2526 // currentCharacter =
2527 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 +
2532 } catch (IndexOutOfBoundsException e) {
// identifiers and $variables are stepped over via scanIdentifierOrKeyword();
// the flag passed is whether the current character is '$'
2540 if (isPHPIdentOrVarStart(currentCharacter)) {
2542 scanIdentifierOrKeyword((currentCharacter == '$'));
2543 } catch (InvalidInputException ex) {
// digits are recognised through the ObviousIdentCharNatures lookup table
2548 if (ObviousIdentCharNatures[currentCharacter] == C_DIGIT) {
2549 // if (Character.isDigit(currentCharacter)) {
2552 } catch (InvalidInputException ex) {
2559 // -----------------end switch while try--------------------
// both exception types are caught here; no handler statements are visible
2560 } catch (IndexOutOfBoundsException e) {
2561 } catch (InvalidInputException e) {
2566 // public final boolean jumpOverUnicodeWhiteSpace()
2567 // throws InvalidInputException {
2569 // //handle the case of unicode. Jump over the next whiteSpace
2570 // //making startPosition pointing on the next available char
2571 // //On false, the currentCharacter is filled up with a potential
2575 // this.wasAcr = false;
2576 // int c1, c2, c3, c4;
2577 // int unicodeSize = 6;
2578 // currentPosition++;
2579 // while (source[currentPosition] == 'u') {
2580 // currentPosition++;
2584 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2586 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2588 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2590 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2592 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2595 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2596 // if (recordLineSeparator
2597 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2598 // pushLineSeparator();
2599 // if (Character.isWhitespace(currentCharacter))
2602 // //buffer the new char which is not a white space
2603 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2604 // //withoutUnicodePtr == 1 is true here
2606 // } catch (IndexOutOfBoundsException e) {
2607 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
/**
 * Builds a bounded copy of {@code lineEnds}: entries are copied, starting at
 * index 0, into a newly allocated array of {@code linePtr + 1} ints.
 *
 * @return per the comment below, that bounded copy
 */
2610 public final int[] getLineEnds() {
2611 // return a bounded copy of this.lineEnds
// the destination array is allocated inline, as the third argument of arraycopy
2613 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0,
/**
 * Accessor returning a {@code char[]}.
 * NOTE(review): presumably hands out the {@code source} buffer this scanner
 * reads from - confirm whether callers get the live array or a copy.
 */
2618 public char[] getSource() {
/**
 * Tells whether a token is an identifier or a keyword.
 *
 * @param token the token to classify; dereferenced without a null check
 * @return {@code true} when {@code token} is {@code TokenName.IDENTIFIER} or
 *         compares strictly greater than {@code TokenName.KEYWORD}
 */
2622 public static boolean isIdentifierOrKeyword (TokenName token) {
// NOTE(review): the compareTo test presumably relies on every keyword constant
// being declared after TokenName.KEYWORD - confirm against the TokenName declaration
2623 return (token == TokenName.IDENTIFIER) || (token.compareTo (TokenName.KEYWORD) > 0);
/**
 * Returns the char[] for the one-character token at
 * {@code source[startPosition]}.
 *
 * @return a one-element array holding that character; the final statement
 *         allocates a fresh array, and per the comments below the common
 *         characters are meant to be served from arrays built only once
 */
2626 final char[] optimizedCurrentTokenSource1() {
2627 // return always the same char[] build only once
2628 // optimization at no speed cost of 99.5 % of the singleCharIdentifier
2629 char charOne = source[startPosition];
// fallback: no shared array was returned above, so allocate a new one
2684 return new char[] { charOne };
/**
 * Returns the char[] for the two-character token starting at
 * {@code source[startPosition]}. As the comments below state, the aim is to
 * hand back an array that was built only once instead of allocating per token.
 *
 * Sources tried, in order: the shared {@code charArray_va} ..
 * {@code charArray_vz} constants, the entries cached in
 * {@code charArray_length[0][hash]}, and finally a newly allocated array that
 * is stored into that table.
 */
2688 final char[] optimizedCurrentTokenSource2() {
// the two characters of the token
2690 c0 = source[startPosition];
2691 c1 = source[startPosition + 1];
2693 // return always the same char[] build only once
2694 // optimization at no speed cost of 99.5 % of the
2695 // singleCharIdentifier
// one shared constant per suffix letter a..z
// NOTE(review): confirm which (c0, c1) pairs select each of these constants
2698 return charArray_va;
2700 return charArray_vb;
2702 return charArray_vc;
2704 return charArray_vd;
2706 return charArray_ve;
2708 return charArray_vf;
2710 return charArray_vg;
2712 return charArray_vh;
2714 return charArray_vi;
2716 return charArray_vj;
2718 return charArray_vk;
2720 return charArray_vl;
2722 return charArray_vm;
2724 return charArray_vn;
2726 return charArray_vo;
2728 return charArray_vp;
2730 return charArray_vq;
2732 return charArray_vr;
2734 return charArray_vs;
2736 return charArray_vt;
2738 return charArray_vu;
2740 return charArray_vv;
2742 return charArray_vw;
2744 return charArray_vx;
2746 return charArray_vy;
2748 return charArray_vz;
2751 // try to return the same char[] build only once
// bucket index: c0 shifted left by 6 bits plus c1, reduced modulo TableSize
2752 int hash = ((c0 << 6) + c1) % TableSize;
2753 char[][] table = charArray_length[0][hash];
// first pass: walk forward until the end of the bucket, comparing both characters
2755 while (++i < InternalTableSize) {
2756 char[] charArray = table[i];
2757 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2760 // ---------other side---------
// second pass: entries up to and including index newEntry2
2762 int max = newEntry2;
2763 while (++i <= max) {
2764 char[] charArray = table[i];
2765 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2768 // --------add the entry-------
// no cached array matched: the slot after newEntry2 receives a new array
// NOTE(review): presumably max wraps around when it reaches InternalTableSize - confirm
2769 if (++max >= InternalTableSize)
2772 table[max] = (r = new char[] { c0, c1 });
/**
 * Returns the char[] for the three-character token starting at
 * {@code source[startPosition]}, looking for an already built array in
 * {@code charArray_length[1][hash]} before allocating a new one and storing
 * it in that table.
 */
2777 final char[] optimizedCurrentTokenSource3() {
2778 // try to return the same char[] build only once
// the three characters are read and combined at bit offsets 12, 6 and 0
// NOTE(review): presumably reduced modulo the table size on the continuation
// of this expression - confirm
2780 int hash = (((c0 = source[startPosition]) << 12)
2781 + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2783 char[][] table = charArray_length[1][hash];
// first pass: walk forward until the end of the bucket, comparing all three characters
2785 while (++i < InternalTableSize) {
2786 char[] charArray = table[i];
2787 if ((c0 == charArray[0]) && (c1 == charArray[1])
2788 && (c2 == charArray[2]))
2791 // ---------other side---------
// second pass: entries up to and including index newEntry3
2793 int max = newEntry3;
2794 while (++i <= max) {
2795 char[] charArray = table[i];
2796 if ((c0 == charArray[0]) && (c1 == charArray[1])
2797 && (c2 == charArray[2]))
2800 // --------add the entry-------
// no cached array matched: the slot after newEntry3 receives a new array
// NOTE(review): presumably max wraps around when it reaches InternalTableSize - confirm
2801 if (++max >= InternalTableSize)
2804 table[max] = (r = new char[] { c0, c1, c2 });
/**
 * Returns the char[] for the four-character token starting at
 * {@code source[startPosition]}, looking for an already built array in
 * {@code charArray_length[2][hash]} before allocating a new one and storing
 * it in that table.
 */
2809 final char[] optimizedCurrentTokenSource4() {
2810 // try to return the same char[] build only once
2811 char c0, c1, c2, c3;
// the four characters are combined at bit offsets 18, 12, 6 and 0; the first
// term is widened to long so the sum is computed as a long
// NOTE(review): presumably reduced modulo the table size on the continuation
// of this expression - confirm
2812 long hash = ((((long) (c0 = source[startPosition])) << 18)
2813 + ((c1 = source[startPosition + 1]) << 12)
2814 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
// the long hash is narrowed to int to index the bucket array
2816 char[][] table = charArray_length[2][(int) hash];
// first pass: walk forward until the end of the bucket, comparing all four characters
2818 while (++i < InternalTableSize) {
2819 char[] charArray = table[i];
2820 if ((c0 == charArray[0]) && (c1 == charArray[1])
2821 && (c2 == charArray[2]) && (c3 == charArray[3]))
2824 // ---------other side---------
// second pass: entries up to and including index newEntry4
2827 int max = newEntry4;
2828 while (++i <= max) {
2829 char[] charArray = table[i];
2830 if ((c0 == charArray[0]) && (c1 == charArray[1])
2831 && (c2 == charArray[2]) && (c3 == charArray[3]))
2834 // --------add the entry-------
// no cached array matched: the slot after newEntry4 receives a new array
// NOTE(review): presumably max wraps around when it reaches InternalTableSize - confirm
2835 if (++max >= InternalTableSize)
2838 table[max] = (r = new char[] { c0, c1, c2, c3 });
/**
 * Returns the char[] for the five-character token starting at
 * {@code source[startPosition]}, looking for an already built array in
 * {@code charArray_length[3][hash]} before allocating a new one and storing
 * it in that table.
 */
2843 final char[] optimizedCurrentTokenSource5() {
2844 // try to return the same char[] build only once
2845 char c0, c1, c2, c3, c4;
// the five characters are combined at bit offsets 24, 18, 12, 6 and 0; the two
// highest terms are widened to long before shifting
// NOTE(review): presumably reduced modulo the table size on the continuation
// of this expression - confirm
2846 long hash = ((((long) (c0 = source[startPosition])) << 24)
2847 + (((long) (c1 = source[startPosition + 1])) << 18)
2848 + ((c2 = source[startPosition + 2]) << 12)
2849 + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
// the long hash is narrowed to int to index the bucket array
2851 char[][] table = charArray_length[3][(int) hash];
// first pass: walk forward until the end of the bucket, comparing all five characters
2853 while (++i < InternalTableSize) {
2854 char[] charArray = table[i];
2855 if ((c0 == charArray[0]) && (c1 == charArray[1])
2856 && (c2 == charArray[2]) && (c3 == charArray[3])
2857 && (c4 == charArray[4]))
2860 // ---------other side---------
// second pass: entries up to and including index newEntry5
2862 int max = newEntry5;
2863 while (++i <= max) {
2864 char[] charArray = table[i];
2865 if ((c0 == charArray[0]) && (c1 == charArray[1])
2866 && (c2 == charArray[2]) && (c3 == charArray[3])
2867 && (c4 == charArray[4]))
2870 // --------add the entry-------
// no cached array matched: the slot after newEntry5 receives a new array
// NOTE(review): presumably max wraps around when it reaches InternalTableSize - confirm
2871 if (++max >= InternalTableSize)
2874 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
/**
 * Returns the char[] for the six-character token starting at
 * {@code source[startPosition]}, looking for an already built array in
 * {@code charArray_length[4][hash]} before allocating a new one and storing
 * it in that table.
 */
2879 final char[] optimizedCurrentTokenSource6() {
2880 // try to return the same char[] build only once
2881 char c0, c1, c2, c3, c4, c5;
// the six characters are combined at bit offsets 32, 24, 18, 12, 6 and 0; the
// three highest terms are widened to long before shifting
// NOTE(review): the other offsets step by 6 bits, so 30 would be the regular
// value for c0 - the 32 only affects how hashes spread, but confirm it is intended
// NOTE(review): presumably reduced modulo the table size on the continuation
// of this expression - confirm
2882 long hash = ((((long) (c0 = source[startPosition])) << 32)
2883 + (((long) (c1 = source[startPosition + 1])) << 24)
2884 + (((long) (c2 = source[startPosition + 2])) << 18)
2885 + ((c3 = source[startPosition + 3]) << 12)
2886 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
// the long hash is narrowed to int to index the bucket array
2888 char[][] table = charArray_length[4][(int) hash];
// first pass: walk forward until the end of the bucket, comparing all six characters
2890 while (++i < InternalTableSize) {
2891 char[] charArray = table[i];
2892 if ((c0 == charArray[0]) && (c1 == charArray[1])
2893 && (c2 == charArray[2]) && (c3 == charArray[3])
2894 && (c4 == charArray[4]) && (c5 == charArray[5]))
2897 // ---------other side---------
// second pass: entries up to and including index newEntry6
2899 int max = newEntry6;
2900 while (++i <= max) {
2901 char[] charArray = table[i];
2902 if ((c0 == charArray[0]) && (c1 == charArray[1])
2903 && (c2 == charArray[2]) && (c3 == charArray[3])
2904 && (c4 == charArray[4]) && (c5 == charArray[5]))
2907 // --------add the entry-------
// no cached array matched: the slot after newEntry6 receives a new array
// NOTE(review): presumably max wraps around when it reaches InternalTableSize - confirm
2908 if (++max >= InternalTableSize)
2911 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
/**
 * Records the line separator held in {@code currentCharacter} as a line end
 * in {@code lineEnds}.
 *
 * A '\r' is stored at {@code currentPosition - 1}; when the next source
 * character is '\n' the stored value is replaced by the position of that
 * '\n', so a CR+LF pair yields a single entry. A '\n' that directly follows
 * a recorded CR ({@code wasAcr} set and the last entry equal to
 * {@code currentPosition - 2}) overwrites that entry instead of adding one.
 * {@code lineEnds} is enlarged by {@code INCREMENT} (250) slots when a store
 * runs past its end.
 *
 * @throws InvalidInputException declared by the signature
 *         NOTE(review): confirm which path, if any, actually raises it
 */
2916 public final void pushLineSeparator() throws InvalidInputException {
2917 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2918 final int INCREMENT = 250;
2919 if (this.checkNonExternalizedStringLiterals) {
2920 // reinitialize the current line for non externalize strings purpose
2923 // currentCharacter is at position currentPosition-1
2925 if (currentCharacter == '\r') {
2926 int separatorPos = currentPosition - 1;
// guard: the last recorded line end is already at or beyond this separator
2927 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2929 // System.out.println("CR-" + separatorPos);
// linePtr is incremented before the bounds check of the store, so the catch
// block below can reuse it unchanged after growing the array
2931 lineEnds[++linePtr] = separatorPos;
2932 } catch (IndexOutOfBoundsException e) {
2933 // linePtr value is correct
2934 int oldLength = lineEnds.length;
2935 int[] old = lineEnds;
2936 lineEnds = new int[oldLength + INCREMENT];
2937 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2938 lineEnds[linePtr] = separatorPos;
2940 // look-ahead for merged cr+lf
// a '\n' right after the '\r' moves the entry just stored onto the '\n'
2942 if (source[currentPosition] == '\n') {
2943 // System.out.println("look-ahead LF-" + currentPosition);
2944 lineEnds[linePtr] = currentPosition;
// reading source[currentPosition] can run past the end of the source
2950 } catch (IndexOutOfBoundsException e) {
2955 if (currentCharacter == '\n') {
2956 // must merge eventual cr followed by lf
2957 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2958 // System.out.println("merge LF-" + (currentPosition - 1));
2959 lineEnds[linePtr] = currentPosition - 1;
// otherwise the '\n' is recorded as a line end of its own
2961 int separatorPos = currentPosition - 1;
// guard: the last recorded line end is already at or beyond this separator
2962 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2964 // System.out.println("LF-" + separatorPos);
2966 lineEnds[++linePtr] = separatorPos;
2967 } catch (IndexOutOfBoundsException e) {
2968 // linePtr value is correct
2969 int oldLength = lineEnds.length;
2970 int[] old = lineEnds;
2971 lineEnds = new int[oldLength + INCREMENT];
2972 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2973 lineEnds[linePtr] = separatorPos;
/**
 * Variant of the line-separator recording used, per the comment below, when
 * the '\r' or '\n' was read as a unicode escape.
 *
 * The separator position stored in {@code lineEnds} is
 * {@code currentPosition - 6} rather than {@code currentPosition - 1}, and
 * the CR+LF merge test compares the last entry with
 * {@code currentPosition - 7}. {@code lineEnds} is enlarged by
 * {@code INCREMENT} (250) slots when a store runs past its end.
 *
 * NOTE(review): the offsets 6 and 7 presumably account for the length of a
 * six-character unicode escape - confirm.
 */
2981 public final void pushUnicodeLineSeparator() {
2982 // isUnicode means that the \r or \n has been read as a unicode
2984 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2985 final int INCREMENT = 250;
// NOTE(review): the next comment mentions currentPosition-1 while the code
// below works with currentPosition - 6; it looks carried over from pushLineSeparator
2986 // currentCharacter is at position currentPosition-1
2987 if (this.checkNonExternalizedStringLiterals) {
2988 // reinitialize the current line for non externalize strings purpose
2992 if (currentCharacter == '\r') {
2993 int separatorPos = currentPosition - 6;
// guard: the last recorded line end is already at or beyond this separator
2994 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2996 // System.out.println("CR-" + separatorPos);
// linePtr is incremented before the bounds check of the store, so the catch
// block below can reuse it unchanged after growing the array
2998 lineEnds[++linePtr] = separatorPos;
2999 } catch (IndexOutOfBoundsException e) {
3000 // linePtr value is correct
3001 int oldLength = lineEnds.length;
3002 int[] old = lineEnds;
3003 lineEnds = new int[oldLength + INCREMENT];
3004 System.arraycopy(old, 0, lineEnds, 0, oldLength);
3005 lineEnds[linePtr] = separatorPos;
3007 // look-ahead for merged cr+lf
// a literal '\n' right after the escape moves the entry just stored onto that '\n'
3008 if (source[currentPosition] == '\n') {
3009 // System.out.println("look-ahead LF-" + currentPosition);
3010 lineEnds[linePtr] = currentPosition;
3018 if (currentCharacter == '\n') {
3019 // must merge eventual cr followed by lf
3020 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
// NOTE(review): the trace below prints currentPosition - 1 but the value
// actually stored is currentPosition - 6
3021 // System.out.println("merge LF-" + (currentPosition - 1));
3022 lineEnds[linePtr] = currentPosition - 6;
// otherwise the '\n' is recorded as a line end of its own
3024 int separatorPos = currentPosition - 6;
// guard: the last recorded line end is already at or beyond this separator
3025 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
3027 // System.out.println("LF-" + separatorPos);
3029 lineEnds[++linePtr] = separatorPos;
3030 } catch (IndexOutOfBoundsException e) {
3031 // linePtr value is correct
3032 int oldLength = lineEnds.length;
3033 int[] old = lineEnds;
3034 lineEnds = new int[oldLength + INCREMENT];
3035 System.arraycopy(old, 0, lineEnds, 0, oldLength);
3036 lineEnds[linePtr] = separatorPos;
/**
 * Records a comment: its start ({@code startPosition}) and a stop position
 * are stored at the next index of {@code commentStarts} and
 * {@code commentStops}; both arrays grow by 30 slots when full.
 *
 * The stop position defaults to {@code currentPosition}; two assignments
 * below replace it with a negated value ({@code -lastCommentLinePosition} or
 * {@code -currentPosition}).
 *
 * @param token the kind of comment being recorded
 *        NOTE(review): presumably selects which stop-position form is used -
 *        confirm which token values map to the negated forms
 */
3044 public void recordComment(TokenName token) {
3046 int stopPosition = this.currentPosition;
3049 stopPosition = -this.lastCommentLinePosition;
3052 stopPosition = -this.currentPosition;
3056 // a new comment is recorded
3057 int length = this.commentStops.length;
// commentPtr is advanced first; when it reaches the capacity both arrays are
// reallocated 30 slots larger and their first 'length' entries copied over
// NOTE(review): this assumes commentStarts is at least as long as commentStops
3058 if (++this.commentPtr >= length) {
3059 System.arraycopy(this.commentStops, 0,
3060 this.commentStops = new int[length + 30], 0, length);
3061 // grows the positions buffers too
3062 System.arraycopy(this.commentStarts, 0,
3063 this.commentStarts = new int[length + 30], 0, length);
3065 this.commentStops[this.commentPtr] = stopPosition;
3066 this.commentStarts[this.commentPtr] = this.startPosition;
3069 // public final void recordComment(boolean isJavadoc) {
3070 // // a new annotation comment is recorded
3072 // commentStops[++commentPtr] = isJavadoc
3073 // ? currentPosition
3074 // : -currentPosition;
3075 // } catch (IndexOutOfBoundsException e) {
3076 // int oldStackLength = commentStops.length;
3077 // int[] oldStack = commentStops;
3078 // commentStops = new int[oldStackLength + 30];
3079 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
3080 // commentStops[commentPtr] = isJavadoc ? currentPosition :
3081 // -currentPosition;
3082 // //grows the positions buffers too
3083 // int[] old = commentStarts;
3084 // commentStarts = new int[oldStackLength + 30];
3085 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
3087 // //the buffer is of a correct size here
3088 // commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning can start again from
 * {@code begin}, and discards the recorded comments.
 *
 * @param begin position assigned to {@code initialPosition},
 *        {@code startPosition} and {@code currentPosition}
 * @param end   last position of the range, apparently inclusive:
 *        {@code eofPosition} becomes {@code end + 1}, except that
 *        {@code Integer.MAX_VALUE} is kept as is so the addition cannot overflow
 */
3090 public void resetTo(int begin, int end) {
3091 // reset the scanner to a given position where it may rescan again
3093 initialPosition = startPosition = currentPosition = begin;
3094 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
3095 commentPtr = -1; // reset comment stack
/**
 * Consumes one character from {@code source} at {@code currentPosition} and
 * sets {@code currentCharacter} according to a switch on it; the visible
 * outcomes are a single quote or a backslash.
 *
 * NOTE(review): presumably called right after a backslash inside a
 * single-quoted string, where only the quote and the backslash are real
 * escapes - confirm the case labels and what the last branch does with the
 * character it consumed.
 *
 * @throws InvalidInputException declared by the signature
 *         NOTE(review): confirm which path, if any, actually raises it
 */
3098 public final void scanSingleQuotedEscapeCharacter()
3099 throws InvalidInputException {
3100 // the string with "\\u" is a legal string of two chars \ and u
3101 // thus we use a direct access to the source (for regular cases).
3102 // if (unicodeAsBackSlash) {
3103 // // consume next character
3104 // unicodeAsBackSlash = false;
3105 // if (((currentCharacter = source[currentPosition++]) == '\\')
3106 // && (source[currentPosition] == 'u')) {
3107 // getNextUnicodeChar();
3109 // if (withoutUnicodePtr != 0) {
3110 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// read the escaped character directly from the source and advance
3114 currentCharacter = source[currentPosition++];
3115 switch (currentCharacter) {
3117 currentCharacter = '\'';
3120 currentCharacter = '\\';
// a second branch also yields a backslash
3123 currentCharacter = '\\';
/**
 * Consumes one character from {@code source} at {@code currentPosition} and
 * replaces {@code currentCharacter} with the value of the escape it denotes.
 *
 * Values assigned by the visible branches: tab, line feed, carriage return,
 * double quote, single quote, backslash and '$'. The backspace and form-feed
 * assignments are commented out. Any other character is treated as the start
 * of an octal escape: up to two further digits are read and accumulated in
 * base 8, and the result is cast to {@code char}.
 *
 * @throws InvalidInputException with {@code INVALID_ESCAPE} from the octal
 *         branch
 *         NOTE(review): presumably when the first character is not an octal
 *         digit - confirm against the enclosing if/else
 */
3128 public final void scanDoubleQuotedEscapeCharacter()
3129 throws InvalidInputException {
// read the escaped character directly from the source and advance
3130 currentCharacter = source[currentPosition++];
3131 switch (currentCharacter) {
3133 // currentCharacter = '\b';
3136 currentCharacter = '\t';
3139 currentCharacter = '\n';
3142 // currentCharacter = '\f';
3145 currentCharacter = '\r';
3148 currentCharacter = '\"';
3151 currentCharacter = '\'';
3154 currentCharacter = '\\';
3157 currentCharacter = '$';
3160 // -----------octal escape--------------
3162 // OctalDigit OctalDigit
3163 // ZeroToThree OctalDigit OctalDigit
// Character.getNumericValue is negative for characters without a numeric
// value, hence the explicit 0..7 range checks below
3164 int number = Character.getNumericValue(currentCharacter);
3165 if (number >= 0 && number <= 7) {
// true when the first digit is 4..7; tested below once a third digit was read
3166 boolean zeroToThreeNot = number > 3;
3168 .isDigit(currentCharacter = source[currentPosition++])) {
3169 int digit = Character.getNumericValue(currentCharacter);
3170 if (digit >= 0 && digit <= 7) {
3171 number = (number * 8) + digit;
3173 .isDigit(currentCharacter = source[currentPosition++])) {
3174 if (zeroToThreeNot) { // has read \NotZeroToThree
3176 // Digit --> ignore last character
3180 .getNumericValue(currentCharacter);
3181 if (digit >= 0 && digit <= 7) {
3182 // has read \ZeroToThree OctalDigit
3184 number = (number * 8) + digit;
3185 } else { // has read \ZeroToThree OctalDigit
3187 // --> ignore last character
3191 } else { // has read \OctalDigit NonDigit--> ignore
3196 } else { // has read \OctalDigit NonOctalDigit--> ignore
3201 } else { // has read \OctalDigit --> ignore last character
3205 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the current character
3206 currentCharacter = (char) number;
3209 // throw new InvalidInputException(INVALID_ESCAPE);
3213 // public int scanIdentifierOrKeyword() throws InvalidInputException {
3214 // return scanIdentifierOrKeyword( false );
3216 public TokenName scanIdentifierOrKeyword(boolean isVariable)
3217 throws InvalidInputException {
3219 // first dispatch on the first char.
3220 // then the length. If there are several
3221 // keywords with the same length AND the same first char, then do another
3222 // disptach on the second char :-)...cool....but fast !
3223 useAssertAsAnIndentifier = false;
3224 while (getNextCharAsJavaIdentifierPart()) {
3228 // if (new String(getCurrentTokenSource()).equals("$this")) {
3229 // return TokenName.this;
3231 return TokenName.VARIABLE;
3236 // if (withoutUnicodePtr == 0)
3237 // quick test on length == 1 but not on length > 12 while most
3239 // have a length which is <= 12...but there are lots of identifier with
3240 // only one char....
3242 if ((length = currentPosition - startPosition) == 1)
3243 return TokenName.IDENTIFIER;
3245 data = new char[length];
3246 index = startPosition;
3247 for (int i = 0; i < length; i++) {
3248 data[i] = Character.toLowerCase(source[index + i]);
3252 // if ((length = withoutUnicodePtr) == 1)
3253 // return TokenName.Identifier;
3254 // // data = withoutUnicodeBuffer;
3255 // data = new char[withoutUnicodeBuffer.length];
3256 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3257 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3261 firstLetter = data[index];
3262 switch (firstLetter) {
3267 if ((data[++index] == '_') && (data[++index] == 'f')
3268 && (data[++index] == 'i') && (data[++index] == 'l')
3269 && (data[++index] == 'e') && (data[++index] == '_')
3270 && (data[++index] == '_'))
3271 return TokenName.FILE;
3272 index = 0; // __LINE__
3273 if ((data[++index] == '_') && (data[++index] == 'l')
3274 && (data[++index] == 'i') && (data[++index] == 'n')
3275 && (data[++index] == 'e') && (data[++index] == '_')
3276 && (data[++index] == '_'))
3277 return TokenName.LINE;
3281 if ((data[++index] == '_') && (data[++index] == 'c')
3282 && (data[++index] == 'l') && (data[++index] == 'a')
3283 && (data[++index] == 's') && (data[++index] == 's')
3284 && (data[++index] == '_') && (data[++index] == '_'))
3285 return TokenName.CLASS_C;
3289 if ((data[++index] == '_') && (data[++index] == 'm')
3290 && (data[++index] == 'e') && (data[++index] == 't')
3291 && (data[++index] == 'h') && (data[++index] == 'o')
3292 && (data[++index] == 'd') && (data[++index] == '_')
3293 && (data[++index] == '_'))
3294 return TokenName.METHOD_C;
3298 if ((data[++index] == '_') && (data[++index] == 'f')
3299 && (data[++index] == 'u') && (data[++index] == 'n')
3300 && (data[++index] == 'c') && (data[++index] == 't')
3301 && (data[++index] == 'i') && (data[++index] == 'o')
3302 && (data[++index] == 'n') && (data[++index] == '_')
3303 && (data[++index] == '_'))
3304 return TokenName.FUNC_C;
3307 return TokenName.IDENTIFIER;
3309 // as and array abstract
3313 if ((data[++index] == 's')) {
3314 return TokenName.AS;
3316 return TokenName.IDENTIFIER;
3319 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3320 return TokenName.OP_AND_OLD;
3322 return TokenName.IDENTIFIER;
3325 if ((data[++index] == 'r') && (data[++index] == 'r')
3326 && (data[++index] == 'a') && (data[++index] == 'y'))
3327 return TokenName.ARRAY;
3328 return TokenName.IDENTIFIER;
3330 if ((data[++index] == 'b') && (data[++index] == 's')
3331 && (data[++index] == 't') && (data[++index] == 'r')
3332 && (data[++index] == 'a') && (data[++index] == 'c')
3333 && (data[++index] == 't'))
3334 return TokenName.ABSTRACT;
3335 return TokenName.IDENTIFIER;
3337 return TokenName.IDENTIFIER;
3342 if ((data[++index] == 'r') && (data[++index] == 'e')
3343 && (data[++index] == 'a') && (data[++index] == 'k'))
3344 return TokenName.BREAK;
3345 return TokenName.IDENTIFIER;
3347 return TokenName.IDENTIFIER;
3349 // case catch class clone const continue
3352 if ((data[++index] == 'a') && (data[++index] == 's')
3353 && (data[++index] == 'e'))
3354 return TokenName.CASE;
3355 return TokenName.IDENTIFIER;
3357 if ((data[++index] == 'a') && (data[++index] == 't')
3358 && (data[++index] == 'c') && (data[++index] == 'h'))
3359 return TokenName.CATCH;
3361 if ((data[++index] == 'l') && (data[++index] == 'a')
3362 && (data[++index] == 's') && (data[++index] == 's'))
3363 return TokenName.CLASS;
3365 if ((data[++index] == 'l') && (data[++index] == 'o')
3366 && (data[++index] == 'n') && (data[++index] == 'e'))
3367 return TokenName.CLONE;
3369 if ((data[++index] == 'o') && (data[++index] == 'n')
3370 && (data[++index] == 's') && (data[++index] == 't'))
3371 return TokenName.CONST;
3372 return TokenName.IDENTIFIER;
3374 if ((data[++index] == 'o') && (data[++index] == 'n')
3375 && (data[++index] == 't') && (data[++index] == 'i')
3376 && (data[++index] == 'n') && (data[++index] == 'u')
3377 && (data[++index] == 'e'))
3378 return TokenName.CONTINUE;
3379 return TokenName.IDENTIFIER;
3381 return TokenName.IDENTIFIER;
3383 // declare default do die
3384 // TODO delete define ==> no keyword !
3387 if ((data[++index] == 'o'))
3388 return TokenName.DO;
3389 return TokenName.IDENTIFIER;
3391 // if ((data[++index] == 'e')
3392 // && (data[++index] == 'f')
3393 // && (data[++index] == 'i')
3394 // && (data[++index] == 'n')
3395 // && (data[++index] == 'e'))
3396 // return TokenName.define;
3398 // return TokenName.Identifier;
3400 if ((data[++index] == 'e') && (data[++index] == 'c')
3401 && (data[++index] == 'l') && (data[++index] == 'a')
3402 && (data[++index] == 'r') && (data[++index] == 'e'))
3403 return TokenName.DECLARE;
3405 if ((data[++index] == 'e') && (data[++index] == 'f')
3406 && (data[++index] == 'a') && (data[++index] == 'u')
3407 && (data[++index] == 'l') && (data[++index] == 't'))
3408 return TokenName.DEFAULT;
3409 return TokenName.IDENTIFIER;
3411 return TokenName.IDENTIFIER;
3413 // echo else exit elseif extends eval
3416 if ((data[++index] == 'c') && (data[++index] == 'h')
3417 && (data[++index] == 'o'))
3418 return TokenName.ECHO;
3419 else if ((data[index] == 'l') && (data[++index] == 's')
3420 && (data[++index] == 'e'))
3421 return TokenName.ELSE;
3422 else if ((data[index] == 'x') && (data[++index] == 'i')
3423 && (data[++index] == 't'))
3424 return TokenName.EXIT;
3425 else if ((data[index] == 'v') && (data[++index] == 'a')
3426 && (data[++index] == 'l'))
3427 return TokenName.EVAL;
3428 return TokenName.IDENTIFIER;
3431 if ((data[++index] == 'n') && (data[++index] == 'd')
3432 && (data[++index] == 'i') && (data[++index] == 'f'))
3433 return TokenName.ENDIF;
3434 if ((data[index] == 'm') && (data[++index] == 'p')
3435 && (data[++index] == 't') && (data[++index] == 'y'))
3436 return TokenName.EMPTY;
3437 return TokenName.IDENTIFIER;
3440 if ((data[++index] == 'n') && (data[++index] == 'd')
3441 && (data[++index] == 'f') && (data[++index] == 'o')
3442 && (data[++index] == 'r'))
3443 return TokenName.ENDFOR;
3444 else if ((data[index] == 'l') && (data[++index] == 's')
3445 && (data[++index] == 'e') && (data[++index] == 'i')
3446 && (data[++index] == 'f'))
3447 return TokenName.ELSEIF;
3448 return TokenName.IDENTIFIER;
3450 if ((data[++index] == 'x') && (data[++index] == 't')
3451 && (data[++index] == 'e') && (data[++index] == 'n')
3452 && (data[++index] == 'd') && (data[++index] == 's'))
3453 return TokenName.EXTENDS;
3454 return TokenName.IDENTIFIER;
3457 if ((data[++index] == 'n') && (data[++index] == 'd')
3458 && (data[++index] == 'w') && (data[++index] == 'h')
3459 && (data[++index] == 'i') && (data[++index] == 'l')
3460 && (data[++index] == 'e'))
3461 return TokenName.ENDWHILE;
3462 return TokenName.IDENTIFIER;
3465 if ((data[++index] == 'n') && (data[++index] == 'd')
3466 && (data[++index] == 's') && (data[++index] == 'w')
3467 && (data[++index] == 'i') && (data[++index] == 't')
3468 && (data[++index] == 'c') && (data[++index] == 'h'))
3469 return TokenName.ENDSWITCH;
3470 return TokenName.IDENTIFIER;
3473 if ((data[++index] == 'n') && (data[++index] == 'd')
3474 && (data[++index] == 'd') && (data[++index] == 'e')
3475 && (data[++index] == 'c') && (data[++index] == 'l')
3476 && (data[++index] == 'a') && (data[++index] == 'r')
3477 && (data[++index] == 'e'))
3478 return TokenName.ENDDECLARE;
3480 if ((data[++index] == 'n') // endforeach
3481 && (data[++index] == 'd')
3482 && (data[++index] == 'f')
3483 && (data[++index] == 'o')
3484 && (data[++index] == 'r')
3485 && (data[++index] == 'e')
3486 && (data[++index] == 'a')
3487 && (data[++index] == 'c') && (data[++index] == 'h'))
3488 return TokenName.ENDFOREACH;
3489 return TokenName.IDENTIFIER;
3491 return TokenName.IDENTIFIER;
3493 // for false final function
3496 if ((data[++index] == 'o') && (data[++index] == 'r'))
3497 return TokenName.FOR;
3498 return TokenName.IDENTIFIER;
3500 // if ((data[++index] == 'a') && (data[++index] == 'l')
3501 // && (data[++index] == 's') && (data[++index] == 'e'))
3502 // return TokenName.false;
3503 if ((data[++index] == 'i') && (data[++index] == 'n')
3504 && (data[++index] == 'a') && (data[++index] == 'l'))
3505 return TokenName.FINAL;
3506 return TokenName.IDENTIFIER;
3509 if ((data[++index] == 'o') && (data[++index] == 'r')
3510 && (data[++index] == 'e') && (data[++index] == 'a')
3511 && (data[++index] == 'c') && (data[++index] == 'h'))
3512 return TokenName.FOREACH;
3513 return TokenName.IDENTIFIER;
3516 if ((data[++index] == 'u') && (data[++index] == 'n')
3517 && (data[++index] == 'c') && (data[++index] == 't')
3518 && (data[++index] == 'i') && (data[++index] == 'o')
3519 && (data[++index] == 'n'))
3520 return TokenName.FUNCTION;
3521 return TokenName.IDENTIFIER;
3523 return TokenName.IDENTIFIER;
3527 if ((data[++index] == 'l') && (data[++index] == 'o')
3528 && (data[++index] == 'b') && (data[++index] == 'a')
3529 && (data[++index] == 'l')) {
3530 return TokenName.GLOBAL;
3533 else if (length == 4) { // goto
3534 if ((data[++index] == 'o') &&
3535 (data[++index] == 't') &&
3536 (data[++index] == 'o')) {
3537 return TokenName.GOTO;
3540 return TokenName.IDENTIFIER;
3542 // if int isset include include_once instanceof interface implements
3545 if (data[++index] == 'f')
3546 return TokenName.IF;
3547 return TokenName.IDENTIFIER;
3549 // if ((data[++index] == 'n') && (data[++index] == 't'))
3550 // return TokenName.int;
3552 // return TokenName.IDENTIFIER;
3554 if ((data[++index] == 's') && (data[++index] == 's')
3555 && (data[++index] == 'e') && (data[++index] == 't'))
3556 return TokenName.ISSET;
3557 return TokenName.IDENTIFIER;
3559 if ((data[++index] == 'n') && (data[++index] == 'c')
3560 && (data[++index] == 'l') && (data[++index] == 'u')
3561 && (data[++index] == 'd') && (data[++index] == 'e'))
3562 return TokenName.INCLUDE;
3563 return TokenName.IDENTIFIER;
3566 if ((data[++index] == 'n') && (data[++index] == 't')
3567 && (data[++index] == 'e') && (data[++index] == 'r')
3568 && (data[++index] == 'f') && (data[++index] == 'a')
3569 && (data[++index] == 'c') && (data[++index] == 'e'))
3570 return TokenName.INTERFACE;
3571 return TokenName.IDENTIFIER;
3573 // instanceof implements
3574 if ((data[++index] == 'n') && (data[++index] == 's')
3575 && (data[++index] == 't') && (data[++index] == 'a')
3576 && (data[++index] == 'n') && (data[++index] == 'c')
3577 && (data[++index] == 'e') && (data[++index] == 'o')
3578 && (data[++index] == 'f'))
3579 return TokenName.INSTANCEOF;
3580 if ((data[index] == 'm') && (data[++index] == 'p')
3581 && (data[++index] == 'l') && (data[++index] == 'e')
3582 && (data[++index] == 'm') && (data[++index] == 'e')
3583 && (data[++index] == 'n') && (data[++index] == 't')
3584 && (data[++index] == 's'))
3585 return TokenName.IMPLEMENTS;
3586 return TokenName.IDENTIFIER;
3587 case 12: // include_once
3588 if ((data[++index] == 'n') && (data[++index] == 'c')
3589 && (data[++index] == 'l') && (data[++index] == 'u')
3590 && (data[++index] == 'd') && (data[++index] == 'e')
3591 && (data[++index] == '_') && (data[++index] == 'o')
3592 && (data[++index] == 'n') && (data[++index] == 'c')
3593 && (data[++index] == 'e'))
3594 return TokenName.INCLUDE_ONCE;
3595 return TokenName.IDENTIFIER;
3597 return TokenName.IDENTIFIER;
3601 if ((data[++index] == 'i') && (data[++index] == 's')
3602 && (data[++index] == 't')) {
3603 return TokenName.LIST;
3606 return TokenName.IDENTIFIER;
3608 // new null namespace
3611 if ((data[++index] == 'e') && (data[++index] == 'w'))
3612 return TokenName.NEW;
3613 return TokenName.IDENTIFIER;
3615 // if ((data[++index] == 'u') && (data[++index] == 'l')
3616 // && (data[++index] == 'l'))
3617 // return TokenName.null;
3619 // return TokenName.IDENTIFIER;
3621 if ((data[++index] == 'a') && (data[++index] == 'm')
3622 && (data[++index] == 'e') && (data[++index] == 's')
3623 && (data[++index] == 'p') && (data[++index] == 'a')
3624 && (data[++index] == 'c') && (data[++index] == 'e')) {
3625 return TokenName.NAMESPACE;
3627 return TokenName.IDENTIFIER;
3629 return TokenName.IDENTIFIER;
3633 if (data[++index] == 'r') {
3634 return TokenName.OP_OR_OLD;
3637 // if (length == 12) {
3638 // if ((data[++index] == 'l')
3639 // && (data[++index] == 'd')
3640 // && (data[++index] == '_')
3641 // && (data[++index] == 'f')
3642 // && (data[++index] == 'u')
3643 // && (data[++index] == 'n')
3644 // && (data[++index] == 'c')
3645 // && (data[++index] == 't')
3646 // && (data[++index] == 'i')
3647 // && (data[++index] == 'o')
3648 // && (data[++index] == 'n')) {
3649 // return TokenName.old_function;
3652 return TokenName.IDENTIFIER;
3654 // print public private protected
3657 if ((data[++index] == 'r') && (data[++index] == 'i')
3658 && (data[++index] == 'n') && (data[++index] == 't')) {
3659 return TokenName.PRINT;
3661 return TokenName.IDENTIFIER;
3663 if ((data[++index] == 'u') && (data[++index] == 'b')
3664 && (data[++index] == 'l') && (data[++index] == 'i')
3665 && (data[++index] == 'c')) {
3666 return TokenName.PUBLIC;
3668 return TokenName.IDENTIFIER;
3670 if ((data[++index] == 'r') && (data[++index] == 'i')
3671 && (data[++index] == 'v') && (data[++index] == 'a')
3672 && (data[++index] == 't') && (data[++index] == 'e')) {
3673 return TokenName.PRIVATE;
3675 return TokenName.IDENTIFIER;
3677 if ((data[++index] == 'r') && (data[++index] == 'o')
3678 && (data[++index] == 't') && (data[++index] == 'e')
3679 && (data[++index] == 'c') && (data[++index] == 't')
3680 && (data[++index] == 'e') && (data[++index] == 'd')) {
3681 return TokenName.PROTECTED;
3683 return TokenName.IDENTIFIER;
3685 return TokenName.IDENTIFIER;
3687 // return require require_once
3689 if ((data[++index] == 'e') && (data[++index] == 't')
3690 && (data[++index] == 'u') && (data[++index] == 'r')
3691 && (data[++index] == 'n')) {
3692 return TokenName.RETURN;
3694 } else if (length == 7) {
3695 if ((data[++index] == 'e') && (data[++index] == 'q')
3696 && (data[++index] == 'u') && (data[++index] == 'i')
3697 && (data[++index] == 'r') && (data[++index] == 'e')) {
3698 return TokenName.REQUIRE;
3700 } else if (length == 12) {
3701 if ((data[++index] == 'e') && (data[++index] == 'q')
3702 && (data[++index] == 'u') && (data[++index] == 'i')
3703 && (data[++index] == 'r') && (data[++index] == 'e')
3704 && (data[++index] == '_') && (data[++index] == 'o')
3705 && (data[++index] == 'n') && (data[++index] == 'c')
3706 && (data[++index] == 'e')) {
3707 return TokenName.REQUIRE_ONCE;
3710 return TokenName.IDENTIFIER;
3712 // self static switch
3715 // if ((data[++index] == 'e') && (data[++index] == 'l') &&
3718 // return TokenName.self;
3720 // return TokenName.IDENTIFIER;
3722 if (data[++index] == 't')
3723 if ((data[++index] == 'a') && (data[++index] == 't')
3724 && (data[++index] == 'i') && (data[++index] == 'c')) {
3725 return TokenName.STATIC;
3727 return TokenName.IDENTIFIER;
3728 else if ((data[index] == 'w') && (data[++index] == 'i')
3729 && (data[++index] == 't') && (data[++index] == 'c')
3730 && (data[++index] == 'h'))
3731 return TokenName.SWITCH;
3733 return TokenName.IDENTIFIER;
3738 if ((data[++index] == 'r') && (data[++index] == 'y'))
3739 return TokenName.TRY;
3741 // if ((data[++index] == 'r') && (data[++index] == 'u')
3742 // && (data[++index] == 'e'))
3743 // return TokenName.true;
3745 return TokenName.IDENTIFIER;
3747 if ((data[++index] == 'h') && (data[++index] == 'r')
3748 && (data[++index] == 'o') && (data[++index] == 'w'))
3749 return TokenName.THROW;
3751 return TokenName.IDENTIFIER;
3756 if ((data[++index] == 's') && (data[++index] == 'e'))
3757 return TokenName.USE;
3759 return TokenName.IDENTIFIER;
3761 if ((data[++index] == 'n') && (data[++index] == 's')
3762 && (data[++index] == 'e') && (data[++index] == 't'))
3763 return TokenName.UNSET;
3765 return TokenName.IDENTIFIER;
3770 if ((data[++index] == 'a') && (data[++index] == 'r'))
3771 return TokenName.VAR;
3773 return TokenName.IDENTIFIER;
3778 if ((data[++index] == 'h') && (data[++index] == 'i')
3779 && (data[++index] == 'l') && (data[++index] == 'e'))
3780 return TokenName.WHILE;
3781 // case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3782 // (data[++index]=='e') && (data[++index]=='f')&&
3783 // (data[++index]=='p'))
3784 // return TokenName.widefp ;
3786 // return TokenName.IDENTIFIER;
3788 return TokenName.IDENTIFIER;
3793 if ((data[++index] == 'o') && (data[++index] == 'r'))
3794 return TokenName.OP_XOR_OLD;
3796 return TokenName.IDENTIFIER;
3798 return TokenName.IDENTIFIER;
3800 return TokenName.IDENTIFIER;
/**
 * Scans the remainder of a numeric literal whose first character is already held in
 * {@code currentCharacter}.
 * <p>
 * Three shapes are recognised: a {@code 0x}/{@code 0X} prefixed hexadecimal integer, a number
 * starting with {@code 0} followed by digits (a potential octal that may still turn out to be
 * a floating-point literal), and a plain decimal number with optional fraction, exponent and
 * {@code d}/{@code D} suffix.
 *
 * @param dotPrefix {@code true} when the digits were preceded by a '.'; the literal is then
 *            treated as floating from the start
 * @return {@code TokenName.INTEGERLITERAL} or {@code TokenName.DOUBLELITERAL}
 * @throws InvalidInputException with {@code INVALID_HEXA} when no hexadecimal digit follows the
 *             hexadecimal prefix, or with {@code INVALID_FLOAT} when an exponent carries no digit
 */
3803 public TokenName scanNumber(boolean dotPrefix) throws InvalidInputException {
3804 // when entering this method the currentCharacter is the first
3805 // digit of the number , i.e. it may be preceded by a . when
3806 // dotPrefix is true
3807 boolean floating = dotPrefix;
3808 if ((!dotPrefix) && (currentCharacter == '0')) {
3809 if (getNextChar('x', 'X') >= 0) { // ----------hexa-----------------
3810 // force the first char of the hexa number to exist...
3811 // consume next character
3812 unicodeAsBackSlash = false;
3813 currentCharacter = source[currentPosition++];
3814 // if (((currentCharacter = source[currentPosition++]) == '\\')
3815 // && (source[currentPosition] == 'u')) {
3816 // getNextUnicodeChar();
3818 // if (withoutUnicodePtr != 0) {
3819 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3822 if (Character.digit(currentCharacter, 16) == -1)
3823 throw new InvalidInputException(INVALID_HEXA);
3825 while (getNextCharAsDigit(16)) {
3828 // if (getNextChar('l', 'L') >= 0)
3829 // return TokenName.LongLiteral;
3831 return TokenName.INTEGERLITERAL;
3833 // there is no x or X in the number
3834 // potential octal ! ... some one may write 000099.0 ! thus 00100 <
3835 // 00078.0 is true !!!!! crazy language
3836 if (getNextCharAsDigit()) {
3837 // -------------potential octal-----------------
3838 while (getNextCharAsDigit()) {
3841 // if (getNextChar('l', 'L') >= 0) {
3842 // return TokenName.LongLiteral;
3845 // if (getNextChar('f', 'F') >= 0) {
3846 // return TokenName.FloatingPointLiteral;
3848 if (getNextChar('d', 'D') >= 0) {
3849 return TokenName.DOUBLELITERAL;
3850 } else { // make the distinction between octal and float ....
3851 if (getNextChar('.')) { // bingo ! ....
3852 while (getNextCharAsDigit()) {
3855 if (getNextChar('e', 'E') >= 0) {
3856 // consume next character
3857 unicodeAsBackSlash = false;
3858 currentCharacter = source[currentPosition++];
3859 // if (((currentCharacter =
3860 // source[currentPosition++]) == '\\')
3861 // && (source[currentPosition] == 'u')) {
3862 // getNextUnicodeChar();
3864 // if (withoutUnicodePtr != 0) {
3865 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3866 // currentCharacter;
3869 if ((currentCharacter == '-')
3870 || (currentCharacter == '+')) {
3871 // consume next character
3872 unicodeAsBackSlash = false;
3873 currentCharacter = source[currentPosition++];
3874 // if (((currentCharacter =
3875 // source[currentPosition++]) == '\\')
3876 // && (source[currentPosition] == 'u')) {
3877 // getNextUnicodeChar();
3879 // if (withoutUnicodePtr != 0) {
3880 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3881 // currentCharacter;
3885 if (!Character.isDigit(currentCharacter))
3886 throw new InvalidInputException(INVALID_FLOAT);
3887 while (getNextCharAsDigit()) {
3891 // if (getNextChar('f', 'F') >= 0)
3892 // return TokenName.FloatingPointLiteral;
3893 getNextChar('d', 'D'); // jump over potential d or D
3894 return TokenName.DOUBLELITERAL;
3896 return TokenName.INTEGERLITERAL;
3903 while (getNextCharAsDigit()) {
3906 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3907 // return TokenName.LongLiteral;
3908 if ((!dotPrefix) && (getNextChar('.'))) { // decimal part that can be
3910 while (getNextCharAsDigit()) {
3915 // if floating is true both exponent and suffix may be optional
3916 if (getNextChar('e', 'E') >= 0) {
3918 // consume next character
3919 unicodeAsBackSlash = false;
3920 currentCharacter = source[currentPosition++];
3921 // if (((currentCharacter = source[currentPosition++]) == '\\')
3922 // && (source[currentPosition] == 'u')) {
3923 // getNextUnicodeChar();
3925 // if (withoutUnicodePtr != 0) {
3926 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3929 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3932 unicodeAsBackSlash = false;
3933 currentCharacter = source[currentPosition++];
3934 // if (((currentCharacter = source[currentPosition++]) == '\\')
3935 // && (source[currentPosition] == 'u')) {
3936 // getNextUnicodeChar();
3938 // if (withoutUnicodePtr != 0) {
3939 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3943 if (!Character.isDigit(currentCharacter))
3944 throw new InvalidInputException(INVALID_FLOAT);
3945 while (getNextCharAsDigit()) {
3949 if (getNextChar('d', 'D') >= 0)
3950 return TokenName.DOUBLELITERAL;
3951 // if (getNextChar('f', 'F') >= 0)
3952 // return TokenName.FloatingPointLiteral;
3953 // the long flag has been tested before
3954 return floating ? TokenName.DOUBLELITERAL : TokenName.INTEGERLITERAL;
3958 * Search the line number corresponding to a specific position
3961 public final int getLineNumber(int position) {
3962 if (lineEnds == null)
3964 int length = linePtr + 1;
3967 int g = 0, d = length - 1;
3971 if (position < lineEnds[m]) {
3973 } else if (position > lineEnds[m]) {
3979 if (position < lineEnds[m]) {
/**
 * Sets the PHP mode of this scanner.
 * NOTE(review): presumably stores {@code mode} in the public {@code phpMode} flag - confirm
 * against the method body.
 *
 * @param mode the new PHP mode
 */
3985 public void setPHPMode(boolean mode) {
/**
 * Convenience overload: installs {@code source} as the buffer to scan without an associated
 * compilation unit, by delegating to {@code setSource(null, source)}.
 *
 * @param source the characters to scan
 */
3989 public final void setSource(char[] source) {
3990 setSource(null, source);
/**
 * Installs the buffer to scan and resets the scanning state.
 * <p>
 * The compilation unit is remembered, a {@code null} source is replaced by an empty array,
 * {@code initialPosition} and {@code currentPosition} restart at 0,
 * {@code containsAssertKeyword} is cleared, {@code withoutUnicodeBuffer} is reallocated with
 * the length of the source and {@code fFillerToken} is set to {@code TokenName.EOF}.
 *
 * @param compilationUnit the unit the source belongs to (the one-argument overload passes {@code null})
 * @param source the characters to scan, or {@code null} for an empty source
 */
3993 public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3994 // the source-buffer is set to sourceString
3995 this.compilationUnit = compilationUnit;
3996 if (source == null) {
3997 this.source = new char[0];
3999 this.source = source;
4002 initialPosition = currentPosition = 0;
4003 containsAssertKeyword = false;
4004 withoutUnicodeBuffer = new char[this.source.length];
4005 fFillerToken = TokenName.EOF;
4006 // encapsedStringStack = new Stack();
4009 public String toString() {
4010 if (startPosition == source.length)
4011 return "EOF\n\n" + new String(source); //$NON-NLS-1$
4012 if (currentPosition > source.length)
4013 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
4014 char front[] = new char[startPosition];
4015 System.arraycopy(source, 0, front, 0, startPosition);
4016 int middleLength = (currentPosition - 1) - startPosition + 1;
4018 if (middleLength > -1) {
4019 middle = new char[middleLength];
4020 System.arraycopy(source, startPosition, middle, 0, middleLength);
4022 middle = new char[0];
4024 char end[] = new char[source.length - (currentPosition - 1)];
4025 System.arraycopy(source, (currentPosition - 1) + 1, end, 0,
4026 source.length - (currentPosition - 1) - 1);
4027 return new String(front)
4028 + "\n===============================\nStarts here -->" //$NON-NLS-1$
4029 + new String(middle)
4030 + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a human-readable description of the given token, meant for debugging output.
 * <p>
 * Tokens that carry text (identifiers, variables, number and string literals, heredocs,
 * whitespace, comments, inline HTML) are rendered together with the current token source;
 * keywords, operators, punctuation, magic constants and casts are rendered as a fixed
 * spelling. A token without a dedicated rendering yields a "token not handled" message that
 * includes {@code act.toString()} and the current token source.
 *
 * @param act the token to describe
 * @return the printable description of {@code act}
 */
4034 public final String toStringAction(TokenName act) {
4037 return "ScannerError"; // + new String(getCurrentTokenSource()) +
4041 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4042 case ECHO_INVISIBLE:
4046 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4048 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4050 return "abstract"; //$NON-NLS-1$
4052 return "AND"; //$NON-NLS-1$
4054 return "array"; //$NON-NLS-1$
4056 return "as"; //$NON-NLS-1$
4058 return "break"; //$NON-NLS-1$
4060 return "case"; //$NON-NLS-1$
4062 return "class"; //$NON-NLS-1$
4064 return "catch"; //$NON-NLS-1$
4072 return "continue"; //$NON-NLS-1$
4074 return "default"; //$NON-NLS-1$
4076 // return "define"; //$NON-NLS-1$
4078 return "do"; //$NON-NLS-1$
4080 return "echo"; //$NON-NLS-1$
4082 return "else"; //$NON-NLS-1$
4084 return "elseif"; //$NON-NLS-1$
4086 return "endfor"; //$NON-NLS-1$
4088 return "endforeach"; //$NON-NLS-1$
4090 return "endif"; //$NON-NLS-1$
4092 return "endswitch"; //$NON-NLS-1$
4094 return "endwhile"; //$NON-NLS-1$
4098 return "extends"; //$NON-NLS-1$
4100 // return "false"; //$NON-NLS-1$
4102 return "final"; //$NON-NLS-1$
4104 return "for"; //$NON-NLS-1$
4106 return "foreach"; //$NON-NLS-1$
4108 return "function"; //$NON-NLS-1$
4110 return "global"; //$NON-NLS-1$
4112 return "if"; //$NON-NLS-1$
4114 return "implements"; //$NON-NLS-1$
4116 return "include"; //$NON-NLS-1$
4118 return "include_once"; //$NON-NLS-1$
4120 return "instanceof"; //$NON-NLS-1$
4122 return "interface"; //$NON-NLS-1$
4124 return "isset"; //$NON-NLS-1$
4126 return "list"; //$NON-NLS-1$
4128 return "new"; //$NON-NLS-1$
4130 // return "null"; //$NON-NLS-1$
4132 return "OR"; //$NON-NLS-1$
4134 return "print"; //$NON-NLS-1$
4136 return "private"; //$NON-NLS-1$
4138 return "protected"; //$NON-NLS-1$
4140 return "public"; //$NON-NLS-1$
4142 return "require"; //$NON-NLS-1$
4144 return "require_once"; //$NON-NLS-1$
4146 return "return"; //$NON-NLS-1$
4148 // return "self"; //$NON-NLS-1$
4150 return "static"; //$NON-NLS-1$
4152 return "switch"; //$NON-NLS-1$
4154 // return "true"; //$NON-NLS-1$
4156 return "unset"; //$NON-NLS-1$
4158 return "var"; //$NON-NLS-1$
4160 return "while"; //$NON-NLS-1$
4162 return "XOR"; //$NON-NLS-1$
4164 // return "$this"; //$NON-NLS-1$
4165 case INTEGERLITERAL:
4166 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4168 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4169 case STRINGDOUBLEQUOTE:
4170 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4171 case STRINGSINGLEQUOTE:
4172 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4173 case STRINGINTERPOLATED:
4174 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4175 case ENCAPSEDSTRING0:
4176 return "`"; //$NON-NLS-1$
4177 // case EncapsedString1:
4178 // return "\'"; //$NON-NLS-1$
4179 // case EncapsedString2:
4180 // return "\""; //$NON-NLS-1$
4182 return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4184 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4186 return "++"; //$NON-NLS-1$
4188 return "--"; //$NON-NLS-1$
4190 return "=="; //$NON-NLS-1$
4191 case EQUAL_EQUAL_EQUAL:
4192 return "==="; //$NON-NLS-1$
4194 return "=>"; //$NON-NLS-1$
4196 return "<="; //$NON-NLS-1$
4198 return ">="; //$NON-NLS-1$
4200 return "!="; //$NON-NLS-1$
4201 case NOT_EQUAL_EQUAL:
4202 return "!=="; //$NON-NLS-1$
4204 return "<<"; //$NON-NLS-1$
4206 return ">>"; //$NON-NLS-1$
4208 return "+="; //$NON-NLS-1$
4210 return "-="; //$NON-NLS-1$
4211 case MULTIPLY_EQUAL:
4212 return "*="; //$NON-NLS-1$
4214 return "/="; //$NON-NLS-1$
4216 return "&="; //$NON-NLS-1$
4218 return "|="; //$NON-NLS-1$
4220 return "^="; //$NON-NLS-1$
4221 case REMAINDER_EQUAL:
4222 return "%="; //$NON-NLS-1$
4224 return ".="; //$NON-NLS-1$
4225 case LEFT_SHIFT_EQUAL:
4226 return "<<="; //$NON-NLS-1$
4227 case RIGHT_SHIFT_EQUAL:
4228 return ">>="; //$NON-NLS-1$
4230 return "||"; //$NON-NLS-1$
4232 return "&&"; //$NON-NLS-1$
4234 return "+"; //$NON-NLS-1$
4236 return "-"; //$NON-NLS-1$
4240 return "!"; //$NON-NLS-1$
4242 return "%"; //$NON-NLS-1$
4244 return "^"; //$NON-NLS-1$
4246 return "&"; //$NON-NLS-1$
4248 return "*"; //$NON-NLS-1$
4250 return "|"; //$NON-NLS-1$
4252 return "~"; //$NON-NLS-1$
4254 return "~="; //$NON-NLS-1$
4256 return "/"; //$NON-NLS-1$
4258 return ">"; //$NON-NLS-1$
4260 return "<"; //$NON-NLS-1$
4262 return "("; //$NON-NLS-1$
4264 return ")"; //$NON-NLS-1$
4266 return "{"; //$NON-NLS-1$
4268 return "}"; //$NON-NLS-1$
4270 return "["; //$NON-NLS-1$
4272 return "]"; //$NON-NLS-1$
4274 return ";"; //$NON-NLS-1$
4276 return "?"; //$NON-NLS-1$
4278 return ":"; //$NON-NLS-1$
4280 return ","; //$NON-NLS-1$
4282 return "."; //$NON-NLS-1$
4284 return "="; //$NON-NLS-1$
4294 return "EOF"; //$NON-NLS-1$
4296 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4298 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4300 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4301 case COMMENT_PHPDOC:
4302 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4304 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
4307 return "__FILE__"; //$NON-NLS-1$
4309 return "__LINE__"; //$NON-NLS-1$
4311 return "__CLASS__"; //$NON-NLS-1$
4313 return "__METHOD__"; //$NON-NLS-1$
4315 return "__FUNCTION__"; //$NON-NLS-1$
4317 return "( bool )"; //$NON-NLS-1$
4319 return "( int )"; //$NON-NLS-1$
4321 return "( double )"; //$NON-NLS-1$
4323 return "( object )"; //$NON-NLS-1$
4325 return "( string )"; //$NON-NLS-1$
4327 return "( namespace )"; //$NON-NLS-1$
4329 return "token not handled (" + (act.toString ()) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner that does not check for non-externalized string literals; delegates to
 * the three-argument constructor with {@code false} as last argument.
 *
 * @param tokenizeComments passed through to the three-argument constructor
 * @param tokenizeWhiteSpace passed through to the three-argument constructor
 */
4337 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
4338 this(tokenizeComments, tokenizeWhiteSpace, false);
/**
 * Creates a scanner with assert mode switched off; delegates to the four-argument
 * constructor with {@code false} as {@code assertMode}.
 *
 * @param tokenizeComments passed through to the four-argument constructor
 * @param tokenizeWhiteSpace passed through to the four-argument constructor
 * @param checkNonExternalizedStringLiterals passed through to the four-argument constructor
 */
4341 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
4342 boolean checkNonExternalizedStringLiterals) {
4343 this(tokenizeComments, tokenizeWhiteSpace,
4344 checkNonExternalizedStringLiterals, false);
/**
 * Creates a scanner without string tokenizing and without task tags; delegates to the
 * eight-argument constructor, passing {@code false} for {@code tokenizeStrings} and
 * {@code null} for {@code taskTags}.
 *
 * @param tokenizeComments passed through to the eight-argument constructor
 * @param tokenizeWhiteSpace passed through to the eight-argument constructor
 * @param checkNonExternalizedStringLiterals passed through to the eight-argument constructor
 * @param assertMode passed through to the eight-argument constructor
 */
4347 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
4348 boolean checkNonExternalizedStringLiterals, boolean assertMode) {
4349 this(tokenizeComments, tokenizeWhiteSpace,
4350 checkNonExternalizedStringLiterals, assertMode, false, null,
/**
 * Creates a scanner with every option given explicitly. The end-of-file position starts at
 * {@code Integer.MAX_VALUE}.
 *
 * @param tokenizeComments stored in the {@code tokenizeComments} field
 * @param tokenizeWhiteSpace stored in the {@code tokenizeWhiteSpace} field
 * @param checkNonExternalizedStringLiterals stored in the field of the same name
 * @param assertMode currently unused (the assert mode field is commented out)
 * @param tokenizeStrings stored in the {@code tokenizeStrings} field
 * @param taskTags the task tags to look for in comments, may be {@code null}
 * @param taskPriorities the priorities matching {@code taskTags}, may be {@code null}
 * @param isTaskCaseSensitive whether task tags are matched case-sensitively
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
        boolean checkNonExternalizedStringLiterals, boolean assertMode,
        boolean tokenizeStrings, char[][] taskTags,
        char[][] taskPriorities, boolean isTaskCaseSensitive) {
    this.eofPosition = Integer.MAX_VALUE;
    this.tokenizeComments = tokenizeComments;
    this.tokenizeWhiteSpace = tokenizeWhiteSpace;
    this.tokenizeStrings = tokenizeStrings;
    this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
    // this.assertMode = assertMode;
    // this.encapsedStringStack = null;
    this.taskTags = taskTags;
    this.taskPriorities = taskPriorities;
    // The argument used to be dropped silently, so the value requested by the caller never
    // reached checkTaskTag(), which reads this field when comparing tag characters.
    this.isTaskCaseSensitive = isTaskCaseSensitive;
}
4369 private void checkNonExternalizeString() throws InvalidInputException {
4370 if (currentLine == null)
4372 parseTags(currentLine);
4375 private void parseTags(NLSLine line) throws InvalidInputException {
4376 String s = new String(getCurrentTokenSource());
4377 int pos = s.indexOf(TAG_PREFIX);
4378 int lineLength = line.size();
4380 int start = pos + TAG_PREFIX_LENGTH;
4381 int end = s.indexOf(TAG_POSTFIX, start);
4382 String index = s.substring(start, end);
4385 i = Integer.parseInt(index) - 1;
4386 // Tags are one based not zero based.
4387 } catch (NumberFormatException e) {
4388 i = -1; // we don't want to consider this as a valid NLS tag
4390 if (line.exists(i)) {
4393 pos = s.indexOf(TAG_PREFIX, start);
4395 this.nonNLSStrings = new StringLiteral[lineLength];
4396 int nonNLSCounter = 0;
4397 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4398 StringLiteral literal = (StringLiteral) iterator.next();
4399 if (literal != null) {
4400 this.nonNLSStrings[nonNLSCounter++] = literal;
4403 if (nonNLSCounter == 0) {
4404 this.nonNLSStrings = null;
4408 this.wasNonExternalizedStringLiteral = true;
4409 if (nonNLSCounter != lineLength) {
4410 System.arraycopy(this.nonNLSStrings, 0,
4411 (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
/**
 * Decodes the escape sequence being scanned and leaves the resulting character in
 * {@code currentCharacter}.
 * <p>
 * The simple escapes produce backspace, tab, line feed, form feed, carriage return, double
 * quote, single quote and backslash. Otherwise an octal escape of up to three digits is
 * decoded; a third digit is only consumed when the first digit is in the range 0 to 3.
 *
 * @throws InvalidInputException with {@code INVALID_ESCAPE} when the sequence is not a valid
 *             escape
 */
4417 public final void scanEscapeCharacter() throws InvalidInputException {
4418 // the string with "\\u" is a legal string of two chars \ and u
4419 // thus we use a direct access to the source (for regular cases).
4420 if (unicodeAsBackSlash) {
4421 // consume next character
4422 unicodeAsBackSlash = false;
4423 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4424 // (source[currentPosition] == 'u')) {
4425 // getNextUnicodeChar();
4427 if (withoutUnicodePtr != 0) {
4428 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
4432 currentCharacter = source[currentPosition++];
4433 switch (currentCharacter) {
4435 currentCharacter = '\b';
4438 currentCharacter = '\t';
4441 currentCharacter = '\n';
4444 currentCharacter = '\f';
4447 currentCharacter = '\r';
4450 currentCharacter = '\"';
4453 currentCharacter = '\'';
4456 currentCharacter = '\\';
4459 // -----------octal escape--------------
4461 // OctalDigit OctalDigit
4462 // ZeroToThree OctalDigit OctalDigit
4463 int number = Character.getNumericValue(currentCharacter);
4464 if (number >= 0 && number <= 7) {
// true when the first digit is 4..7, in which case at most two digits form the escape
4465 boolean zeroToThreeNot = number > 3;
4467 .isDigit(currentCharacter = source[currentPosition++])) {
4468 int digit = Character.getNumericValue(currentCharacter);
4469 if (digit >= 0 && digit <= 7) {
4470 number = (number * 8) + digit;
4472 .isDigit(currentCharacter = source[currentPosition++])) {
4473 if (zeroToThreeNot) { // has read \NotZeroToThree
4475 // Digit --> ignore last character
4479 .getNumericValue(currentCharacter);
4480 if (digit >= 0 && digit <= 7) { // has read
4482 // OctalDigit OctalDigit
4483 number = (number * 8) + digit;
4484 } else { // has read \ZeroToThree OctalDigit
4486 // --> ignore last character
4490 } else { // has read \OctalDigit NonDigit--> ignore
4495 } else { // has read \OctalDigit NonOctalDigit--> ignore
4500 } else { // has read \OctalDigit --> ignore last character
4504 throw new InvalidInputException(INVALID_ESCAPE);
4505 currentCharacter = (char) number;
4507 throw new InvalidInputException(INVALID_ESCAPE);
4511 // check presence of task: tags
4512 // TODO (frederic) see if we need to take unicode characters into account...
/**
 * Looks for task tags (the entries of {@code taskTags}) inside the comment delimited by
 * {@code commentStart} and {@code commentEnd} and records every occurrence.
 * <p>
 * First pass: each position of the comment, bounded by {@code commentEnd} and
 * {@code eofPosition}, is compared against every tag, honouring
 * {@code isTaskCaseSensitive}; positions directly preceded by '@' are not considered. Each
 * hit is appended to {@code foundTaskTags}, {@code foundTaskPriorities},
 * {@code foundTaskPositions} and {@code foundTaskMessages}; these arrays start with room for
 * five entries and double in size when full.
 * <p>
 * Second pass: for every tag found by this call, the text following the tag is delimited,
 * trimmed of surrounding whitespace, stored as the task message, and the end position of
 * the task is updated accordingly.
 *
 * @param commentStart index in the source where the comment starts
 * @param commentEnd index in the source bounding the scan (not scanned itself)
 */
4513 public void checkTaskTag(int commentStart, int commentEnd) {
4514 char[] src = this.source;
4516 // only look for newer task: tags
4517 if (this.foundTaskCount > 0
4518 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// remember where this call starts appending, for the message pass further down
4521 int foundTaskIndex = this.foundTaskCount;
4522 char previous = src[commentStart + 1]; // should be '*' or '/'
4523 nextChar: for (int i = commentStart + 2; i < commentEnd
4524 && i < this.eofPosition; i++) {
4526 char[] priority = null;
4527 // check for tag occurrence only if not ambiguous with javadoc tag
4528 if (previous != '@') {
4529 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4530 tag = this.taskTags[itag];
4531 int tagLength = tag.length;
4535 // ensure tag is not preceded by a letter if tag starts with a
4537 if (Scanner.isPHPIdentifierStart(tag[0])) {
4538 if (Scanner.isPHPIdentifierPart(previous)) {
4543 for (int t = 0; t < tagLength; t++) {
4546 if (x >= this.eofPosition || x >= commentEnd)
4548 if ((sc = src[i + t]) != (tc = tag[t])) { // case
4551 if (this.isTaskCaseSensitive
4552 || (Character.toLowerCase(sc) != Character
4553 .toLowerCase(tc))) { // case
4560 // ensure tag is not followed with letter if tag finishes
4563 if (i + tagLength < commentEnd
4564 && Scanner.isPHPIdentifierPart(src[i + tagLength
4566 if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
4569 if (this.foundTaskTags == null) {
4570 this.foundTaskTags = new char[5][];
4571 this.foundTaskMessages = new char[5][];
4572 this.foundTaskPriorities = new char[5][];
4573 this.foundTaskPositions = new int[5][];
4574 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4579 this.foundTaskTags = new char[this.foundTaskCount * 2][],
4580 0, this.foundTaskCount);
4583 this.foundTaskMessages,
4585 this.foundTaskMessages = new char[this.foundTaskCount * 2][],
4586 0, this.foundTaskCount);
4589 this.foundTaskPriorities,
4591 this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
4592 0, this.foundTaskCount);
4595 this.foundTaskPositions,
4597 this.foundTaskPositions = new int[this.foundTaskCount * 2][],
4598 0, this.foundTaskCount);
4601 priority = this.taskPriorities != null
4602 && itag < this.taskPriorities.length ? this.taskPriorities[itag]
4605 this.foundTaskTags[this.foundTaskCount] = tag;
4606 this.foundTaskPriorities[this.foundTaskCount] = priority;
4607 this.foundTaskPositions[this.foundTaskCount] = new int[] {
4608 i, i + tagLength - 1 };
4609 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4610 this.foundTaskCount++;
4611 i += tagLength - 1; // will be incremented when looping
4617 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4618 // retrieve message start and end positions
4619 int msgStart = this.foundTaskPositions[i][0]
4620 + this.foundTaskTags[i].length;
4621 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1
4623 // at most beginning of next task
4624 if (max_value < msgStart) {
4625 max_value = msgStart; // would only occur if tag is before
4630 for (int j = msgStart; j < max_value; j++) {
4631 if ((c = src[j]) == '\n' || c == '\r') {
4637 for (int j = max_value; j > msgStart; j--) {
4638 if ((c = src[j]) == '*') {
4646 if (msgStart == end)
4649 while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4651 while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4653 // update the end position of the task
4654 this.foundTaskPositions[i][1] = end;
4655 // get the message source
4656 final int messageLength = end - msgStart + 1;
4657 char[] message = new char[messageLength];
4658 System.arraycopy(src, msgStart, message, 0, messageLength);
4659 this.foundTaskMessages[i] = message;
4663 // check presence of task: tags
4664 // public void checkTaskTag(int commentStart, int commentEnd) {
4665 // // only look for newer task: tags
4666 // if (this.foundTaskCount > 0 &&
4667 // this.foundTaskPositions[this.foundTaskCount
4668 // - 1][0] >= commentStart) {
4671 // int foundTaskIndex = this.foundTaskCount;
4672 // nextChar: for (int i = commentStart; i < commentEnd && i <
4673 // this.eofPosition; i++) {
4674 // char[] tag = null;
4675 // char[] priority = null;
4676 // // check for tag occurrence
4677 // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4678 // tag = this.taskTags[itag];
4679 // priority = this.taskPriorities != null && itag <
4680 // this.taskPriorities.length
4681 // ? this.taskPriorities[itag] : null;
4682 // int tagLength = tag.length;
4683 // for (int t = 0; t < tagLength; t++) {
4684 // if (this.source[i + t] != tag[t])
4685 // continue nextTag;
4687 // if (this.foundTaskTags == null) {
4688 // this.foundTaskTags = new char[5][];
4689 // this.foundTaskMessages = new char[5][];
4690 // this.foundTaskPriorities = new char[5][];
4691 // this.foundTaskPositions = new int[5][];
4692 // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4693 // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new
4694 // char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4695 // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new
4696 // char[this.foundTaskCount * 2][], 0,
4697 // this.foundTaskCount);
4698 // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities =
4699 // new char[this.foundTaskCount * 2][], 0,
4700 // this.foundTaskCount);
4701 // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions =
4703 // int[this.foundTaskCount * 2][], 0,
4704 // this.foundTaskCount);
4706 // this.foundTaskTags[this.foundTaskCount] = tag;
4707 // this.foundTaskPriorities[this.foundTaskCount] = priority;
4708 // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i +
4711 // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4712 // this.foundTaskCount++;
4713 // i += tagLength - 1; // will be incremented when looping
4716 // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4717 // // retrieve message start and end positions
4718 // int msgStart = this.foundTaskPositions[i][0] +
4719 // this.foundTaskTags[i].length;
4720 // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i +
4721 // 1][0] - 1 : commentEnd - 1;
4722 // // at most beginning of next task
4723 // if (max_value < msgStart)
4724 // max_value = msgStart; // would only occur if tag is before EOF.
4727 // for (int j = msgStart; j < max_value; j++) {
4728 // if ((c = this.source[j]) == '\n' || c == '\r') {
4734 // for (int j = max_value; j > msgStart; j--) {
4735 // if ((c = this.source[j]) == '*') {
4743 // if (msgStart == end)
4744 // continue; // empty
4745 // // trim the message
4746 // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4748 // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4750 // // update the end position of the task
4751 // this.foundTaskPositions[i][1] = end;
4752 // // get the message source
4753 // final int messageLength = end - msgStart + 1;
4754 // char[] message = new char[messageLength];
4755 // System.arraycopy(source, msgStart, message, 0, messageLength);
4756 // this.foundTaskMessages[i] = message;