1 /***********************************************************************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3 * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4 * and is available at http://www.eclipse.org/legal/cpl-v05.html
6 * Contributors: IBM Corporation - initial API and implementation
7 **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
14 import net.sourceforge.phpdt.core.compiler.CharOperation;
15 import net.sourceforge.phpdt.core.compiler.IScanner;
16 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
17 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
21 public class Scanner implements IScanner, ITerminalSymbols {
/*
 * The APIs are:
 * - getNextToken(), which returns the type of the current token (this value is
 *   not memorized by the scanner);
 * - getCurrentTokenSource(), which provides the token's "REAL" source (i.e. all
 *   unicode escapes have been transformed into the correct char);
 * - sourceStart, which gives the position into the stream;
 * - currentPosition - 1, which gives the sourceEnd position into the stream.
 */
30 // private boolean assertMode;
32 public boolean useAssertAsAnIndentifier = false;
34 // flag indicating if processed source contains occurrences of keyword assert
35 public boolean containsAssertKeyword = false;
37 public boolean recordLineSeparator;
39 public boolean ignorePHPOneLiner = false;
41 public boolean phpMode = false;
43 public boolean phpExpressionTag = false;
45 // public Stack encapsedStringStack = null;
47 public char currentCharacter;
49 public int startPosition;
51 public int currentPosition;
53 public int initialPosition, eofPosition;
55 // after this position eof are generated instead of real token from the
57 public boolean tokenizeComments;
59 public boolean tokenizeWhiteSpace;
61 public boolean tokenizeStrings;
63 // source should be viewed as a window (aka a part)
64 // of a entire very large stream
68 public char[] withoutUnicodeBuffer;
70 public int withoutUnicodePtr;
72 // when == 0 ==> no unicode in the current token
73 public boolean unicodeAsBackSlash = false;
75 public boolean scanningFloatLiteral = false;
77 // support for /** comments
78 public int[] commentStops = new int[10];
80 public int[] commentStarts = new int[10];
82 public int commentPtr = -1; // no comment test with commentPtr value -1
84 protected int lastCommentLinePosition = -1;
86 // diet parsing support - jump over some method body when requested
87 public boolean diet = false;
89 // support for the poor-line-debuggers ....
90 // remember the position of the cr/lf
91 public int[] lineEnds = new int[250];
93 public int linePtr = -1;
95 public boolean wasAcr = false;
97 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
99 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
101 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
103 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
105 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
107 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
109 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
111 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
113 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
115 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
117 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
119 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
121 // ----------------optimized identifier managment------------------
122 static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
123 charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
124 charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
125 charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
126 charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
127 charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
128 charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
129 charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
130 charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
132 static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
133 '$', 'c' }, charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' }, charArray_vf = new char[] { '$',
134 'f' }, charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' },
135 charArray_vi = new char[] { '$', 'i' }, charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' },
136 charArray_vl = new char[] { '$', 'l' }, charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' },
137 charArray_vo = new char[] { '$', 'o' }, charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' },
138 charArray_vr = new char[] { '$', 'r' }, charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' },
139 charArray_vu = new char[] { '$', 'u' }, charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' },
140 charArray_vx = new char[] { '$', 'x' }, charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
142 public final static int MAX_OBVIOUS = 256;
144 static final int[] ObviousIdentCharNatures = new int[MAX_OBVIOUS];
146 public final static int C_DOLLAR = 8;
148 public final static int C_LETTER = 4;
150 public final static int C_DIGIT = 3;
152 public final static int C_SEPARATOR = 2;
154 public final static int C_SPACE = 1;
156 for (int i = '0'; i <= '9'; i++)
157 ObviousIdentCharNatures[i] = C_DIGIT;
159 for (int i = 'a'; i <= 'z'; i++)
160 ObviousIdentCharNatures[i] = C_LETTER;
161 for (int i = 'A'; i <= 'Z'; i++)
162 ObviousIdentCharNatures[i] = C_LETTER;
163 ObviousIdentCharNatures['_'] = C_LETTER;
164 for (int i = 127; i <= 255; i++)
165 ObviousIdentCharNatures[i] = C_LETTER;
167 ObviousIdentCharNatures['$'] = C_DOLLAR;
169 ObviousIdentCharNatures[10] = C_SPACE; // \ u000a: LINE FEED
170 ObviousIdentCharNatures[12] = C_SPACE; // \ u000c: FORM FEED
171 ObviousIdentCharNatures[13] = C_SPACE; // \ u000d: CARRIAGE RETURN
172 ObviousIdentCharNatures[32] = C_SPACE; // \ u0020: SPACE
173 ObviousIdentCharNatures[9] = C_SPACE; // \ u0009: HORIZONTAL TABULATION
175 ObviousIdentCharNatures['.'] = C_SEPARATOR;
176 ObviousIdentCharNatures[':'] = C_SEPARATOR;
177 ObviousIdentCharNatures[';'] = C_SEPARATOR;
178 ObviousIdentCharNatures[','] = C_SEPARATOR;
179 ObviousIdentCharNatures['['] = C_SEPARATOR;
180 ObviousIdentCharNatures[']'] = C_SEPARATOR;
181 ObviousIdentCharNatures['('] = C_SEPARATOR;
182 ObviousIdentCharNatures[')'] = C_SEPARATOR;
183 ObviousIdentCharNatures['{'] = C_SEPARATOR;
184 ObviousIdentCharNatures['}'] = C_SEPARATOR;
185 ObviousIdentCharNatures['+'] = C_SEPARATOR;
186 ObviousIdentCharNatures['-'] = C_SEPARATOR;
187 ObviousIdentCharNatures['*'] = C_SEPARATOR;
188 ObviousIdentCharNatures['/'] = C_SEPARATOR;
189 ObviousIdentCharNatures['='] = C_SEPARATOR;
190 ObviousIdentCharNatures['&'] = C_SEPARATOR;
191 ObviousIdentCharNatures['|'] = C_SEPARATOR;
192 ObviousIdentCharNatures['?'] = C_SEPARATOR;
193 ObviousIdentCharNatures['<'] = C_SEPARATOR;
194 ObviousIdentCharNatures['>'] = C_SEPARATOR;
195 ObviousIdentCharNatures['!'] = C_SEPARATOR;
196 ObviousIdentCharNatures['%'] = C_SEPARATOR;
197 ObviousIdentCharNatures['^'] = C_SEPARATOR;
198 ObviousIdentCharNatures['~'] = C_SEPARATOR;
199 ObviousIdentCharNatures['"'] = C_SEPARATOR;
200 ObviousIdentCharNatures['\''] = C_SEPARATOR;
202 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
204 static final int TableSize = 30, InternalTableSize = 6;
206 // 30*6 = 180 entries
207 public static final int OptimizedLength = 6;
210 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
212 // support for detecting non-externalized string literals
213 int currentLineNr = -1;
215 int previousLineNr = -1;
217 NLSLine currentLine = null;
219 List lines = new ArrayList();
221 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
223 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
225 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
227 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
229 public StringLiteral[] nonNLSStrings = null;
231 public boolean checkNonExternalizedStringLiterals = true;
233 public boolean wasNonExternalizedStringLiteral = false;
// Instance initializer: points every slot of the identifier cache at the
// shared initCharArray placeholder. The outer bound is OptimizedLength (6),
// the size charArray_length is declared with, instead of a literal 6.
{
    for (int i = 0; i < OptimizedLength; i++) {
        for (int j = 0; j < TableSize; j++) {
            for (int k = 0; k < InternalTableSize; k++) {
                charArray_length[i][j][k] = initCharArray;
            }
        }
    }
}
245 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
247 public static final int RoundBracket = 0;
249 public static final int SquareBracket = 1;
251 public static final int CurlyBracket = 2;
253 public static final int BracketKinds = 3;
256 public char[][] foundTaskTags = null;
258 public char[][] foundTaskMessages;
260 public char[][] foundTaskPriorities = null;
262 public int[][] foundTaskPositions;
264 public int foundTaskCount = 0;
266 public char[][] taskTags = null;
268 public char[][] taskPriorities = null;
270 public boolean isTaskCaseSensitive = true;
272 public static final boolean DEBUG = false;
274 public static final boolean TRACE = false;
276 public ICompilationUnit compilationUnit = null;
279 * Determines if the specified character is permissible as the first character
280 * in a PHP identifier or variable
282 * The '$' character for PHP variables is regarded as a correct first
286 public static boolean isPHPIdentOrVarStart(char ch) {
287 if (ch < MAX_OBVIOUS) {
288 return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DOLLAR;
291 //return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
295 * Determines if the specified character is permissible as the first character
296 * in a PHP identifier.
298 * The '$' character for PHP variables isn't regarded as the first character !
300 public static boolean isPHPIdentifierStart(char ch) {
301 if (ch < MAX_OBVIOUS) {
302 return ObviousIdentCharNatures[ch]==C_LETTER;
305 // return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
309 * Determines if the specified character may be part of a PHP identifier as
310 * other than the first character
312 public static boolean isPHPIdentifierPart(char ch) {
313 if (ch < MAX_OBVIOUS) {
314 return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DIGIT;
317 // return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
320 public static boolean isSQLIdentifierPart(char ch) {
321 if (ch < MAX_OBVIOUS) {
322 return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DIGIT;
327 public final boolean atEnd() {
328 // This code is not relevant if source is
329 // Only a part of the real stream input
330 return source.length == currentPosition;
333 public char[] getCurrentIdentifierSource() {
334 // return the token REAL source (aka unicodes are precomputed)
336 // if (withoutUnicodePtr != 0)
337 // //0 is used as a fast test flag so the real first char is in position 1
339 // withoutUnicodeBuffer,
341 // result = new char[withoutUnicodePtr],
343 // withoutUnicodePtr);
345 int length = currentPosition - startPosition;
346 switch (length) { // see OptimizedLength
348 return optimizedCurrentTokenSource1();
350 return optimizedCurrentTokenSource2();
352 return optimizedCurrentTokenSource3();
354 return optimizedCurrentTokenSource4();
356 return optimizedCurrentTokenSource5();
358 return optimizedCurrentTokenSource6();
361 System.arraycopy(source, startPosition, result = new char[length], 0, length);
366 public int getCurrentTokenEndPosition() {
367 return this.currentPosition - 1;
370 public final char[] getCurrentTokenSource() {
371 // Return the token REAL source (aka unicodes are precomputed)
373 // if (withoutUnicodePtr != 0)
374 // // 0 is used as a fast test flag so the real first char is in position 1
376 // withoutUnicodeBuffer,
378 // result = new char[withoutUnicodePtr],
380 // withoutUnicodePtr);
383 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
388 public final char[] getCurrentTokenSource(int startPos) {
389 // Return the token REAL source (aka unicodes are precomputed)
391 // if (withoutUnicodePtr != 0)
392 // // 0 is used as a fast test flag so the real first char is in position 1
394 // withoutUnicodeBuffer,
396 // result = new char[withoutUnicodePtr],
398 // withoutUnicodePtr);
401 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
406 public final char[] getCurrentTokenSourceString() {
407 // return the token REAL source (aka unicodes are precomputed).
408 // REMOVE the two " that are at the beginning and the end.
410 if (withoutUnicodePtr != 0)
411 // 0 is used as a fast test flag so the real first char is in position 1
412 System.arraycopy(withoutUnicodeBuffer, 2,
413 // 2 is 1 (real start) + 1 (to jump over the ")
414 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
417 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
422 public final char[] getRawTokenSourceEnd() {
423 int length = this.eofPosition - this.currentPosition - 1;
424 char[] sourceEnd = new char[length];
425 System.arraycopy(this.source, this.currentPosition, sourceEnd, 0, length);
429 public int getCurrentTokenStartPosition() {
430 return this.startPosition;
433 public final String getCurrentStringLiteral() {
434 char[] result = getCurrentStringLiteralSource();
435 return new String(result);
438 public final char[] getCurrentStringLiteralSource() {
439 // Return the token REAL source (aka unicodes are precomputed)
440 if (startPosition + 1 >= currentPosition) {
445 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
450 public final char[] getCurrentStringLiteralSource(int startPos) {
451 // Return the token REAL source (aka unicodes are precomputed)
454 System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
460 * Search the source position corresponding to the end of a given line number
462 * Line numbers are 1-based, and relative to the scanner initialPosition.
463 * Character positions are 0-based.
465 * In case the given line number is inconsistent, answers -1.
467 public final int getLineEnd(int lineNumber) {
468 if (lineEnds == null)
470 if (lineNumber >= lineEnds.length)
474 if (lineNumber == lineEnds.length - 1)
476 return lineEnds[lineNumber - 1];
477 // next line start one character behind the lineEnd of the previous line
481 * Search the source position corresponding to the beginning of a given line
484 * Line numbers are 1-based, and relative to the scanner initialPosition.
485 * Character positions are 0-based.
487 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
489 * In case the given line number is inconsistent, answers -1.
491 public final int getLineStart(int lineNumber) {
492 if (lineEnds == null)
494 if (lineNumber >= lineEnds.length)
499 return initialPosition;
500 return lineEnds[lineNumber - 2] + 1;
501 // next line start one character behind the lineEnd of the previous line
504 public final boolean getNextChar(char testedChar) {
506 // handle the case of unicode.
507 // when a unicode appears then we must use a buffer that holds char
509 // At the end of this method currentCharacter holds the new visited char
510 // and currentPosition points right next after it
511 // Both previous lines are true if the currentCharacter is == to the
513 // On false, no side effect has occured.
514 // ALL getNextChar.... ARE OPTIMIZED COPIES
515 int temp = currentPosition;
517 currentCharacter = source[currentPosition++];
518 // if (((currentCharacter = source[currentPosition++]) == '\\')
519 // && (source[currentPosition] == 'u')) {
520 // //-------------unicode traitement ------------
521 // int c1, c2, c3, c4;
522 // int unicodeSize = 6;
523 // currentPosition++;
524 // while (source[currentPosition] == 'u') {
525 // currentPosition++;
529 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
531 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
533 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
535 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
537 // currentPosition = temp;
541 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
542 // if (currentCharacter != testedChar) {
543 // currentPosition = temp;
546 // unicodeAsBackSlash = currentCharacter == '\\';
548 // //need the unicode buffer
549 // if (withoutUnicodePtr == 0) {
550 // //buffer all the entries that have been left aside....
551 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
555 // withoutUnicodeBuffer,
557 // withoutUnicodePtr);
559 // //fill the buffer with the char
560 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
563 // } //-------------end unicode traitement--------------
565 if (currentCharacter != testedChar) {
566 currentPosition = temp;
569 unicodeAsBackSlash = false;
570 // if (withoutUnicodePtr != 0)
571 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
574 } catch (IndexOutOfBoundsException e) {
575 unicodeAsBackSlash = false;
576 currentPosition = temp;
581 public final int getNextChar(char testedChar1, char testedChar2) {
582 // INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
583 // test can be done with (x==0) for the first and (x>0) for the second
584 // handle the case of unicode.
585 // when a unicode appears then we must use a buffer that holds char
587 // At the end of this method currentCharacter holds the new visited char
588 // and currentPosition points right next after it
589 // Both previous lines are true if the currentCharacter is == to the
591 // On false, no side effect has occured.
592 // ALL getNextChar.... ARE OPTIMIZED COPIES
593 int temp = currentPosition;
596 currentCharacter = source[currentPosition++];
597 // if (((currentCharacter = source[currentPosition++]) == '\\')
598 // && (source[currentPosition] == 'u')) {
599 // //-------------unicode traitement ------------
600 // int c1, c2, c3, c4;
601 // int unicodeSize = 6;
602 // currentPosition++;
603 // while (source[currentPosition] == 'u') {
604 // currentPosition++;
608 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
610 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
612 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
614 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
616 // currentPosition = temp;
620 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
621 // if (currentCharacter == testedChar1)
623 // else if (currentCharacter == testedChar2)
626 // currentPosition = temp;
630 // //need the unicode buffer
631 // if (withoutUnicodePtr == 0) {
632 // //buffer all the entries that have been left aside....
633 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
637 // withoutUnicodeBuffer,
639 // withoutUnicodePtr);
641 // //fill the buffer with the char
642 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
644 // } //-------------end unicode traitement--------------
646 if (currentCharacter == testedChar1)
648 else if (currentCharacter == testedChar2)
651 currentPosition = temp;
654 // if (withoutUnicodePtr != 0)
655 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
658 } catch (IndexOutOfBoundsException e) {
659 currentPosition = temp;
664 public final boolean getNextCharAsDigit() {
666 // handle the case of unicode.
667 // when a unicode appears then we must use a buffer that holds char
669 // At the end of this method currentCharacter holds the new visited char
670 // and currentPosition points right next after it
671 // Both previous lines are true if the currentCharacter is a digit
672 // On false, no side effect has occured.
673 // ALL getNextChar.... ARE OPTIMIZED COPIES
674 int temp = currentPosition;
676 currentCharacter = source[currentPosition++];
677 // if (((currentCharacter = source[currentPosition++]) == '\\')
678 // && (source[currentPosition] == 'u')) {
679 // //-------------unicode traitement ------------
680 // int c1, c2, c3, c4;
681 // int unicodeSize = 6;
682 // currentPosition++;
683 // while (source[currentPosition] == 'u') {
684 // currentPosition++;
688 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
690 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
692 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
694 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
696 // currentPosition = temp;
700 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
701 // if (!Character.isDigit(currentCharacter)) {
702 // currentPosition = temp;
706 // //need the unicode buffer
707 // if (withoutUnicodePtr == 0) {
708 // //buffer all the entries that have been left aside....
709 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
713 // withoutUnicodeBuffer,
715 // withoutUnicodePtr);
717 // //fill the buffer with the char
718 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
720 // } //-------------end unicode traitement--------------
722 if (!Character.isDigit(currentCharacter)) {
723 currentPosition = temp;
726 // if (withoutUnicodePtr != 0)
727 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
730 } catch (IndexOutOfBoundsException e) {
731 currentPosition = temp;
736 public final boolean getNextCharAsDigit(int radix) {
738 // handle the case of unicode.
739 // when a unicode appears then we must use a buffer that holds char
741 // At the end of this method currentCharacter holds the new visited char
742 // and currentPosition points right next after it
743 // Both previous lines are true if the currentCharacter is a digit base on
745 // On false, no side effect has occured.
746 // ALL getNextChar.... ARE OPTIMIZED COPIES
747 int temp = currentPosition;
749 currentCharacter = source[currentPosition++];
750 // if (((currentCharacter = source[currentPosition++]) == '\\')
751 // && (source[currentPosition] == 'u')) {
752 // //-------------unicode traitement ------------
753 // int c1, c2, c3, c4;
754 // int unicodeSize = 6;
755 // currentPosition++;
756 // while (source[currentPosition] == 'u') {
757 // currentPosition++;
761 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
763 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
765 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
767 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
769 // currentPosition = temp;
773 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
774 // if (Character.digit(currentCharacter, radix) == -1) {
775 // currentPosition = temp;
779 // //need the unicode buffer
780 // if (withoutUnicodePtr == 0) {
781 // //buffer all the entries that have been left aside....
782 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
786 // withoutUnicodeBuffer,
788 // withoutUnicodePtr);
790 // //fill the buffer with the char
791 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
793 // } //-------------end unicode traitement--------------
795 if (Character.digit(currentCharacter, radix) == -1) {
796 currentPosition = temp;
799 // if (withoutUnicodePtr != 0)
800 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
803 } catch (IndexOutOfBoundsException e) {
804 currentPosition = temp;
809 public boolean getNextCharAsJavaIdentifierPart() {
811 // handle the case of unicode.
812 // when a unicode appears then we must use a buffer that holds char
814 // At the end of this method currentCharacter holds the new visited char
815 // and currentPosition points right next after it
816 // Both previous lines are true if the currentCharacter is a
817 // JavaIdentifierPart
818 // On false, no side effect has occured.
819 // ALL getNextChar.... ARE OPTIMIZED COPIES
820 int temp = currentPosition;
822 currentCharacter = source[currentPosition++];
823 // if (((currentCharacter = source[currentPosition++]) == '\\')
824 // && (source[currentPosition] == 'u')) {
825 // //-------------unicode traitement ------------
826 // int c1, c2, c3, c4;
827 // int unicodeSize = 6;
828 // currentPosition++;
829 // while (source[currentPosition] == 'u') {
830 // currentPosition++;
834 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
836 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
838 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
840 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
842 // currentPosition = temp;
846 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
847 // if (!isPHPIdentifierPart(currentCharacter)) {
848 // currentPosition = temp;
852 // //need the unicode buffer
853 // if (withoutUnicodePtr == 0) {
854 // //buffer all the entries that have been left aside....
855 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
859 // withoutUnicodeBuffer,
861 // withoutUnicodePtr);
863 // //fill the buffer with the char
864 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
866 // } //-------------end unicode traitement--------------
868 if (!isPHPIdentifierPart(currentCharacter)) {
869 currentPosition = temp;
872 // if (withoutUnicodePtr != 0)
873 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
876 } catch (IndexOutOfBoundsException e) {
877 currentPosition = temp;
882 public int getCastOrParen() {
883 int tempPosition = currentPosition;
884 char tempCharacter = currentCharacter;
885 int tempToken = TokenNameLPAREN;
886 boolean found = false;
887 StringBuffer buf = new StringBuffer();
890 currentCharacter = source[currentPosition++];
891 } while (currentCharacter == ' ' || currentCharacter == '\t');
892 while (ObviousIdentCharNatures[currentCharacter]==C_LETTER) {
893 // while((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
894 buf.append(currentCharacter);
895 currentCharacter = source[currentPosition++];
897 if (buf.length() >= 3 && buf.length() <= 7) {
898 char[] data = buf.toString().toCharArray();
900 switch (data.length) {
903 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
905 tempToken = TokenNameintCAST;
910 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
912 tempToken = TokenNameboolCAST;
915 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
917 tempToken = TokenNamedoubleCAST;
923 if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
924 && (data[++index] == 'y')) {
926 tempToken = TokenNamearrayCAST;
929 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
930 && (data[++index] == 't')) {
932 tempToken = TokenNameunsetCAST;
935 if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
936 && (data[++index] == 't')) {
938 tempToken = TokenNamedoubleCAST;
944 // object string double
945 if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
946 && (data[++index] == 'c') && (data[++index] == 't')) {
948 tempToken = TokenNameobjectCAST;
951 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
952 && (data[++index] == 'n') && (data[++index] == 'g')) {
954 tempToken = TokenNamestringCAST;
957 if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
958 && (data[++index] == 'l') && (data[++index] == 'e')) {
960 tempToken = TokenNamedoubleCAST;
967 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
968 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
970 tempToken = TokenNameboolCAST;
973 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
974 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
976 tempToken = TokenNameintCAST;
982 while (currentCharacter == ' ' || currentCharacter == '\t') {
983 currentCharacter = source[currentPosition++];
985 if (currentCharacter == ')') {
990 } catch (IndexOutOfBoundsException e) {
992 currentCharacter = tempCharacter;
993 currentPosition = tempPosition;
994 return TokenNameLPAREN;
/**
 * Consumes the remainder of a backquote-delimited string: characters are read
 * from <code>source</code> until the closing '`' is reached.
 * <p>
 * Backslash escapes are decoded through scanDoubleQuotedEscapeCharacter() and
 * the decoded text is accumulated in withoutUnicodeBuffer. Line separators
 * inside the string are recorded when recordLineSeparator is set. When
 * checkNonExternalizedStringLiterals is set, the literal is added to the
 * current NLS line.
 *
 * @throws InvalidInputException
 *           with UNTERMINATED_STRING when the source ends before the closing
 *           backquote is found
 */
997 public void consumeStringInterpolated() throws InvalidInputException {
999 // consume next character
1000 unicodeAsBackSlash = false;
1001 currentCharacter = source[currentPosition++];
1002 // if (((currentCharacter = source[currentPosition++]) == '\\')
1003 // && (source[currentPosition] == 'u')) {
1004 // getNextUnicodeChar();
1006 // if (withoutUnicodePtr != 0) {
1007 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1008 // currentCharacter;
// scan up to the closing backquote
1011 while (currentCharacter != '`') {
1012 /** ** in PHP \r and \n are valid in string literals *** */
1013 // if ((currentCharacter == '\n')
1014 // || (currentCharacter == '\r')) {
1015 // // relocate if finding another quote fairly close: thus unicode
1016 // '/u000D' will be fully consumed
1017 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1018 // if (currentPosition + lookAhead == source.length)
1020 // if (source[currentPosition + lookAhead] == '\n')
1022 // if (source[currentPosition + lookAhead] == '\"') {
1023 // currentPosition += lookAhead + 1;
1027 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1029 if (currentCharacter == '\\') {
// position where the escape starts; converted to the escape's length below
1030 int escapeSize = currentPosition;
1031 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1032 // scanEscapeCharacter has a side effect on this value and we need
1033 // the previous value a few lines below
1034 scanDoubleQuotedEscapeCharacter();
1035 escapeSize = currentPosition - escapeSize;
1036 if (withoutUnicodePtr == 0) {
1037 // buffer all the entries that have been left aside....
// (everything from startPosition up to the backslash; buffer index 0 is not used)
1038 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1039 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1040 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1041 } else { // overwrite the \ in the buffer
1042 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1043 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1044 // where only one is correct
1045 withoutUnicodePtr--;
1048 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1049 if (recordLineSeparator) {
1050 pushLineSeparator();
1053 // consume next character
1054 unicodeAsBackSlash = false;
1055 currentCharacter = source[currentPosition++];
1056 // if (((currentCharacter = source[currentPosition++]) == '\\')
1057 // && (source[currentPosition] == 'u')) {
1058 // getNextUnicodeChar();
// once buffering has started, every consumed character is appended
1060 if (withoutUnicodePtr != 0) {
1061 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// running off the end of source means the closing backquote is missing
1065 } catch (IndexOutOfBoundsException e) {
1066 // reset end position for error reporting
1067 currentPosition -= 2;
1068 throw new InvalidInputException(UNTERMINATED_STRING);
1069 } catch (InvalidInputException e) {
1070 if (e.getMessage().equals(INVALID_ESCAPE)) {
1071 // relocate if finding another quote fairly close: thus unicode
1072 // '/u000D' will be fully consumed
// (the search covers at most the next 50 characters)
1073 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1074 if (currentPosition + lookAhead == source.length)
1076 if (source[currentPosition + lookAhead] == '\n')
1078 if (source[currentPosition + lookAhead] == '`') {
1079 currentPosition += lookAhead + 1;
1086 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1087 // //$NON-NLS-?$ where ? is an
// lazily create the NLS line that collects the literals found so far
1089 if (currentLine == null) {
1090 currentLine = new NLSLine();
1091 lines.add(currentLine);
1093 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Consumes the remainder of a single-quoted string: characters are read from
 * <code>source</code> until the closing '\'' is reached.
 * <p>
 * Backslash escapes are decoded through scanSingleQuotedEscapeCharacter() and
 * the decoded text is accumulated in withoutUnicodeBuffer. Line separators
 * inside the string are recorded when recordLineSeparator is set. When
 * checkNonExternalizedStringLiterals is set, the literal is added to the
 * current NLS line.
 *
 * @throws InvalidInputException
 *           with UNTERMINATED_STRING when the source ends before the closing
 *           quote is found
 */
1097 public void consumeStringConstant() throws InvalidInputException {
1099 // consume next character
1100 unicodeAsBackSlash = false;
1101 currentCharacter = source[currentPosition++];
1102 // if (((currentCharacter = source[currentPosition++]) == '\\')
1103 // && (source[currentPosition] == 'u')) {
1104 // getNextUnicodeChar();
1106 // if (withoutUnicodePtr != 0) {
1107 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1108 // currentCharacter;
// scan up to the closing single quote
1111 while (currentCharacter != '\'') {
1112 /** ** in PHP \r and \n are valid in string literals *** */
1113 // if ((currentCharacter == '\n')
1114 // || (currentCharacter == '\r')) {
1115 // // relocate if finding another quote fairly close: thus unicode
1116 // '/u000D' will be fully consumed
1117 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1118 // if (currentPosition + lookAhead == source.length)
1120 // if (source[currentPosition + lookAhead] == '\n')
1122 // if (source[currentPosition + lookAhead] == '\"') {
1123 // currentPosition += lookAhead + 1;
1127 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1129 if (currentCharacter == '\\') {
// position where the escape starts; converted to the escape's length below
1130 int escapeSize = currentPosition;
1131 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1132 // scanEscapeCharacter has a side effect on this value and we need
1133 // the previous value a few lines below
1134 scanSingleQuotedEscapeCharacter();
1135 escapeSize = currentPosition - escapeSize;
1136 if (withoutUnicodePtr == 0) {
1137 // buffer all the entries that have been left aside....
// (everything from startPosition up to the backslash; buffer index 0 is not used)
1138 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1139 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1140 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1141 } else { // overwrite the \ in the buffer
1142 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1143 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1144 // where only one is correct
1145 withoutUnicodePtr--;
1148 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1149 if (recordLineSeparator) {
1150 pushLineSeparator();
1153 // consume next character
1154 unicodeAsBackSlash = false;
1155 currentCharacter = source[currentPosition++];
1156 // if (((currentCharacter = source[currentPosition++]) == '\\')
1157 // && (source[currentPosition] == 'u')) {
1158 // getNextUnicodeChar();
// once buffering has started, every consumed character is appended
1160 if (withoutUnicodePtr != 0) {
1161 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// running off the end of source means the closing quote is missing
1165 } catch (IndexOutOfBoundsException e) {
1166 // reset end position for error reporting
1167 currentPosition -= 2;
1168 throw new InvalidInputException(UNTERMINATED_STRING);
1169 } catch (InvalidInputException e) {
1170 if (e.getMessage().equals(INVALID_ESCAPE)) {
1171 // relocate if finding another quote fairly close: thus unicode
1172 // '/u000D' will be fully consumed
// (the search covers at most the next 50 characters)
1173 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1174 if (currentPosition + lookAhead == source.length)
1176 if (source[currentPosition + lookAhead] == '\n')
1178 if (source[currentPosition + lookAhead] == '\'') {
1179 currentPosition += lookAhead + 1;
1186 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1187 // //$NON-NLS-?$ where ? is an
// lazily create the NLS line that collects the literals found so far
1189 if (currentLine == null) {
1190 currentLine = new NLSLine();
1191 lines.add(currentLine);
1193 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Consumes the remainder of a double-quoted string: characters are read from
 * <code>source</code> until a closing '"' is reached while no "${" or "{$"
 * section is open.
 * <p>
 * Backslash escapes are decoded through scanDoubleQuotedEscapeCharacter() and
 * the decoded text is accumulated in withoutUnicodeBuffer. Line separators
 * inside the string are recorded when recordLineSeparator is set. When
 * checkNonExternalizedStringLiterals is set, the literal is added to the
 * current NLS line.
 *
 * @throws InvalidInputException
 *           with UNTERMINATED_STRING when the source ends before the closing
 *           quote is found
 */
1197 public void consumeStringLiteral() throws InvalidInputException {
// true between "${" or "{$" and the next '}'; a '"' seen meanwhile does not end the string
1199 boolean openDollarBrace = false;
1200 // consume next character
1201 unicodeAsBackSlash = false;
1202 currentCharacter = source[currentPosition++];
1203 while (currentCharacter != '"' || openDollarBrace) {
1204 /** ** in PHP \r and \n are valid in string literals *** */
1205 if (currentCharacter == '\\') {
// position where the escape starts; converted to the escape's length below
1206 int escapeSize = currentPosition;
1207 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1208 // scanEscapeCharacter has a side effect on this value and we need
1209 // the previous value a few lines below
1210 scanDoubleQuotedEscapeCharacter();
1211 escapeSize = currentPosition - escapeSize;
1212 if (withoutUnicodePtr == 0) {
1213 // buffer all the entries that have been left aside....
// (everything from startPosition up to the backslash; buffer index 0 is not used)
1214 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1215 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1216 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1217 } else { // overwrite the \ in the buffer
1218 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1219 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1220 // where only one is correct
1221 withoutUnicodePtr--;
1224 } else if (currentCharacter == '$' && source[currentPosition] == '{') {
1225 openDollarBrace = true;
1226 } else if (currentCharacter == '{' && source[currentPosition] == '$') {
1227 openDollarBrace = true;
1228 } else if (currentCharacter == '}') {
1229 openDollarBrace = false;
1230 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1231 if (recordLineSeparator) {
1232 pushLineSeparator();
1235 // consume next character
1236 unicodeAsBackSlash = false;
1237 currentCharacter = source[currentPosition++];
// once buffering has started, every consumed character is appended
1238 if (withoutUnicodePtr != 0) {
1239 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// running off the end of source means the closing quote is missing
1242 } catch (IndexOutOfBoundsException e) {
1243 // reset end position for error reporting
1244 currentPosition -= 2;
1245 throw new InvalidInputException(UNTERMINATED_STRING);
1246 } catch (InvalidInputException e) {
1247 if (e.getMessage().equals(INVALID_ESCAPE)) {
1248 // relocate if finding another quote fairly close: thus unicode
1249 // '/u000D' will be fully consumed
// (the search covers at most the next 50 characters)
1250 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1251 if (currentPosition + lookAhead == source.length)
1253 if (source[currentPosition + lookAhead] == '\n')
1255 if (source[currentPosition + lookAhead] == '\"') {
1256 currentPosition += lookAhead + 1;
1263 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1264 // //$NON-NLS-?$ where ? is an
// lazily create the NLS line that collects the literals found so far
1266 if (currentLine == null) {
1267 currentLine = new NLSLine();
1268 lines.add(currentLine);
1270 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Scans the source from currentPosition and returns the terminal symbol (a
 * TokenName... constant) of the next token.
 * <p>
 * Leading white space is skipped and startPosition is moved to the first
 * character of the token; the token is then selected by a switch on that
 * character. String literals are delegated to consumeStringConstant(),
 * consumeStringLiteral() and consumeStringInterpolated(). Comments are passed
 * to recordComment(...) and are returned as tokens when tokenizeComments is
 * set. The method may also hand over to getInlinedHTMLToken(int) or
 * jumpOverMethodBody() before regular scanning starts.
 *
 * @return the type of the scanned token; TokenNameEOF once currentPosition has
 *         passed eofPosition or the end of the source array
 * @throws InvalidInputException
 *           for unterminated strings (raised by the consumeString... methods),
 *           for UNTERMINATED_COMMENT, and for the "Ctrl-Z" case
 */
1274 public int getNextToken() throws InvalidInputException {
1275 phpExpressionTag = false;
1277 return getInlinedHTMLToken(currentPosition);
1280 this.wasAcr = false;
1282 jumpOverMethodBody();
1284 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1288 withoutUnicodePtr = 0;
1289 // start with a new token
1290 char encapsedChar = ' ';
1291 // if (!encapsedStringStack.isEmpty()) {
1292 // encapsedChar = ((Character)
1293 // encapsedStringStack.peek()).charValue();
1295 // if (encapsedChar != '$' && encapsedChar != ' ') {
1296 // currentCharacter = source[currentPosition++];
1297 // if (currentCharacter == encapsedChar) {
1298 // switch (currentCharacter) {
1300 // return TokenNameEncapsedString0;
1302 // return TokenNameEncapsedString1;
1304 // return TokenNameEncapsedString2;
1307 // while (currentCharacter != encapsedChar) {
1308 // /** ** in PHP \r and \n are valid in string literals *** */
1309 // switch (currentCharacter) {
1311 // int escapeSize = currentPosition;
1312 // boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1313 // //scanEscapeCharacter make a side effect on this value and
1314 // // we need the previous value few lines down this one
1315 // scanDoubleQuotedEscapeCharacter();
1316 // escapeSize = currentPosition - escapeSize;
1317 // if (withoutUnicodePtr == 0) {
1318 // //buffer all the entries that have been left aside....
1319 // withoutUnicodePtr = currentPosition - escapeSize - 1 -
1321 // System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1322 // withoutUnicodePtr);
1323 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1324 // } else { //overwrite the / in the buffer
1325 // withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1326 // if (backSlashAsUnicodeInString) { //there are TWO \ in
1327 // withoutUnicodePtr--;
1333 // if (recordLineSeparator) {
1334 // pushLineSeparator();
1338 // if (isPHPIdentifierStart(source[currentPosition]) ||
1339 // source[currentPosition] == '{') {
1340 // currentPosition--;
1341 // encapsedStringStack.push(new Character('$'));
1342 // return TokenNameSTRING;
1346 // if (source[currentPosition] == '$') { // CURLY_OPEN
1347 // currentPosition--;
1348 // encapsedStringStack.push(new Character('$'));
1349 // return TokenNameSTRING;
1352 // // consume next character
1353 // unicodeAsBackSlash = false;
1354 // currentCharacter = source[currentPosition++];
1355 // if (withoutUnicodePtr != 0) {
1356 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1360 // currentPosition--;
1361 // return TokenNameSTRING;
1363 // ---------Consume white space and handles startPosition---------
1364 int whiteStart = currentPosition;
1365 startPosition = currentPosition;
1366 currentCharacter = source[currentPosition++];
1367 // if (encapsedChar == '$') {
1368 // switch (currentCharacter) {
1370 // currentCharacter = source[currentPosition++];
1371 // return TokenNameSTRING;
1373 // if (encapsedChar == '$') {
1374 // if (getNextChar('$'))
1375 // return TokenNameLBRACE_DOLLAR;
1377 // return TokenNameLBRACE;
1379 // return TokenNameRBRACE;
1381 // return TokenNameLBRACKET;
1383 // return TokenNameRBRACKET;
1385 // if (tokenizeStrings) {
1386 // consumeStringConstant();
1387 // return TokenNameStringSingleQuote;
1389 // return TokenNameEncapsedString1;
1391 // return TokenNameEncapsedString2;
1393 // if (tokenizeStrings) {
1394 // consumeStringInterpolated();
1395 // return TokenNameStringInterpolated;
1397 // return TokenNameEncapsedString0;
1399 // if (getNextChar('>'))
1400 // return TokenNameMINUS_GREATER;
1401 // return TokenNameSTRING;
1403 // if (currentCharacter == '$') {
1404 // int oldPosition = currentPosition;
1406 // currentCharacter = source[currentPosition++];
1407 // if (isPHPIdentifierStart(currentCharacter)) {
1408 // return TokenNameDOLLAR_LBRACE;
1410 // if (isPHPIdentifierStart(currentCharacter)) {
1411 // return scanIdentifierOrKeyword(true);
1413 // currentPosition = oldPosition;
1414 // return TokenNameSTRING;
1416 // } catch (IndexOutOfBoundsException e) {
1417 // currentPosition = oldPosition;
1418 // return TokenNameSTRING;
1421 // if (isPHPIdentifierStart(currentCharacter))
1422 // return scanIdentifierOrKeyword(false);
1423 // if (Character.isDigit(currentCharacter))
1424 // return scanNumber(false);
1425 // return TokenNameERROR;
1428 // boolean isWhiteSpace;
1430 while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1431 startPosition = currentPosition;
1432 currentCharacter = source[currentPosition++];
1433 // if (((currentCharacter = source[currentPosition++]) == '\\')
1434 // && (source[currentPosition] == 'u')) {
1435 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1437 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1438 checkNonExternalizeString();
1439 if (recordLineSeparator) {
1440 pushLineSeparator();
1445 // isWhiteSpace = (currentCharacter == ' ')
1446 // || Character.isWhitespace(currentCharacter);
1449 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1450 // reposition scanner in case we are interested by spaces as tokens
1452 startPosition = whiteStart;
1453 return TokenNameWHITESPACE;
1455 // little trick to get out in the middle of a source computation
1456 if (currentPosition > eofPosition)
1457 return TokenNameEOF;
1458 // ---------Identify the next token-------------
1459 switch (currentCharacter) {
1461 return getCastOrParen();
1463 return TokenNameRPAREN;
1465 return TokenNameLBRACE;
1467 return TokenNameRBRACE;
1469 return TokenNameLBRACKET;
1471 return TokenNameRBRACKET;
1473 return TokenNameSEMICOLON;
1475 return TokenNameCOMMA;
1477 if (getNextChar('='))
1478 return TokenNameDOT_EQUAL;
1479 if (getNextCharAsDigit())
1480 return scanNumber(true);
1481 return TokenNameDOT;
1484 if ((test = getNextChar('+', '=')) == 0)
1485 return TokenNamePLUS_PLUS;
1487 return TokenNamePLUS_EQUAL;
1488 return TokenNamePLUS;
1492 if ((test = getNextChar('-', '=')) == 0)
1493 return TokenNameMINUS_MINUS;
1495 return TokenNameMINUS_EQUAL;
1496 if (getNextChar('>'))
1497 return TokenNameMINUS_GREATER;
1498 return TokenNameMINUS;
1501 if (getNextChar('='))
1502 return TokenNameTWIDDLE_EQUAL;
1503 return TokenNameTWIDDLE;
1505 if (getNextChar('=')) {
1506 if (getNextChar('=')) {
1507 return TokenNameNOT_EQUAL_EQUAL;
1509 return TokenNameNOT_EQUAL;
1511 return TokenNameNOT;
1513 if (getNextChar('='))
1514 return TokenNameMULTIPLY_EQUAL;
1515 return TokenNameMULTIPLY;
1517 if (getNextChar('='))
1518 return TokenNameREMAINDER_EQUAL;
1519 return TokenNameREMAINDER;
// remember the position so that a plain LESS token can restore it
1521 int oldPosition = currentPosition;
1523 currentCharacter = source[currentPosition++];
1524 } catch (IndexOutOfBoundsException e) {
1525 currentPosition = oldPosition;
1526 return TokenNameLESS;
1528 switch (currentCharacter) {
1530 return TokenNameLESS_EQUAL;
1532 return TokenNameNOT_EQUAL;
1534 if (getNextChar('='))
1535 return TokenNameLEFT_SHIFT_EQUAL;
// one more '<' starts a heredoc: skip white space, then read the identifier
1536 if (getNextChar('<')) {
1537 currentCharacter = source[currentPosition++];
1538 while (Character.isWhitespace(currentCharacter)) {
1539 currentCharacter = source[currentPosition++];
1541 int heredocStart = currentPosition - 1;
1542 int heredocLength = 0;
1543 if (isPHPIdentifierStart(currentCharacter)) {
1544 currentCharacter = source[currentPosition++];
1546 return TokenNameERROR;
1548 while (isPHPIdentifierPart(currentCharacter)) {
1549 currentCharacter = source[currentPosition++];
1551 heredocLength = currentPosition - heredocStart - 1;
1552 // heredoc end-tag determination
1553 boolean endTag = true;
1556 ch = source[currentPosition++];
1557 if (ch == '\r' || ch == '\n') {
1558 if (recordLineSeparator) {
1559 pushLineSeparator();
// compare the text at the current position with the heredoc identifier
1563 for (int i = 0; i < heredocLength; i++) {
1564 if (source[currentPosition + i] != source[heredocStart + i]) {
1570 currentPosition += heredocLength - 1;
1571 currentCharacter = source[currentPosition++];
1572 break; // do...while loop
1578 return TokenNameHEREDOC;
1580 return TokenNameLEFT_SHIFT;
1582 currentPosition = oldPosition;
1583 return TokenNameLESS;
1587 if ((test = getNextChar('=', '>')) == 0)
1588 return TokenNameGREATER_EQUAL;
1590 if ((test = getNextChar('=', '>')) == 0)
1591 return TokenNameRIGHT_SHIFT_EQUAL;
1592 return TokenNameRIGHT_SHIFT;
1594 return TokenNameGREATER;
1597 if (getNextChar('=')) {
1598 if (getNextChar('=')) {
1599 return TokenNameEQUAL_EQUAL_EQUAL;
1601 return TokenNameEQUAL_EQUAL;
1603 if (getNextChar('>'))
1604 return TokenNameEQUAL_GREATER;
1605 return TokenNameEQUAL;
1608 if ((test = getNextChar('&', '=')) == 0)
1609 return TokenNameAND_AND;
1611 return TokenNameAND_EQUAL;
1612 return TokenNameAND;
1616 if ((test = getNextChar('|', '=')) == 0)
1617 return TokenNameOR_OR;
1619 return TokenNameOR_EQUAL;
1623 if (getNextChar('='))
1624 return TokenNameXOR_EQUAL;
1625 return TokenNameXOR;
// a following '>' hands scanning over to the inline HTML handling
1627 if (getNextChar('>')) {
1629 if (currentPosition == source.length) {
1631 return TokenNameINLINE_HTML;
1633 return getInlinedHTMLToken(currentPosition - 2);
1635 return TokenNameQUESTION;
1637 if (getNextChar(':'))
1638 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1639 return TokenNameCOLON;
1643 consumeStringConstant();
1644 return TokenNameStringSingleQuote;
1646 // if (tokenizeStrings) {
1647 consumeStringLiteral();
1648 return TokenNameStringDoubleQuote;
1650 // return TokenNameEncapsedString2;
1652 // if (tokenizeStrings) {
1653 consumeStringInterpolated();
1654 return TokenNameStringInterpolated;
1656 // return TokenNameEncapsedString0;
1659 char startChar = currentCharacter;
1660 if (getNextChar('=') && startChar == '/') {
1661 return TokenNameDIVIDE_EQUAL;
// '#' or a second '/' starts a line comment
1664 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1666 this.lastCommentLinePosition = this.currentPosition;
1667 int endPositionForLineComment = 0;
1668 try { // get the next char
1669 currentCharacter = source[currentPosition++];
1670 // if (((currentCharacter = source[currentPosition++])
1672 // && (source[currentPosition] == 'u')) {
1673 // //-------------unicode traitement ------------
1674 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1675 // currentPosition++;
1676 // while (source[currentPosition] == 'u') {
1677 // currentPosition++;
1680 // Character.getNumericValue(source[currentPosition++]))
1684 // Character.getNumericValue(source[currentPosition++]))
1688 // Character.getNumericValue(source[currentPosition++]))
1692 // Character.getNumericValue(source[currentPosition++]))
1696 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1698 // currentCharacter =
1699 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1702 // handle the \\u case manually into comment
1703 // if (currentCharacter == '\\') {
1704 // if (source[currentPosition] == '\\')
1705 // currentPosition++;
1706 // } //jump over the \\
1707 boolean isUnicode = false;
1708 while (currentCharacter != '\r' && currentCharacter != '\n') {
1709 this.lastCommentLinePosition = this.currentPosition;
1710 if (currentCharacter == '?') {
1711 if (getNextChar('>')) {
1712 // ?> breaks line comments
1713 startPosition = currentPosition - 2;
1715 return TokenNameINLINE_HTML;
1718 // get the next char
1720 currentCharacter = source[currentPosition++];
1721 // if (((currentCharacter = source[currentPosition++])
1723 // && (source[currentPosition] == 'u')) {
1724 // isUnicode = true;
1725 // //-------------unicode traitement ------------
1726 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1727 // currentPosition++;
1728 // while (source[currentPosition] == 'u') {
1729 // currentPosition++;
1732 // Character.getNumericValue(source[currentPosition++]))
1736 // Character.getNumericValue(
1737 // source[currentPosition++]))
1741 // Character.getNumericValue(
1742 // source[currentPosition++]))
1746 // Character.getNumericValue(
1747 // source[currentPosition++]))
1751 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1753 // currentCharacter =
1754 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1757 // handle the \\u case manually into comment
1758 // if (currentCharacter == '\\') {
1759 // if (source[currentPosition] == '\\')
1760 // currentPosition++;
1761 // } //jump over the \\
1764 endPositionForLineComment = currentPosition - 6;
1766 endPositionForLineComment = currentPosition - 1;
1768 // recordComment(false);
1769 recordComment(TokenNameCOMMENT_LINE);
1770 if (this.taskTags != null)
1771 checkTaskTag(this.startPosition, this.currentPosition);
1772 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1773 checkNonExternalizeString();
1774 if (recordLineSeparator) {
1776 pushUnicodeLineSeparator();
1778 pushLineSeparator();
1784 if (tokenizeComments) {
1786 currentPosition = endPositionForLineComment;
1787 // reset one character behind
1789 return TokenNameCOMMENT_LINE;
1791 } catch (IndexOutOfBoundsException e) { // an eof will then
1793 if (tokenizeComments) {
1795 // reset one character behind
1796 return TokenNameCOMMENT_LINE;
1802 // traditional and annotation comment
1803 boolean isJavadoc = false, star = false;
1804 // consume next character
1805 unicodeAsBackSlash = false;
1806 currentCharacter = source[currentPosition++];
1807 // if (((currentCharacter = source[currentPosition++]) ==
1809 // && (source[currentPosition] == 'u')) {
1810 // getNextUnicodeChar();
1812 // if (withoutUnicodePtr != 0) {
1813 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1814 // currentCharacter;
1817 if (currentCharacter == '*') {
1821 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1822 checkNonExternalizeString();
1823 if (recordLineSeparator) {
1824 pushLineSeparator();
1829 try { // get the next char
1830 currentCharacter = source[currentPosition++];
1831 // if (((currentCharacter = source[currentPosition++])
1833 // && (source[currentPosition] == 'u')) {
1834 // //-------------unicode traitement ------------
1835 // getNextUnicodeChar();
1837 // handle the \\u case manually into comment
1838 // if (currentCharacter == '\\') {
1839 // if (source[currentPosition] == '\\')
1840 // currentPosition++;
1841 // //jump over the \\
1843 // empty comment is not a javadoc /**/
1844 if (currentCharacter == '/') {
1847 // loop until end of comment */
1848 while ((currentCharacter != '/') || (!star)) {
1849 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1850 checkNonExternalizeString();
1851 if (recordLineSeparator) {
1852 pushLineSeparator();
// remember whether the character just handled was '*', so a following '/' closes the comment
1857 star = currentCharacter == '*';
1859 currentCharacter = source[currentPosition++];
1860 // if (((currentCharacter = source[currentPosition++])
1862 // && (source[currentPosition] == 'u')) {
1863 // //-------------unicode traitement ------------
1864 // getNextUnicodeChar();
1866 // handle the \\u case manually into comment
1867 // if (currentCharacter == '\\') {
1868 // if (source[currentPosition] == '\\')
1869 // currentPosition++;
1870 // } //jump over the \\
1872 // recordComment(isJavadoc);
1874 recordComment(TokenNameCOMMENT_PHPDOC);
1876 recordComment(TokenNameCOMMENT_BLOCK);
1879 if (tokenizeComments) {
1881 return TokenNameCOMMENT_PHPDOC;
1882 return TokenNameCOMMENT_BLOCK;
1885 if (this.taskTags != null) {
1886 checkTaskTag(this.startPosition, this.currentPosition);
1888 } catch (IndexOutOfBoundsException e) {
1889 // reset end position for error reporting
1890 currentPosition -= 2;
1891 throw new InvalidInputException(UNTERMINATED_COMMENT);
1895 return TokenNameDIVIDE;
1899 return TokenNameEOF;
1900 // the atEnd may not be <currentPosition == source.length> if
1901 // source is only some part of a real (external) stream
1902 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// '$' followed by an identifier start is scanned by scanIdentifierOrKeyword(true);
// otherwise the position is restored and a lone DOLLAR token is returned
1904 if (currentCharacter == '$') {
1905 int oldPosition = currentPosition;
1907 currentCharacter = source[currentPosition++];
1908 if (isPHPIdentifierStart(currentCharacter)) {
1909 return scanIdentifierOrKeyword(true);
1911 currentPosition = oldPosition;
1912 return TokenNameDOLLAR;
1914 } catch (IndexOutOfBoundsException e) {
1915 currentPosition = oldPosition;
1916 return TokenNameDOLLAR;
1919 if (isPHPIdentifierStart(currentCharacter))
1920 return scanIdentifierOrKeyword(false);
1921 if (Character.isDigit(currentCharacter))
1922 return scanNumber(false);
1923 return TokenNameERROR;
1926 } // -----------------end switch while try--------------------
// reading past the end of the source array is treated as end of file
1927 catch (IndexOutOfBoundsException e) {
1930 return TokenNameEOF;
1935 * @throws InvalidInputException
1937 private int getInlinedHTMLToken(int start) throws InvalidInputException {
// Scans inline HTML, with startPosition set to 'start', while looking for a
// PHP opening tag: "<?" followed by '=' (which sets phpExpressionTag) or by
// the letters P, H, P in either case.
// Returns TokenNameINLINE_HTML, or TokenNameEOF when currentPosition is
// already beyond the end of the source.
1938 if (currentPosition > source.length) {
1939 currentPosition = source.length;
1940 return TokenNameEOF;
1942 startPosition = start;
1945 currentCharacter = source[currentPosition++];
1946 if (currentCharacter == '<') {
1947 if (getNextChar('?')) {
1948 currentCharacter = source[currentPosition++];
1949 if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
1950 if (currentCharacter != '=') { // <?=
1953 phpExpressionTag = true;
1956 if (ignorePHPOneLiner) { // for CodeFormatter
// a PHP section that lookAheadLinePHPTag() reports as INLINE_HTML stays part of the HTML token
1957 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1959 return TokenNameINLINE_HTML;
1963 return TokenNameINLINE_HTML;
1966 // boolean phpStart = (currentCharacter == 'P') ||
1967 // (currentCharacter == 'p');
// check the remaining letters of the tag: 'H' or 'h', then 'P' or 'p'
1969 int test = getNextChar('H', 'h');
1971 test = getNextChar('P', 'p');
1974 if (ignorePHPOneLiner) {
1975 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1977 return TokenNameINLINE_HTML;
1981 return TokenNameINLINE_HTML;
1989 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1990 if (recordLineSeparator) {
1991 pushLineSeparator();
1996 } // -----------------while--------------------
1998 return TokenNameINLINE_HTML;
1999 } // -----------------try--------------------
// the source ended inside the HTML: report what was read as inline HTML
2000 catch (IndexOutOfBoundsException e) {
2001 startPosition = start;
2005 return TokenNameINLINE_HTML;
2011 private int lookAheadLinePHPTag() {
2012 // check if the PHP is only in this line (for CodeFormatter)
// Reads ahead with a private cursor (currentPositionInLine) and keeps track
// of whether it is inside a single- or double-quoted string.
// Returns TokenNameEOF as a marker, after moving currentPosition to the
// cursor, when the character preceding the matched one is '?'; the other
// visible exits return TokenNameINLINE_HTML.
2013 int currentPositionInLine = currentPosition;
2014 char previousCharInLine = ' ';
2015 char currentCharInLine = ' ';
2016 boolean singleQuotedStringActive = false;
2017 boolean doubleQuotedStringActive = false;
2020 // look ahead in this line
2022 previousCharInLine = currentCharInLine;
2023 currentCharInLine = source[currentPositionInLine++];
2024 switch (currentCharInLine) {
2026 if (previousCharInLine == '?') {
2027 // update the scanner's current Position in the source
2028 currentPosition = currentPositionInLine;
2029 // use as "dummy" token
2030 return TokenNameEOF;
2034 if (doubleQuotedStringActive) {
2035 // ignore escaped characters in double quoted strings
2036 previousCharInLine = currentCharInLine;
2037 currentCharInLine = source[currentPositionInLine++];
2040 if (doubleQuotedStringActive) {
2041 doubleQuotedStringActive = false;
2043 if (!singleQuotedStringActive) {
2044 doubleQuotedStringActive = true;
2049 if (singleQuotedStringActive) {
// a quote preceded by a backslash does not close the single-quoted string
2050 if (previousCharInLine != '\\') {
2051 singleQuotedStringActive = false;
2054 if (!doubleQuotedStringActive) {
2055 singleQuotedStringActive = true;
2061 return TokenNameINLINE_HTML;
2063 if (!singleQuotedStringActive && !doubleQuotedStringActive) {
2065 return TokenNameINLINE_HTML;
2069 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2071 return TokenNameINLINE_HTML;
2075 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2077 return TokenNameINLINE_HTML;
// the source ended during the look-ahead: keep the cursor position and report INLINE_HTML
2082 } catch (IndexOutOfBoundsException e) {
2084 currentPosition = currentPositionInLine;
2085 return TokenNameINLINE_HTML;
2089 // public final void getNextUnicodeChar()
2090 // throws IndexOutOfBoundsException, InvalidInputException {
2092 // //handle the case of unicode.
2093 // //when a unicode appears then we must use a buffer that holds char
2095 // //At the end of this method currentCharacter holds the new visited char
2096 // //and currentPosition points right next after it
2098 // //ALL getNextChar.... ARE OPTIMIZED COPIES
2100 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2101 // currentPosition++;
2102 // while (source[currentPosition] == 'u') {
2103 // currentPosition++;
2107 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2109 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2111 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2113 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2115 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2117 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2118 // //need the unicode buffer
2119 // if (withoutUnicodePtr == 0) {
2120 // //buffer all the entries that have been left aside....
2121 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2122 // System.arraycopy(
2125 // withoutUnicodeBuffer,
2127 // withoutUnicodePtr);
2129 // //fill the buffer with the char
2130 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2132 // unicodeAsBackSlash = currentCharacter == '\\';
2135 * Tokenize a method body, assuming that curly brackets are properly balanced.
2137 public final void jumpOverMethodBody() {
2138 this.wasAcr = false;
2141 while (true) { // loop for jumping over comments
2142 // ---------Consume white space and handles startPosition---------
2143 boolean isWhiteSpace;
2145 startPosition = currentPosition;
2146 currentCharacter = source[currentPosition++];
2147 // if (((currentCharacter = source[currentPosition++]) == '\\')
2148 // && (source[currentPosition] == 'u')) {
2149 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2151 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2152 pushLineSeparator();
2153 isWhiteSpace = Character.isWhitespace(currentCharacter);
2155 } while (isWhiteSpace);
2156 // -------consume token until } is found---------
2157 switch (currentCharacter) {
2168 test = getNextChar('\\');
2171 scanDoubleQuotedEscapeCharacter();
2172 } catch (InvalidInputException ex) {
2176 // try { // consume next character
2177 unicodeAsBackSlash = false;
2178 currentCharacter = source[currentPosition++];
2179 // if (((currentCharacter = source[currentPosition++]) == '\\')
2180 // && (source[currentPosition] == 'u')) {
2181 // getNextUnicodeChar();
2183 if (withoutUnicodePtr != 0) {
2184 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2187 // } catch (InvalidInputException ex) {
2195 // try { // consume next character
2196 unicodeAsBackSlash = false;
2197 currentCharacter = source[currentPosition++];
2198 // if (((currentCharacter = source[currentPosition++]) == '\\')
2199 // && (source[currentPosition] == 'u')) {
2200 // getNextUnicodeChar();
2202 if (withoutUnicodePtr != 0) {
2203 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2206 // } catch (InvalidInputException ex) {
2208 while (currentCharacter != '"') {
2209 if (currentCharacter == '\r') {
2210 if (source[currentPosition] == '\n')
2213 // the string cannot go further that the line
2215 if (currentCharacter == '\n') {
2217 // the string cannot go further that the line
2219 if (currentCharacter == '\\') {
2221 scanDoubleQuotedEscapeCharacter();
2222 } catch (InvalidInputException ex) {
2226 // try { // consume next character
2227 unicodeAsBackSlash = false;
2228 currentCharacter = source[currentPosition++];
2229 // if (((currentCharacter = source[currentPosition++]) == '\\')
2230 // && (source[currentPosition] == 'u')) {
2231 // getNextUnicodeChar();
2233 if (withoutUnicodePtr != 0) {
2234 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2237 // } catch (InvalidInputException ex) {
2240 } catch (IndexOutOfBoundsException e) {
2246 if ((test = getNextChar('/', '*')) == 0) {
2249 // get the next char
2250 currentCharacter = source[currentPosition++];
2251 // if (((currentCharacter = source[currentPosition++]) ==
2253 // && (source[currentPosition] == 'u')) {
2254 // //-------------unicode traitement ------------
2255 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2256 // currentPosition++;
2257 // while (source[currentPosition] == 'u') {
2258 // currentPosition++;
2261 // Character.getNumericValue(source[currentPosition++]))
2265 // Character.getNumericValue(source[currentPosition++]))
2269 // Character.getNumericValue(source[currentPosition++]))
2273 // Character.getNumericValue(source[currentPosition++]))
2276 // //error don't care of the value
2277 // currentCharacter = 'A';
2278 // } //something different from \n and \r
2280 // currentCharacter =
2281 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2284 while (currentCharacter != '\r' && currentCharacter != '\n') {
2285 // get the next char
2286 currentCharacter = source[currentPosition++];
2287 // if (((currentCharacter = source[currentPosition++])
2289 // && (source[currentPosition] == 'u')) {
2290 // //-------------unicode traitement ------------
2291 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2292 // currentPosition++;
2293 // while (source[currentPosition] == 'u') {
2294 // currentPosition++;
2297 // Character.getNumericValue(source[currentPosition++]))
2301 // Character.getNumericValue(source[currentPosition++]))
2305 // Character.getNumericValue(source[currentPosition++]))
2309 // Character.getNumericValue(source[currentPosition++]))
2312 // //error don't care of the value
2313 // currentCharacter = 'A';
2314 // } //something different from \n and \r
2316 // currentCharacter =
2317 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2321 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2322 pushLineSeparator();
2323 } catch (IndexOutOfBoundsException e) {
2324 } // an eof will then be generated
2328 // traditional and annotation comment
2329 boolean star = false;
2330 // try { // consume next character
2331 unicodeAsBackSlash = false;
2332 currentCharacter = source[currentPosition++];
2333 // if (((currentCharacter = source[currentPosition++]) == '\\')
2334 // && (source[currentPosition] == 'u')) {
2335 // getNextUnicodeChar();
2337 if (withoutUnicodePtr != 0) {
2338 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2341 // } catch (InvalidInputException ex) {
2343 if (currentCharacter == '*') {
2346 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2347 pushLineSeparator();
2348 try { // get the next char
2349 currentCharacter = source[currentPosition++];
2350 // if (((currentCharacter = source[currentPosition++]) ==
2352 // && (source[currentPosition] == 'u')) {
2353 // //-------------unicode traitement ------------
2354 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2355 // currentPosition++;
2356 // while (source[currentPosition] == 'u') {
2357 // currentPosition++;
2360 // Character.getNumericValue(source[currentPosition++]))
2364 // Character.getNumericValue(source[currentPosition++]))
2368 // Character.getNumericValue(source[currentPosition++]))
2372 // Character.getNumericValue(source[currentPosition++]))
2375 // //error don't care of the value
2376 // currentCharacter = 'A';
2377 // } //something different from * and /
2379 // currentCharacter =
2380 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2383 // loop until end of comment */
2384 while ((currentCharacter != '/') || (!star)) {
2385 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2386 pushLineSeparator();
2387 star = currentCharacter == '*';
2389 currentCharacter = source[currentPosition++];
2390 // if (((currentCharacter = source[currentPosition++])
2392 // && (source[currentPosition] == 'u')) {
2393 // //-------------unicode traitement ------------
2394 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2395 // currentPosition++;
2396 // while (source[currentPosition] == 'u') {
2397 // currentPosition++;
2400 // Character.getNumericValue(source[currentPosition++]))
2404 // Character.getNumericValue(source[currentPosition++]))
2408 // Character.getNumericValue(source[currentPosition++]))
2412 // Character.getNumericValue(source[currentPosition++]))
2415 // //error don't care of the value
2416 // currentCharacter = 'A';
2417 // } //something different from * and /
2419 // currentCharacter =
2420 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2424 } catch (IndexOutOfBoundsException e) {
2432 if (isPHPIdentOrVarStart(currentCharacter) ) {
2434 scanIdentifierOrKeyword((currentCharacter == '$'));
2435 } catch (InvalidInputException ex) {
2440 if ( ObviousIdentCharNatures[currentCharacter]==C_DIGIT) {
2441 // if (Character.isDigit(currentCharacter)) {
2444 } catch (InvalidInputException ex) {
2451 // -----------------end switch while try--------------------
2452 } catch (IndexOutOfBoundsException e) {
2453 } catch (InvalidInputException e) {
2458 // public final boolean jumpOverUnicodeWhiteSpace()
2459 // throws InvalidInputException {
2461 // //handle the case of unicode. Jump over the next whiteSpace
2462 // //making startPosition pointing on the next available char
2463 // //On false, the currentCharacter is filled up with a potential
2467 // this.wasAcr = false;
2468 // int c1, c2, c3, c4;
2469 // int unicodeSize = 6;
2470 // currentPosition++;
2471 // while (source[currentPosition] == 'u') {
2472 // currentPosition++;
2476 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2478 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2480 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2482 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2484 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2487 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2488 // if (recordLineSeparator
2489 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2490 // pushLineSeparator();
2491 // if (Character.isWhitespace(currentCharacter))
2494 // //buffer the new char which is not a white space
2495 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2496 // //withoutUnicodePtr == 1 is true here
2498 // } catch (IndexOutOfBoundsException e) {
2499 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2502 public final int[] getLineEnds() {
2503 // return a bounded copy of this.lineEnds
2505 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2509 public char[] getSource() {
2513 public static boolean isIdentifierOrKeyword(int token) {
2514 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
2517 final char[] optimizedCurrentTokenSource1() {
2518 // always return the same char[], built only once
2519 // optimization, at no speed cost, for 99.5 % of the single-char identifiers
2520 char charOne = source[startPosition];
2575 return new char[] { charOne };
2579 final char[] optimizedCurrentTokenSource2() {
2581 c0 = source[startPosition];
2582 c1 = source[startPosition + 1];
2584 // always return the same char[], built only once
2585 // optimization, at no speed cost, for 99.5 % of the single-char identifiers
2588 return charArray_va;
2590 return charArray_vb;
2592 return charArray_vc;
2594 return charArray_vd;
2596 return charArray_ve;
2598 return charArray_vf;
2600 return charArray_vg;
2602 return charArray_vh;
2604 return charArray_vi;
2606 return charArray_vj;
2608 return charArray_vk;
2610 return charArray_vl;
2612 return charArray_vm;
2614 return charArray_vn;
2616 return charArray_vo;
2618 return charArray_vp;
2620 return charArray_vq;
2622 return charArray_vr;
2624 return charArray_vs;
2626 return charArray_vt;
2628 return charArray_vu;
2630 return charArray_vv;
2632 return charArray_vw;
2634 return charArray_vx;
2636 return charArray_vy;
2638 return charArray_vz;
2641 // try to return the same char[], built only once
2642 int hash = ((c0 << 6) + c1) % TableSize;
2643 char[][] table = charArray_length[0][hash];
2645 while (++i < InternalTableSize) {
2646 char[] charArray = table[i];
2647 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2650 // ---------other side---------
2652 int max = newEntry2;
2653 while (++i <= max) {
2654 char[] charArray = table[i];
2655 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2658 // --------add the entry-------
2659 if (++max >= InternalTableSize)
2662 table[max] = (r = new char[] { c0, c1 });
2667 final char[] optimizedCurrentTokenSource3() {
2668 // try to return the same char[], built only once
2670 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2672 char[][] table = charArray_length[1][hash];
2674 while (++i < InternalTableSize) {
2675 char[] charArray = table[i];
2676 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2679 // ---------other side---------
2681 int max = newEntry3;
2682 while (++i <= max) {
2683 char[] charArray = table[i];
2684 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2687 // --------add the entry-------
2688 if (++max >= InternalTableSize)
2691 table[max] = (r = new char[] { c0, c1, c2 });
2696 final char[] optimizedCurrentTokenSource4() {
2697 // try to return the same char[], built only once
2698 char c0, c1, c2, c3;
2699 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2700 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2702 char[][] table = charArray_length[2][(int) hash];
2704 while (++i < InternalTableSize) {
2705 char[] charArray = table[i];
2706 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2709 // ---------other side---------
2711 int max = newEntry4;
2712 while (++i <= max) {
2713 char[] charArray = table[i];
2714 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2717 // --------add the entry-------
2718 if (++max >= InternalTableSize)
2721 table[max] = (r = new char[] { c0, c1, c2, c3 });
2726 final char[] optimizedCurrentTokenSource5() {
2727 // try to return the same char[], built only once
2728 char c0, c1, c2, c3, c4;
2729 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2730 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2732 char[][] table = charArray_length[3][(int) hash];
2734 while (++i < InternalTableSize) {
2735 char[] charArray = table[i];
2736 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2739 // ---------other side---------
2741 int max = newEntry5;
2742 while (++i <= max) {
2743 char[] charArray = table[i];
2744 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2747 // --------add the entry-------
2748 if (++max >= InternalTableSize)
2751 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2756 final char[] optimizedCurrentTokenSource6() {
2757 // try to return the same char[], built only once
2758 char c0, c1, c2, c3, c4, c5;
2759 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2760 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2761 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2763 char[][] table = charArray_length[4][(int) hash];
2765 while (++i < InternalTableSize) {
2766 char[] charArray = table[i];
2767 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2768 && (c5 == charArray[5]))
2771 // ---------other side---------
2773 int max = newEntry6;
2774 while (++i <= max) {
2775 char[] charArray = table[i];
2776 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2777 && (c5 == charArray[5]))
2780 // --------add the entry-------
2781 if (++max >= InternalTableSize)
2784 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2789 public final void pushLineSeparator() throws InvalidInputException {
2790 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2791 final int INCREMENT = 250;
2792 if (this.checkNonExternalizedStringLiterals) {
2793 // reinitialize the current line for non-externalized string checking purposes
2796 // currentCharacter is at position currentPosition-1
2798 if (currentCharacter == '\r') {
2799 int separatorPos = currentPosition - 1;
2800 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2802 // System.out.println("CR-" + separatorPos);
2804 lineEnds[++linePtr] = separatorPos;
2805 } catch (IndexOutOfBoundsException e) {
2806 // linePtr value is correct
2807 int oldLength = lineEnds.length;
2808 int[] old = lineEnds;
2809 lineEnds = new int[oldLength + INCREMENT];
2810 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2811 lineEnds[linePtr] = separatorPos;
2813 // look-ahead for merged cr+lf
2815 if (source[currentPosition] == '\n') {
2816 // System.out.println("look-ahead LF-" + currentPosition);
2817 lineEnds[linePtr] = currentPosition;
2823 } catch (IndexOutOfBoundsException e) {
2828 if (currentCharacter == '\n') {
2829 // must merge a possible cr followed by lf
2830 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2831 // System.out.println("merge LF-" + (currentPosition - 1));
2832 lineEnds[linePtr] = currentPosition - 1;
2834 int separatorPos = currentPosition - 1;
2835 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2837 // System.out.println("LF-" + separatorPos);
2839 lineEnds[++linePtr] = separatorPos;
2840 } catch (IndexOutOfBoundsException e) {
2841 // linePtr value is correct
2842 int oldLength = lineEnds.length;
2843 int[] old = lineEnds;
2844 lineEnds = new int[oldLength + INCREMENT];
2845 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2846 lineEnds[linePtr] = separatorPos;
2854 public final void pushUnicodeLineSeparator() {
2855 // isUnicode means that the \r or \n has been read as a unicode character
2856 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2857 final int INCREMENT = 250;
2858 // currentCharacter is at position currentPosition-1
2859 if (this.checkNonExternalizedStringLiterals) {
2860 // reinitialize the current line for non-externalized string checking purposes
2864 if (currentCharacter == '\r') {
2865 int separatorPos = currentPosition - 6;
2866 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2868 // System.out.println("CR-" + separatorPos);
2870 lineEnds[++linePtr] = separatorPos;
2871 } catch (IndexOutOfBoundsException e) {
2872 // linePtr value is correct
2873 int oldLength = lineEnds.length;
2874 int[] old = lineEnds;
2875 lineEnds = new int[oldLength + INCREMENT];
2876 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2877 lineEnds[linePtr] = separatorPos;
2879 // look-ahead for merged cr+lf
2880 if (source[currentPosition] == '\n') {
2881 // System.out.println("look-ahead LF-" + currentPosition);
2882 lineEnds[linePtr] = currentPosition;
2890 if (currentCharacter == '\n') {
2891 // must merge a possible cr followed by lf
2892 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2893 // System.out.println("merge LF-" + (currentPosition - 1));
2894 lineEnds[linePtr] = currentPosition - 6;
2896 int separatorPos = currentPosition - 6;
2897 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2899 // System.out.println("LF-" + separatorPos);
2901 lineEnds[++linePtr] = separatorPos;
2902 } catch (IndexOutOfBoundsException e) {
2903 // linePtr value is correct
2904 int oldLength = lineEnds.length;
2905 int[] old = lineEnds;
2906 lineEnds = new int[oldLength + INCREMENT];
2907 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2908 lineEnds[linePtr] = separatorPos;
2916 public void recordComment(int token) {
2918 int stopPosition = this.currentPosition;
2920 case TokenNameCOMMENT_LINE:
2921 stopPosition = -this.lastCommentLinePosition;
2923 case TokenNameCOMMENT_BLOCK:
2924 stopPosition = -this.currentPosition;
2928 // a new comment is recorded
2929 int length = this.commentStops.length;
2930 if (++this.commentPtr >= length) {
2931 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2932 // grows the positions buffers too
2933 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2935 this.commentStops[this.commentPtr] = stopPosition;
2936 this.commentStarts[this.commentPtr] = this.startPosition;
2939 // public final void recordComment(boolean isJavadoc) {
2940 // // a new annotation comment is recorded
2942 // commentStops[++commentPtr] = isJavadoc
2943 // ? currentPosition
2944 // : -currentPosition;
2945 // } catch (IndexOutOfBoundsException e) {
2946 // int oldStackLength = commentStops.length;
2947 // int[] oldStack = commentStops;
2948 // commentStops = new int[oldStackLength + 30];
2949 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2950 // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2951 // //grows the positions buffers too
2952 // int[] old = commentStarts;
2953 // commentStarts = new int[oldStackLength + 30];
2954 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2956 // //the buffer is of a correct size here
2957 // commentStarts[commentPtr] = startPosition;
2959 public void resetTo(int begin, int end) {
2960 // reset the scanner to a given position where it may rescan again
2962 initialPosition = startPosition = currentPosition = begin;
2963 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2964 commentPtr = -1; // reset comment stack
2967 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2968 // the string with "\\u" is a legal string of two chars \ and u
2969 // thus we use a direct access to the source (for regular cases).
2970 // if (unicodeAsBackSlash) {
2971 // // consume next character
2972 // unicodeAsBackSlash = false;
2973 // if (((currentCharacter = source[currentPosition++]) == '\\')
2974 // && (source[currentPosition] == 'u')) {
2975 // getNextUnicodeChar();
2977 // if (withoutUnicodePtr != 0) {
2978 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2982 currentCharacter = source[currentPosition++];
2983 switch (currentCharacter) {
2985 currentCharacter = '\'';
2988 currentCharacter = '\\';
2991 currentCharacter = '\\';
2996 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2997 currentCharacter = source[currentPosition++];
2998 switch (currentCharacter) {
3000 // currentCharacter = '\b';
3003 currentCharacter = '\t';
3006 currentCharacter = '\n';
3009 // currentCharacter = '\f';
3012 currentCharacter = '\r';
3015 currentCharacter = '\"';
3018 currentCharacter = '\'';
3021 currentCharacter = '\\';
3024 currentCharacter = '$';
3027 // -----------octal escape--------------
3029 // OctalDigit OctalDigit
3030 // ZeroToThree OctalDigit OctalDigit
3031 int number = Character.getNumericValue(currentCharacter);
3032 if (number >= 0 && number <= 7) {
3033 boolean zeroToThreeNot = number > 3;
3034 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3035 int digit = Character.getNumericValue(currentCharacter);
3036 if (digit >= 0 && digit <= 7) {
3037 number = (number * 8) + digit;
3038 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3039 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3040 // Digit --> ignore last character
3043 digit = Character.getNumericValue(currentCharacter);
3044 if (digit >= 0 && digit <= 7) {
3045 // has read \ZeroToThree OctalDigit OctalDigit
3046 number = (number * 8) + digit;
3047 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3048 // --> ignore last character
3052 } else { // has read \OctalDigit NonDigit--> ignore last
3056 } else { // has read \OctalDigit NonOctalDigit--> ignore last
3060 } else { // has read \OctalDigit --> ignore last character
3064 throw new InvalidInputException(INVALID_ESCAPE);
3065 currentCharacter = (char) number;
3068 // throw new InvalidInputException(INVALID_ESCAPE);
3072 // public int scanIdentifierOrKeyword() throws InvalidInputException {
3073 // return scanIdentifierOrKeyword( false );
3075 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
3077 // first dispatch on the first char.
3078 // then the length. If there are several
3079 // keywords with the same length AND the same first char, then do another
3080 // dispatch on the second char :-)...cool....but fast !
3081 useAssertAsAnIndentifier = false;
3082 while (getNextCharAsJavaIdentifierPart()) {
3086 // if (new String(getCurrentTokenSource()).equals("$this")) {
3087 // return TokenNamethis;
3089 return TokenNameVariable;
3094 // if (withoutUnicodePtr == 0)
3095 // quick test on length == 1 but not on length > 12 while most identifiers
3096 // have a length which is <= 12...but there are lots of identifiers with
3097 // only one char....
3099 if ((length = currentPosition - startPosition) == 1)
3100 return TokenNameIdentifier;
3102 data = new char[length];
3103 index = startPosition;
3104 for (int i = 0; i < length; i++) {
3105 data[i] = Character.toLowerCase(source[index + i]);
3109 // if ((length = withoutUnicodePtr) == 1)
3110 // return TokenNameIdentifier;
3111 // // data = withoutUnicodeBuffer;
3112 // data = new char[withoutUnicodeBuffer.length];
3113 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3114 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3118 firstLetter = data[index];
3119 switch (firstLetter) {
3124 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3125 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3126 return TokenNameFILE;
3127 index = 0; // __LINE__
3128 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3129 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3130 return TokenNameLINE;
3134 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3135 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3136 return TokenNameCLASS_C;
3140 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3141 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3142 && (data[++index] == '_'))
3143 return TokenNameMETHOD_C;
3147 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3148 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3149 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3150 return TokenNameFUNC_C;
3153 return TokenNameIdentifier;
3155 // as and array abstract
3159 if ((data[++index] == 's')) {
3162 return TokenNameIdentifier;
3166 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3167 return TokenNameand;
3169 return TokenNameIdentifier;
3173 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3174 return TokenNamearray;
3176 return TokenNameIdentifier;
3178 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3179 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3180 return TokenNameabstract;
3182 return TokenNameIdentifier;
3184 return TokenNameIdentifier;
3190 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3191 return TokenNamebreak;
3193 return TokenNameIdentifier;
3195 return TokenNameIdentifier;
3198 // case catch class clone const continue
3201 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3202 return TokenNamecase;
3204 return TokenNameIdentifier;
3206 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3207 return TokenNamecatch;
3209 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3210 return TokenNameclass;
3212 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3213 return TokenNameclone;
3215 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3216 return TokenNameconst;
3218 return TokenNameIdentifier;
3220 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3221 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3222 return TokenNamecontinue;
3224 return TokenNameIdentifier;
3226 return TokenNameIdentifier;
3229 // declare default do die
3230 // TODO delete define ==> no keyword !
3233 if ((data[++index] == 'o'))
3236 return TokenNameIdentifier;
3238 // if ((data[++index] == 'e')
3239 // && (data[++index] == 'f')
3240 // && (data[++index] == 'i')
3241 // && (data[++index] == 'n')
3242 // && (data[++index] == 'e'))
3243 // return TokenNamedefine;
3245 // return TokenNameIdentifier;
3247 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3248 && (data[++index] == 'r') && (data[++index] == 'e'))
3249 return TokenNamedeclare;
3251 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3252 && (data[++index] == 'l') && (data[++index] == 't'))
3253 return TokenNamedefault;
3255 return TokenNameIdentifier;
3257 return TokenNameIdentifier;
3260 // echo else exit elseif extends eval
3263 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3264 return TokenNameecho;
3265 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3266 return TokenNameelse;
3267 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3268 return TokenNameexit;
3269 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3270 return TokenNameeval;
3272 return TokenNameIdentifier;
3275 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3276 return TokenNameendif;
3277 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3278 return TokenNameempty;
3280 return TokenNameIdentifier;
3283 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3284 && (data[++index] == 'r'))
3285 return TokenNameendfor;
3286 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3287 && (data[++index] == 'f'))
3288 return TokenNameelseif;
3290 return TokenNameIdentifier;
3292 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3293 && (data[++index] == 'd') && (data[++index] == 's'))
3294 return TokenNameextends;
3296 return TokenNameIdentifier;
3299 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3300 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3301 return TokenNameendwhile;
3303 return TokenNameIdentifier;
3306 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3307 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3308 return TokenNameendswitch;
3310 return TokenNameIdentifier;
3313 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3314 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3315 && (data[++index] == 'e'))
3316 return TokenNameenddeclare;
3318 if ((data[++index] == 'n') // endforeach
3319 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3320 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3321 return TokenNameendforeach;
3323 return TokenNameIdentifier;
3325 return TokenNameIdentifier;
3328 // for false final function
3331 if ((data[++index] == 'o') && (data[++index] == 'r'))
3332 return TokenNamefor;
3334 return TokenNameIdentifier;
3336 // if ((data[++index] == 'a') && (data[++index] == 'l')
3337 // && (data[++index] == 's') && (data[++index] == 'e'))
3338 // return TokenNamefalse;
3339 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3340 return TokenNamefinal;
3342 return TokenNameIdentifier;
3345 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3346 && (data[++index] == 'c') && (data[++index] == 'h'))
3347 return TokenNameforeach;
3349 return TokenNameIdentifier;
3352 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3353 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3354 return TokenNamefunction;
3356 return TokenNameIdentifier;
3358 return TokenNameIdentifier;
3363 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3364 && (data[++index] == 'l')) {
3365 return TokenNameglobal;
3368 return TokenNameIdentifier;
3370 // if int isset include include_once instanceof interface implements
3373 if (data[++index] == 'f')
3376 return TokenNameIdentifier;
3378 // if ((data[++index] == 'n') && (data[++index] == 't'))
3379 // return TokenNameint;
3381 // return TokenNameIdentifier;
3383 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3384 return TokenNameisset;
3386 return TokenNameIdentifier;
3388 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3389 && (data[++index] == 'd') && (data[++index] == 'e'))
3390 return TokenNameinclude;
3392 return TokenNameIdentifier;
3395 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3396 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3397 return TokenNameinterface;
3399 return TokenNameIdentifier;
3402 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3403 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3404 && (data[++index] == 'f'))
3405 return TokenNameinstanceof;
3406 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3407 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3408 && (data[++index] == 's'))
3409 return TokenNameimplements;
3411 return TokenNameIdentifier;
3413 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3414 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3415 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3416 return TokenNameinclude_once;
3418 return TokenNameIdentifier;
3420 return TokenNameIdentifier;
3425 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3426 return TokenNamelist;
3429 return TokenNameIdentifier;
3434 if ((data[++index] == 'e') && (data[++index] == 'w'))
3435 return TokenNamenew;
3437 return TokenNameIdentifier;
3439 // if ((data[++index] == 'u') && (data[++index] == 'l')
3440 // && (data[++index] == 'l'))
3441 // return TokenNamenull;
3443 // return TokenNameIdentifier;
3445 return TokenNameIdentifier;
3450 if (data[++index] == 'r') {
3454 // if (length == 12) {
3455 // if ((data[++index] == 'l')
3456 // && (data[++index] == 'd')
3457 // && (data[++index] == '_')
3458 // && (data[++index] == 'f')
3459 // && (data[++index] == 'u')
3460 // && (data[++index] == 'n')
3461 // && (data[++index] == 'c')
3462 // && (data[++index] == 't')
3463 // && (data[++index] == 'i')
3464 // && (data[++index] == 'o')
3465 // && (data[++index] == 'n')) {
3466 // return TokenNameold_function;
3469 return TokenNameIdentifier;
3471 // print public private protected
3474 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3475 return TokenNameprint;
3477 return TokenNameIdentifier;
3479 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3480 && (data[++index] == 'c')) {
3481 return TokenNamepublic;
3483 return TokenNameIdentifier;
3485 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3486 && (data[++index] == 't') && (data[++index] == 'e')) {
3487 return TokenNameprivate;
3489 return TokenNameIdentifier;
3491 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3492 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3493 return TokenNameprotected;
3495 return TokenNameIdentifier;
3497 return TokenNameIdentifier;
3499 // return require require_once
3501 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3502 && (data[++index] == 'n')) {
3503 return TokenNamereturn;
3505 } else if (length == 7) {
3506 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3507 && (data[++index] == 'r') && (data[++index] == 'e')) {
3508 return TokenNamerequire;
3510 } else if (length == 12) {
3511 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3512 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3513 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3514 return TokenNamerequire_once;
3517 return TokenNameIdentifier;
3522 if (data[++index] == 't')
3523 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3524 return TokenNamestatic;
3526 return TokenNameIdentifier;
3527 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3528 && (data[++index] == 'h'))
3529 return TokenNameswitch;
3531 return TokenNameIdentifier;
3533 return TokenNameIdentifier;
3539 if ((data[++index] == 'r') && (data[++index] == 'y'))
3540 return TokenNametry;
3542 return TokenNameIdentifier;
3544 // if ((data[++index] == 'r') && (data[++index] == 'u')
3545 // && (data[++index] == 'e'))
3546 // return TokenNametrue;
3548 // return TokenNameIdentifier;
3550 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3551 return TokenNamethrow;
3553 return TokenNameIdentifier;
3555 return TokenNameIdentifier;
3561 if ((data[++index] == 's') && (data[++index] == 'e'))
3562 return TokenNameuse;
3564 return TokenNameIdentifier;
3566 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3567 return TokenNameunset;
3569 return TokenNameIdentifier;
3571 return TokenNameIdentifier;
3577 if ((data[++index] == 'a') && (data[++index] == 'r'))
3578 return TokenNamevar;
3580 return TokenNameIdentifier;
3582 return TokenNameIdentifier;
3588 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3589 return TokenNamewhile;
3591 return TokenNameIdentifier;
3592 // case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3593 // (data[++index]=='e') && (data[++index]=='f')&&
3594 // (data[++index]=='p'))
3595 // return TokenNamewidefp ;
3597 // return TokenNameIdentifier;
3599 return TokenNameIdentifier;
3605 if ((data[++index] == 'o') && (data[++index] == 'r'))
3606 return TokenNamexor;
3608 return TokenNameIdentifier;
3610 return TokenNameIdentifier;
3613 return TokenNameIdentifier;
// Scans a numeric literal whose first digit is already in currentCharacter and returns the token kind.
// Visible outcomes:
// - a literal starting with 0x/0X is consumed as hexadecimal digits and yields TokenNameIntegerLiteral;
// - a d/D suffix yields TokenNameDoubleLiteral;
// - otherwise the last statement picks TokenNameDoubleLiteral or TokenNameIntegerLiteral from `floating`.
// dotPrefix is true when the digits were preceded by a '.'; it seeds the local `floating` flag.
// Throws InvalidInputException(INVALID_HEXA) when 0x is not followed by a hexadecimal digit, and
// InvalidInputException(INVALID_FLOAT) when an exponent marker (plus optional sign) is not followed by a digit.
// NOTE(review): short lines (closing braces, else branches and possibly assignments to `floating`) are
// missing from this listing, so the exact nesting of the branches below must be confirmed in the full file.
3617 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3618 // when entering this method the currentCharacter is the first
3619 // digit of the number , i.e. it may be preceded by a . when
3620 // dotPrefix is true
3621 boolean floating = dotPrefix;
3622 if ((!dotPrefix) && (currentCharacter == '0')) {
3623 if (getNextChar('x', 'X') >= 0) { // ----------hexa-----------------
3624 // force the first char of the hexa number to exist...
3625 // consume next character
3626 unicodeAsBackSlash = false;
3627 currentCharacter = source[currentPosition++];
3628 // if (((currentCharacter = source[currentPosition++]) == '\\')
3629 // && (source[currentPosition] == 'u')) {
3630 // getNextUnicodeChar();
3632 // if (withoutUnicodePtr != 0) {
3633 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3636 if (Character.digit(currentCharacter, 16) == -1)
3637 throw new InvalidInputException(INVALID_HEXA);
// consume the remaining hexadecimal digits
3639 while (getNextCharAsDigit(16)) {
3642 // if (getNextChar('l', 'L') >= 0)
3643 // return TokenNameLongLiteral;
3645 return TokenNameIntegerLiteral;
3647 // there is x or X in the number
3648 // potential octal ! ... some one may write 000099.0 ! thus 00100 <
3649 // 00078.0 is true !!!!! crazy language
3650 if (getNextCharAsDigit()) {
3651 // -------------potential octal-----------------
3652 while (getNextCharAsDigit()) {
3655 // if (getNextChar('l', 'L') >= 0) {
3656 // return TokenNameLongLiteral;
3659 // if (getNextChar('f', 'F') >= 0) {
3660 // return TokenNameFloatingPointLiteral;
3662 if (getNextChar('d', 'D') >= 0) {
3663 return TokenNameDoubleLiteral;
3664 } else { // make the distinction between octal and float ....
3665 if (getNextChar('.')) { // bingo ! ....
3666 while (getNextCharAsDigit()) {
3669 if (getNextChar('e', 'E') >= 0) {
3670 // consume next character
3671 unicodeAsBackSlash = false;
3672 currentCharacter = source[currentPosition++];
3673 // if (((currentCharacter = source[currentPosition++]) == '\\')
3674 // && (source[currentPosition] == 'u')) {
3675 // getNextUnicodeChar();
3677 // if (withoutUnicodePtr != 0) {
3678 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3681 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3682 // consume next character
3683 unicodeAsBackSlash = false;
3684 currentCharacter = source[currentPosition++];
3685 // if (((currentCharacter = source[currentPosition++]) == '\\')
3686 // && (source[currentPosition] == 'u')) {
3687 // getNextUnicodeChar();
3689 // if (withoutUnicodePtr != 0) {
3690 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3691 // currentCharacter;
// an exponent must contain at least one digit
3695 if (!Character.isDigit(currentCharacter))
3696 throw new InvalidInputException(INVALID_FLOAT);
3697 while (getNextCharAsDigit()) {
3701 // if (getNextChar('f', 'F') >= 0)
3702 // return TokenNameFloatingPointLiteral;
3703 getNextChar('d', 'D'); // jump over potential d or D
3704 return TokenNameDoubleLiteral;
3706 return TokenNameIntegerLiteral;
// general case: consume the digits of the integer part
3713 while (getNextCharAsDigit()) {
3716 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3717 // return TokenNameLongLiteral;
3718 if ((!dotPrefix) && (getNextChar('.'))) { // decimal part that can be empty
3719 while (getNextCharAsDigit()) {
3724 // if floating is true both exponent and suffix may be optional
3725 if (getNextChar('e', 'E') >= 0) {
3727 // consume next character
3728 unicodeAsBackSlash = false;
3729 currentCharacter = source[currentPosition++];
3730 // if (((currentCharacter = source[currentPosition++]) == '\\')
3731 // && (source[currentPosition] == 'u')) {
3732 // getNextUnicodeChar();
3734 // if (withoutUnicodePtr != 0) {
3735 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3738 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3741 unicodeAsBackSlash = false;
3742 currentCharacter = source[currentPosition++];
3743 // if (((currentCharacter = source[currentPosition++]) == '\\')
3744 // && (source[currentPosition] == 'u')) {
3745 // getNextUnicodeChar();
3747 // if (withoutUnicodePtr != 0) {
3748 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// an exponent must contain at least one digit
3752 if (!Character.isDigit(currentCharacter))
3753 throw new InvalidInputException(INVALID_FLOAT);
3754 while (getNextCharAsDigit()) {
3758 if (getNextChar('d', 'D') >= 0)
3759 return TokenNameDoubleLiteral;
3760 // if (getNextChar('f', 'F') >= 0)
3761 // return TokenNameFloatingPointLiteral;
3762 // the long flag has been tested before
3763 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3767 * Search the line number corresponding to a specific position
// Maps a source position to a line number using the lineEnds table; `length = linePtr + 1` shows that
// entries 0..linePtr are the ones in use. g and d bound the searched range and position is compared
// against lineEnds[m].
// NOTE(review): this looks like a binary search over lineEnds, but the loop header, the updates of
// g/d/m and every return statement are on lines missing from this listing - confirm in the full file.
3770 public final int getLineNumber(int position) {
// no line-end table available
3771 if (lineEnds == null)
3773 int length = linePtr + 1;
3776 int g = 0, d = length - 1;
3780 if (position < lineEnds[m]) {
3782 } else if (position > lineEnds[m]) {
3788 if (position < lineEnds[m]) {
// Setter taking the desired PHP-mode flag.
// NOTE(review): the body is not visible in this listing; presumably it assigns the public phpMode
// field declared at the top of the class - confirm in the full file.
3794 public void setPHPMode(boolean mode) {
3798 public final void setSource(char[] source) {
3799 setSource(null, source);
// Installs a new source buffer together with the compilation unit it belongs to and resets the scan state:
// initialPosition and currentPosition go back to 0, containsAssertKeyword is cleared and
// withoutUnicodeBuffer is re-allocated with the length of the installed buffer.
// A null source is replaced by an empty char array, so this.source is never null afterwards.
// NOTE(review): at least one short statement between the null check and the position reset is missing
// from this listing - confirm in the full file before rewriting.
3802 public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3803 // the source-buffer is set to sourceString
3804 this.compilationUnit = compilationUnit;
3805 if (source == null) {
3806 this.source = new char[0];
3808 this.source = source;
3811 initialPosition = currentPosition = 0;
3812 containsAssertKeyword = false;
// sized from this.source (not the parameter) so that a null argument is handled as well
3813 withoutUnicodeBuffer = new char[this.source.length];
3814 // encapsedStringStack = new Stack();
3817 public String toString() {
3818 if (startPosition == source.length)
3819 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3820 if (currentPosition > source.length)
3821 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3822 char front[] = new char[startPosition];
3823 System.arraycopy(source, 0, front, 0, startPosition);
3824 int middleLength = (currentPosition - 1) - startPosition + 1;
3826 if (middleLength > -1) {
3827 middle = new char[middleLength];
3828 System.arraycopy(source, startPosition, middle, 0, middleLength);
3830 middle = new char[0];
3832 char end[] = new char[source.length - (currentPosition - 1)];
3833 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3834 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3835 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
// Returns a human-readable rendering of the token code `act`.
// Keyword and operator tokens map to fixed text (for example TokenNameabstract -> "abstract",
// TokenNamePLUS_PLUS -> "++"). Tokens that carry source text (inline HTML, identifiers, variables,
// numeric and string literals, heredocs, whitespace and comments) are rendered as
// Label(<text of getCurrentTokenSource()>). The last return renders any other code as
// "not-a-token(<act>) " followed by the current token source.
// NOTE(review): the switch header, every short case label (for example the ones in front of "AND",
// "as", "case", "do") and some short return lines are missing from this listing.
3839 public final String toStringAction(int act) {
3841 case TokenNameERROR:
3842 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3844 case TokenNameINLINE_HTML:
3845 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3846 case TokenNameIdentifier:
3847 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3848 case TokenNameVariable:
3849 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// ---- keywords ----
3850 case TokenNameabstract:
3851 return "abstract"; //$NON-NLS-1$
3853 return "AND"; //$NON-NLS-1$
3854 case TokenNamearray:
3855 return "array"; //$NON-NLS-1$
3857 return "as"; //$NON-NLS-1$
3858 case TokenNamebreak:
3859 return "break"; //$NON-NLS-1$
3861 return "case"; //$NON-NLS-1$
3862 case TokenNameclass:
3863 return "class"; //$NON-NLS-1$
3864 case TokenNamecatch:
3865 return "catch"; //$NON-NLS-1$
3866 case TokenNameclone:
3869 case TokenNameconst:
3872 case TokenNamecontinue:
3873 return "continue"; //$NON-NLS-1$
3874 case TokenNamedefault:
3875 return "default"; //$NON-NLS-1$
3876 // case TokenNamedefine :
3877 // return "define"; //$NON-NLS-1$
3879 return "do"; //$NON-NLS-1$
3881 return "echo"; //$NON-NLS-1$
3883 return "else"; //$NON-NLS-1$
3884 case TokenNameelseif:
3885 return "elseif"; //$NON-NLS-1$
3886 case TokenNameendfor:
3887 return "endfor"; //$NON-NLS-1$
3888 case TokenNameendforeach:
3889 return "endforeach"; //$NON-NLS-1$
3890 case TokenNameendif:
3891 return "endif"; //$NON-NLS-1$
3892 case TokenNameendswitch:
3893 return "endswitch"; //$NON-NLS-1$
3894 case TokenNameendwhile:
3895 return "endwhile"; //$NON-NLS-1$
3898 case TokenNameextends:
3899 return "extends"; //$NON-NLS-1$
3900 // case TokenNamefalse :
3901 // return "false"; //$NON-NLS-1$
3902 case TokenNamefinal:
3903 return "final"; //$NON-NLS-1$
3905 return "for"; //$NON-NLS-1$
3906 case TokenNameforeach:
3907 return "foreach"; //$NON-NLS-1$
3908 case TokenNamefunction:
3909 return "function"; //$NON-NLS-1$
3910 case TokenNameglobal:
3911 return "global"; //$NON-NLS-1$
3913 return "if"; //$NON-NLS-1$
3914 case TokenNameimplements:
3915 return "implements"; //$NON-NLS-1$
3916 case TokenNameinclude:
3917 return "include"; //$NON-NLS-1$
3918 case TokenNameinclude_once:
3919 return "include_once"; //$NON-NLS-1$
3920 case TokenNameinstanceof:
3921 return "instanceof"; //$NON-NLS-1$
3922 case TokenNameinterface:
3923 return "interface"; //$NON-NLS-1$
3924 case TokenNameisset:
3925 return "isset"; //$NON-NLS-1$
3927 return "list"; //$NON-NLS-1$
3929 return "new"; //$NON-NLS-1$
3930 // case TokenNamenull :
3931 // return "null"; //$NON-NLS-1$
3933 return "OR"; //$NON-NLS-1$
3934 case TokenNameprint:
3935 return "print"; //$NON-NLS-1$
3936 case TokenNameprivate:
3937 return "private"; //$NON-NLS-1$
3938 case TokenNameprotected:
3939 return "protected"; //$NON-NLS-1$
3940 case TokenNamepublic:
3941 return "public"; //$NON-NLS-1$
3942 case TokenNamerequire:
3943 return "require"; //$NON-NLS-1$
3944 case TokenNamerequire_once:
3945 return "require_once"; //$NON-NLS-1$
3946 case TokenNamereturn:
3947 return "return"; //$NON-NLS-1$
3948 case TokenNamestatic:
3949 return "static"; //$NON-NLS-1$
3950 case TokenNameswitch:
3951 return "switch"; //$NON-NLS-1$
3952 // case TokenNametrue :
3953 // return "true"; //$NON-NLS-1$
3954 case TokenNameunset:
3955 return "unset"; //$NON-NLS-1$
3957 return "var"; //$NON-NLS-1$
3958 case TokenNamewhile:
3959 return "while"; //$NON-NLS-1$
3961 return "XOR"; //$NON-NLS-1$
3962 // case TokenNamethis :
3963 // return "$this"; //$NON-NLS-1$
// ---- literals: rendered together with the current token source ----
3964 case TokenNameIntegerLiteral:
3965 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3966 case TokenNameDoubleLiteral:
3967 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3968 case TokenNameStringDoubleQuote:
3969 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3970 case TokenNameStringSingleQuote:
3971 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3972 case TokenNameStringInterpolated:
3973 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3974 case TokenNameEncapsedString0:
3975 return "`"; //$NON-NLS-1$
3976 // case TokenNameEncapsedString1:
3977 // return "\'"; //$NON-NLS-1$
3978 // case TokenNameEncapsedString2:
3979 // return "\""; //$NON-NLS-1$
3980 case TokenNameSTRING:
3981 return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3982 case TokenNameHEREDOC:
3983 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// ---- operators and punctuation ----
3984 case TokenNamePLUS_PLUS:
3985 return "++"; //$NON-NLS-1$
3986 case TokenNameMINUS_MINUS:
3987 return "--"; //$NON-NLS-1$
3988 case TokenNameEQUAL_EQUAL:
3989 return "=="; //$NON-NLS-1$
3990 case TokenNameEQUAL_EQUAL_EQUAL:
3991 return "==="; //$NON-NLS-1$
3992 case TokenNameEQUAL_GREATER:
3993 return "=>"; //$NON-NLS-1$
3994 case TokenNameLESS_EQUAL:
3995 return "<="; //$NON-NLS-1$
3996 case TokenNameGREATER_EQUAL:
3997 return ">="; //$NON-NLS-1$
3998 case TokenNameNOT_EQUAL:
3999 return "!="; //$NON-NLS-1$
4000 case TokenNameNOT_EQUAL_EQUAL:
4001 return "!=="; //$NON-NLS-1$
4002 case TokenNameLEFT_SHIFT:
4003 return "<<"; //$NON-NLS-1$
4004 case TokenNameRIGHT_SHIFT:
4005 return ">>"; //$NON-NLS-1$
4006 case TokenNamePLUS_EQUAL:
4007 return "+="; //$NON-NLS-1$
4008 case TokenNameMINUS_EQUAL:
4009 return "-="; //$NON-NLS-1$
4010 case TokenNameMULTIPLY_EQUAL:
4011 return "*="; //$NON-NLS-1$
4012 case TokenNameDIVIDE_EQUAL:
4013 return "/="; //$NON-NLS-1$
4014 case TokenNameAND_EQUAL:
4015 return "&="; //$NON-NLS-1$
4016 case TokenNameOR_EQUAL:
4017 return "|="; //$NON-NLS-1$
4018 case TokenNameXOR_EQUAL:
4019 return "^="; //$NON-NLS-1$
4020 case TokenNameREMAINDER_EQUAL:
4021 return "%="; //$NON-NLS-1$
4022 case TokenNameDOT_EQUAL:
4023 return ".="; //$NON-NLS-1$
4024 case TokenNameLEFT_SHIFT_EQUAL:
4025 return "<<="; //$NON-NLS-1$
4026 case TokenNameRIGHT_SHIFT_EQUAL:
4027 return ">>="; //$NON-NLS-1$
4028 case TokenNameOR_OR:
4029 return "||"; //$NON-NLS-1$
4030 case TokenNameAND_AND:
4031 return "&&"; //$NON-NLS-1$
4033 return "+"; //$NON-NLS-1$
4034 case TokenNameMINUS:
4035 return "-"; //$NON-NLS-1$
4036 case TokenNameMINUS_GREATER:
4039 return "!"; //$NON-NLS-1$
4040 case TokenNameREMAINDER:
4041 return "%"; //$NON-NLS-1$
4043 return "^"; //$NON-NLS-1$
4045 return "&"; //$NON-NLS-1$
4046 case TokenNameMULTIPLY:
4047 return "*"; //$NON-NLS-1$
4049 return "|"; //$NON-NLS-1$
4050 case TokenNameTWIDDLE:
4051 return "~"; //$NON-NLS-1$
4052 case TokenNameTWIDDLE_EQUAL:
4053 return "~="; //$NON-NLS-1$
4054 case TokenNameDIVIDE:
4055 return "/"; //$NON-NLS-1$
4056 case TokenNameGREATER:
4057 return ">"; //$NON-NLS-1$
4059 return "<"; //$NON-NLS-1$
4060 case TokenNameLPAREN:
4061 return "("; //$NON-NLS-1$
4062 case TokenNameRPAREN:
4063 return ")"; //$NON-NLS-1$
4064 case TokenNameLBRACE:
4065 return "{"; //$NON-NLS-1$
4066 case TokenNameRBRACE:
4067 return "}"; //$NON-NLS-1$
4068 case TokenNameLBRACKET:
4069 return "["; //$NON-NLS-1$
4070 case TokenNameRBRACKET:
4071 return "]"; //$NON-NLS-1$
4072 case TokenNameSEMICOLON:
4073 return ";"; //$NON-NLS-1$
4074 case TokenNameQUESTION:
4075 return "?"; //$NON-NLS-1$
4076 case TokenNameCOLON:
4077 return ":"; //$NON-NLS-1$
4078 case TokenNameCOMMA:
4079 return ","; //$NON-NLS-1$
4081 return "."; //$NON-NLS-1$
4082 case TokenNameEQUAL:
4083 return "="; //$NON-NLS-1$
4086 case TokenNameDOLLAR:
4088 case TokenNameDOLLAR_LBRACE:
4090 case TokenNameLBRACE_DOLLAR:
4093 return "EOF"; //$NON-NLS-1$
// ---- whitespace and comments: rendered together with the current token source ----
4094 case TokenNameWHITESPACE:
4095 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4096 case TokenNameCOMMENT_LINE:
4097 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4098 case TokenNameCOMMENT_BLOCK:
4099 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4100 case TokenNameCOMMENT_PHPDOC:
4101 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4102 // case TokenNameHTML :
4103 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
// ---- magic constants and cast tokens ----
4106 return "__FILE__"; //$NON-NLS-1$
4108 return "__LINE__"; //$NON-NLS-1$
4109 case TokenNameCLASS_C:
4110 return "__CLASS__"; //$NON-NLS-1$
4111 case TokenNameMETHOD_C:
4112 return "__METHOD__"; //$NON-NLS-1$
4113 case TokenNameFUNC_C:
4114 return "__FUNCTION__"; //$NON-NLS-1$
4115 case TokenNameboolCAST:
4116 return "( bool )"; //$NON-NLS-1$
4117 case TokenNameintCAST:
4118 return "( int )"; //$NON-NLS-1$
4119 case TokenNamedoubleCAST:
4120 return "( double )"; //$NON-NLS-1$
4121 case TokenNameobjectCAST:
4122 return "( object )"; //$NON-NLS-1$
4123 case TokenNamestringCAST:
4124 return "( string )"; //$NON-NLS-1$
// NOTE(review): the Integer(int) constructor is deprecated in current JDKs; concatenating `act`
// directly produces the same text without the boxing.
4126 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner with the check for non-externalized string literals switched off.
 *
 * @param tokenizeComments forwarded to the three-argument constructor
 * @param tokenizeWhiteSpace forwarded to the three-argument constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(
      tokenizeComments,
      tokenizeWhiteSpace,
      false /* checkNonExternalizedStringLiterals */);
}
/**
 * Creates a scanner with assert mode switched off.
 *
 * @param tokenizeComments forwarded to the four-argument constructor
 * @param tokenizeWhiteSpace forwarded to the four-argument constructor
 * @param checkNonExternalizedStringLiterals forwarded to the four-argument constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
  this(
      tokenizeComments,
      tokenizeWhiteSpace,
      checkNonExternalizedStringLiterals,
      false /* assertMode */);
}
/**
 * Creates a scanner without string tokenizing and without task tags; the task-tag comparison is
 * requested as case sensitive.
 *
 * @param tokenizeComments forwarded to the eight-argument constructor
 * @param tokenizeWhiteSpace forwarded to the eight-argument constructor
 * @param checkNonExternalizedStringLiterals forwarded to the eight-argument constructor
 * @param assertMode forwarded to the eight-argument constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode) {
  this(
      tokenizeComments,
      tokenizeWhiteSpace,
      checkNonExternalizedStringLiterals,
      assertMode,
      false /* tokenizeStrings */,
      null /* taskTags */,
      null /* taskPriorities */,
      true /* isTaskCaseSensitive */);
}
/**
 * Creates a fully configured scanner. The end-of-file position starts at Integer.MAX_VALUE.
 *
 * @param tokenizeComments stored in the field of the same name
 * @param tokenizeWhiteSpace stored in the field of the same name
 * @param checkNonExternalizedStringLiterals stored in the field of the same name
 * @param assertMode accepted for signature compatibility only; it is not stored
 * @param tokenizeStrings stored in the field of the same name
 * @param taskTags task tags looked for by checkTaskTag, may be null
 * @param taskPriorities priorities matched to taskTags by index, may be null
 * @param isTaskCaseSensitive whether checkTaskTag compares tags case sensitively
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
  this.eofPosition = Integer.MAX_VALUE;
  this.tokenizeComments = tokenizeComments;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeStrings = tokenizeStrings;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.taskTags = taskTags;
  this.taskPriorities = taskPriorities;
  // This argument used to be dropped silently, so checkTaskTag always ran with the field's default.
  this.isTaskCaseSensitive = isTaskCaseSensitive;
}
4160 private void checkNonExternalizeString() throws InvalidInputException {
4161 if (currentLine == null)
4163 parseTags(currentLine);
4166 private void parseTags(NLSLine line) throws InvalidInputException {
4167 String s = new String(getCurrentTokenSource());
4168 int pos = s.indexOf(TAG_PREFIX);
4169 int lineLength = line.size();
4171 int start = pos + TAG_PREFIX_LENGTH;
4172 int end = s.indexOf(TAG_POSTFIX, start);
4173 String index = s.substring(start, end);
4176 i = Integer.parseInt(index) - 1;
4177 // Tags are one based not zero based.
4178 } catch (NumberFormatException e) {
4179 i = -1; // we don't want to consider this as a valid NLS tag
4181 if (line.exists(i)) {
4184 pos = s.indexOf(TAG_PREFIX, start);
4186 this.nonNLSStrings = new StringLiteral[lineLength];
4187 int nonNLSCounter = 0;
4188 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4189 StringLiteral literal = (StringLiteral) iterator.next();
4190 if (literal != null) {
4191 this.nonNLSStrings[nonNLSCounter++] = literal;
4194 if (nonNLSCounter == 0) {
4195 this.nonNLSStrings = null;
4199 this.wasNonExternalizedStringLiteral = true;
4200 if (nonNLSCounter != lineLength) {
4201 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
// Decodes the escape sequence that follows a backslash and leaves the decoded character in
// currentCharacter. The visible assignments cover the single-character escapes for backspace, tab,
// line feed, form feed, carriage return, double quote, single quote and backslash, followed by
// octal escapes of up to three digits whose value is accumulated in `number`.
// Throws InvalidInputException(INVALID_ESCAPE) on the two visible error paths.
// NOTE(review): the case labels, break statements, position adjustments and the conditions guarding
// the two throw statements are on lines missing from this listing - confirm in the full file.
4206 public final void scanEscapeCharacter() throws InvalidInputException {
4207 // the string with "\\u" is a legal string of two chars \ and u
4208 // thus we use a direct access to the source (for regular cases).
4209 if (unicodeAsBackSlash) {
4210 // consume next character
4211 unicodeAsBackSlash = false;
4212 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4213 // (source[currentPosition] == 'u')) {
4214 // getNextUnicodeChar();
4216 if (withoutUnicodePtr != 0) {
4217 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// read the character that selects the escape
4221 currentCharacter = source[currentPosition++];
4222 switch (currentCharacter) {
4224 currentCharacter = '\b';
4227 currentCharacter = '\t';
4230 currentCharacter = '\n';
4233 currentCharacter = '\f';
4236 currentCharacter = '\r';
4239 currentCharacter = '\"';
4242 currentCharacter = '\'';
4245 currentCharacter = '\\';
4248 // -----------octal escape--------------
4250 // OctalDigit OctalDigit
4251 // ZeroToThree OctalDigit OctalDigit
4252 int number = Character.getNumericValue(currentCharacter);
4253 if (number >= 0 && number <= 7) {
// a first digit above 3 rules out a third octal digit
4254 boolean zeroToThreeNot = number > 3;
4255 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4256 int digit = Character.getNumericValue(currentCharacter);
4257 if (digit >= 0 && digit <= 7) {
4258 number = (number * 8) + digit;
4259 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4260 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4261 // Digit --> ignore last character
4264 digit = Character.getNumericValue(currentCharacter);
4265 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4266 // OctalDigit OctalDigit
4267 number = (number * 8) + digit;
4268 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4269 // --> ignore last character
4273 } else { // has read \OctalDigit NonDigit--> ignore last
4277 } else { // has read \OctalDigit NonOctalDigit--> ignore last
4281 } else { // has read \OctalDigit --> ignore last character
4285 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the decoded character
4286 currentCharacter = (char) number;
4288 throw new InvalidInputException(INVALID_ESCAPE);
4292 // check presence of task: tags
4293 // TODO (frederic) see if we need to take unicode characters into account...
// Scans the comment located between commentStart and commentEnd for the configured taskTags and
// records every hit in the parallel arrays foundTaskTags, foundTaskPriorities, foundTaskPositions
// and foundTaskMessages (allocated with 5 slots, doubled when full).
// First loop: walks the comment from commentStart + 2 and tries each tag at each position; a tag
// is not considered right after '@', and the comparison honours isTaskCaseSensitive.
// Second loop: for every task found by this call, derives the message that follows the tag
// (bounded by the start of the next task or commentEnd - 1), trims whitespace at both ends and
// stores the message together with the updated end position.
// NOTE(review): many short lines (declarations, continue/break statements, closing braces) are
// missing from this listing - confirm the exact control flow in the full file.
4294 public void checkTaskTag(int commentStart, int commentEnd) {
4295 char[] src = this.source;
4297 // only look for newer task: tags
4298 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// remember where the tasks added by this call start
4301 int foundTaskIndex = this.foundTaskCount;
4302 char previous = src[commentStart + 1]; // should be '*' or '/'
4303 nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4305 char[] priority = null;
4306 // check for tag occurrence only if not ambiguous with javadoc tag
4307 if (previous != '@') {
4308 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4309 tag = this.taskTags[itag];
4310 int tagLength = tag.length;
4314 // ensure tag is not preceded by a letter if tag starts with a letter
4315 if (Scanner.isPHPIdentifierStart(tag[0])) {
4316 if (Scanner.isPHPIdentifierPart(previous)) {
// compare the tag character by character against the source
4321 for (int t = 0; t < tagLength; t++) {
4324 if (x >= this.eofPosition || x >= commentEnd)
4326 if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4327 if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case
4334 // ensure tag is not followed with letter if tag finishes with a
4336 if (i + tagLength < commentEnd && Scanner.isPHPIdentifierPart(src[i + tagLength - 1])) {
4337 if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
// allocate the result arrays lazily, then grow them by doubling when full
4340 if (this.foundTaskTags == null) {
4341 this.foundTaskTags = new char[5][];
4342 this.foundTaskMessages = new char[5][];
4343 this.foundTaskPriorities = new char[5][];
4344 this.foundTaskPositions = new int[5][];
4345 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4346 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4347 this.foundTaskCount);
4348 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4349 this.foundTaskCount);
4350 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4351 this.foundTaskCount);
4352 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4353 this.foundTaskCount);
// priorities are matched to tags by index; a missing entry yields null
4356 priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4358 this.foundTaskTags[this.foundTaskCount] = tag;
4359 this.foundTaskPriorities[this.foundTaskCount] = priority;
4360 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4361 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4362 this.foundTaskCount++;
4363 i += tagLength - 1; // will be incremented when looping
4369 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4370 // retrieve message start and end positions
4371 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4372 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4373 // at most beginning of next task
4374 if (max_value < msgStart) {
4375 max_value = msgStart; // would only occur if tag is before EOF.
4379 for (int j = msgStart; j < max_value; j++) {
4380 if ((c = src[j]) == '\n' || c == '\r') {
4386 for (int j = max_value; j > msgStart; j--) {
4387 if ((c = src[j]) == '*') {
4395 if (msgStart == end)
// NOTE(review): both trimming loops read src[...] before testing msgStart <= end, so the bound does
// not protect the array access; swapping the operands would be safer.
4398 while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4400 while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4402 // update the end position of the task
4403 this.foundTaskPositions[i][1] = end;
4404 // get the message source
4405 final int messageLength = end - msgStart + 1;
4406 char[] message = new char[messageLength];
4407 System.arraycopy(src, msgStart, message, 0, messageLength);
4408 this.foundTaskMessages[i] = message;
4412 // check presence of task: tags
4413 // public void checkTaskTag(int commentStart, int commentEnd) {
4414 // // only look for newer task: tags
4415 // if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount
4416 // - 1][0] >= commentStart) {
4419 // int foundTaskIndex = this.foundTaskCount;
4420 // nextChar: for (int i = commentStart; i < commentEnd && i <
4421 // this.eofPosition; i++) {
4422 // char[] tag = null;
4423 // char[] priority = null;
4424 // // check for tag occurrence
4425 // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4426 // tag = this.taskTags[itag];
4427 // priority = this.taskPriorities != null && itag < this.taskPriorities.length
4428 // ? this.taskPriorities[itag] : null;
4429 // int tagLength = tag.length;
4430 // for (int t = 0; t < tagLength; t++) {
4431 // if (this.source[i + t] != tag[t])
4432 // continue nextTag;
4434 // if (this.foundTaskTags == null) {
4435 // this.foundTaskTags = new char[5][];
4436 // this.foundTaskMessages = new char[5][];
4437 // this.foundTaskPriorities = new char[5][];
4438 // this.foundTaskPositions = new int[5][];
4439 // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4440 // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new
4441 // char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4442 // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new
4443 // char[this.foundTaskCount * 2][], 0,
4444 // this.foundTaskCount);
4445 // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities =
4446 // new char[this.foundTaskCount * 2][], 0,
4447 // this.foundTaskCount);
4448 // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new
4449 // int[this.foundTaskCount * 2][], 0,
4450 // this.foundTaskCount);
4452 // this.foundTaskTags[this.foundTaskCount] = tag;
4453 // this.foundTaskPriorities[this.foundTaskCount] = priority;
4454 // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength
4456 // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4457 // this.foundTaskCount++;
4458 // i += tagLength - 1; // will be incremented when looping
4461 // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4462 // // retrieve message start and end positions
4463 // int msgStart = this.foundTaskPositions[i][0] +
4464 // this.foundTaskTags[i].length;
4465 // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i +
4466 // 1][0] - 1 : commentEnd - 1;
4467 // // at most beginning of next task
4468 // if (max_value < msgStart)
4469 // max_value = msgStart; // would only occur if tag is before EOF.
4472 // for (int j = msgStart; j < max_value; j++) {
4473 // if ((c = this.source[j]) == '\n' || c == '\r') {
4479 // for (int j = max_value; j > msgStart; j--) {
4480 // if ((c = this.source[j]) == '*') {
4488 // if (msgStart == end)
4489 // continue; // empty
4490 // // trim the message
4491 // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4493 // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4495 // // update the end position of the task
4496 // this.foundTaskPositions[i][1] = end;
4497 // // get the message source
4498 // final int messageLength = end - msgStart + 1;
4499 // char[] message = new char[messageLength];
4500 // System.arraycopy(source, msgStart, message, 0, messageLength);
4501 // this.foundTaskMessages[i] = message;