1 /***********************************************************************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3 * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4 * and is available at http://www.eclipse.org/legal/cpl-v05.html
6 * Contributors: IBM Corporation - initial API and implementation
7 **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
14 import net.sourceforge.phpdt.core.compiler.CharOperation;
15 import net.sourceforge.phpdt.core.compiler.IScanner;
16 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
17 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
21 public class Scanner implements IScanner, ITerminalSymbols {
/*
 * APIs are:
 * - getNextToken(), which returns the current type of the token (this value
 *   is not memorized by the scanner)
 * - getCurrentTokenSource(), which provides the token "REAL" source (aka all
 *   unicode escapes have been transformed into the correct char)
 * - sourceStart, which gives the position into the stream
 * - currentPosition-1, which gives the sourceEnd position into the stream
 */
30 // private boolean assertMode;
31 public boolean useAssertAsAnIndentifier = false;
33 // flag indicating if processed source contains occurrences of keyword assert
34 public boolean containsAssertKeyword = false;
36 public boolean recordLineSeparator;
38 public boolean ignorePHPOneLiner = false;
40 public boolean phpMode = false;
42 // public boolean phpExpressionTag = false;
44 int fFillerToken = TokenNameEOF;
45 // public Stack encapsedStringStack = null;
47 public char currentCharacter;
49 public int startPosition;
51 public int currentPosition;
53 public int initialPosition, eofPosition;
55 // after this position eof are generated instead of real token from the
57 public boolean tokenizeComments;
59 public boolean tokenizeWhiteSpace;
61 public boolean tokenizeStrings;
63 // source should be viewed as a window (aka a part)
64 // of a entire very large stream
68 public char[] withoutUnicodeBuffer;
70 public int withoutUnicodePtr;
72 // when == 0 ==> no unicode in the current token
73 public boolean unicodeAsBackSlash = false;
75 public boolean scanningFloatLiteral = false;
77 // support for /** comments
78 public int[] commentStops = new int[10];
80 public int[] commentStarts = new int[10];
82 public int commentPtr = -1; // no comment test with commentPtr value -1
84 protected int lastCommentLinePosition = -1;
86 // diet parsing support - jump over some method body when requested
87 public boolean diet = false;
89 // support for the poor-line-debuggers ....
90 // remember the position of the cr/lf
91 public int[] lineEnds = new int[250];
93 public int linePtr = -1;
95 public boolean wasAcr = false;
97 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
99 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
101 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
103 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
105 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
107 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
109 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
111 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
113 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
115 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
117 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
119 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
// ----------------optimized identifier management------------------
122 static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
123 charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
124 charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
125 charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
126 charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
127 charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
128 charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
129 charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
130 charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
132 static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
133 '$', 'c' }, charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' }, charArray_vf = new char[] { '$',
134 'f' }, charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' },
135 charArray_vi = new char[] { '$', 'i' }, charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' },
136 charArray_vl = new char[] { '$', 'l' }, charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' },
137 charArray_vo = new char[] { '$', 'o' }, charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' },
138 charArray_vr = new char[] { '$', 'r' }, charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' },
139 charArray_vu = new char[] { '$', 'u' }, charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' },
140 charArray_vx = new char[] { '$', 'x' }, charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
142 public final static int MAX_OBVIOUS = 256;
144 static final int[] ObviousIdentCharNatures = new int[MAX_OBVIOUS];
146 public final static int C_DOLLAR = 8;
148 public final static int C_LETTER = 4;
150 public final static int C_DIGIT = 3;
152 public final static int C_SEPARATOR = 2;
154 public final static int C_SPACE = 1;
156 for (int i = '0'; i <= '9'; i++)
157 ObviousIdentCharNatures[i] = C_DIGIT;
159 for (int i = 'a'; i <= 'z'; i++)
160 ObviousIdentCharNatures[i] = C_LETTER;
161 for (int i = 'A'; i <= 'Z'; i++)
162 ObviousIdentCharNatures[i] = C_LETTER;
163 ObviousIdentCharNatures['_'] = C_LETTER;
164 for (int i = 127; i <= 255; i++)
165 ObviousIdentCharNatures[i] = C_LETTER;
167 ObviousIdentCharNatures['$'] = C_DOLLAR;
169 ObviousIdentCharNatures[10] = C_SPACE; // \ u000a: LINE FEED
170 ObviousIdentCharNatures[12] = C_SPACE; // \ u000c: FORM FEED
171 ObviousIdentCharNatures[13] = C_SPACE; // \ u000d: CARRIAGE RETURN
172 ObviousIdentCharNatures[32] = C_SPACE; // \ u0020: SPACE
173 ObviousIdentCharNatures[9] = C_SPACE; // \ u0009: HORIZONTAL TABULATION
175 ObviousIdentCharNatures['.'] = C_SEPARATOR;
176 ObviousIdentCharNatures[':'] = C_SEPARATOR;
177 ObviousIdentCharNatures[';'] = C_SEPARATOR;
178 ObviousIdentCharNatures[','] = C_SEPARATOR;
179 ObviousIdentCharNatures['['] = C_SEPARATOR;
180 ObviousIdentCharNatures[']'] = C_SEPARATOR;
181 ObviousIdentCharNatures['('] = C_SEPARATOR;
182 ObviousIdentCharNatures[')'] = C_SEPARATOR;
183 ObviousIdentCharNatures['{'] = C_SEPARATOR;
184 ObviousIdentCharNatures['}'] = C_SEPARATOR;
185 ObviousIdentCharNatures['+'] = C_SEPARATOR;
186 ObviousIdentCharNatures['-'] = C_SEPARATOR;
187 ObviousIdentCharNatures['*'] = C_SEPARATOR;
188 ObviousIdentCharNatures['/'] = C_SEPARATOR;
189 ObviousIdentCharNatures['='] = C_SEPARATOR;
190 ObviousIdentCharNatures['&'] = C_SEPARATOR;
191 ObviousIdentCharNatures['|'] = C_SEPARATOR;
192 ObviousIdentCharNatures['?'] = C_SEPARATOR;
193 ObviousIdentCharNatures['<'] = C_SEPARATOR;
194 ObviousIdentCharNatures['>'] = C_SEPARATOR;
195 ObviousIdentCharNatures['!'] = C_SEPARATOR;
196 ObviousIdentCharNatures['%'] = C_SEPARATOR;
197 ObviousIdentCharNatures['^'] = C_SEPARATOR;
198 ObviousIdentCharNatures['~'] = C_SEPARATOR;
199 ObviousIdentCharNatures['"'] = C_SEPARATOR;
200 ObviousIdentCharNatures['\''] = C_SEPARATOR;
203 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
205 static final int TableSize = 30, InternalTableSize = 6;
207 // 30*6 = 180 entries
208 public static final int OptimizedLength = 6;
211 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
213 // support for detecting non-externalized string literals
214 int currentLineNr = -1;
216 int previousLineNr = -1;
218 NLSLine currentLine = null;
220 List lines = new ArrayList();
222 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
224 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
226 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
228 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
230 public StringLiteral[] nonNLSStrings = null;
232 public boolean checkNonExternalizedStringLiterals = true;
234 public boolean wasNonExternalizedStringLiteral = false;
237 for (int i = 0; i < 6; i++) {
238 for (int j = 0; j < TableSize; j++) {
239 for (int k = 0; k < InternalTableSize; k++) {
240 charArray_length[i][j][k] = initCharArray;
246 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
248 public static final int RoundBracket = 0;
250 public static final int SquareBracket = 1;
252 public static final int CurlyBracket = 2;
254 public static final int BracketKinds = 3;
257 public char[][] foundTaskTags = null;
259 public char[][] foundTaskMessages;
261 public char[][] foundTaskPriorities = null;
263 public int[][] foundTaskPositions;
265 public int foundTaskCount = 0;
267 public char[][] taskTags = null;
269 public char[][] taskPriorities = null;
271 public boolean isTaskCaseSensitive = true;
273 public static final boolean DEBUG = false;
275 public static final boolean TRACE = false;
277 public ICompilationUnit compilationUnit = null;
280 * Determines if the specified character is permissible as the first character
281 * in a PHP identifier or variable
283 * The '$' character for PHP variables is regarded as a correct first
287 public static boolean isPHPIdentOrVarStart(char ch) {
288 if (ch < MAX_OBVIOUS) {
289 return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DOLLAR;
292 // return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <=
293 // ch && ch <= 0xFF);
297 * Determines if the specified character is permissible as the first character
298 * in a PHP identifier.
300 * The '$' character for PHP variables isn't regarded as the first character !
302 public static boolean isPHPIdentifierStart(char ch) {
303 if (ch < MAX_OBVIOUS) {
304 return ObviousIdentCharNatures[ch] == C_LETTER;
307 // return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <=
312 * Determines if the specified character may be part of a PHP identifier as
313 * other than the first character
315 public static boolean isPHPIdentifierPart(char ch) {
316 if (ch < MAX_OBVIOUS) {
317 return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DIGIT;
320 // return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch
324 public static boolean isSQLIdentifierPart(char ch) {
325 if (ch < MAX_OBVIOUS) {
326 return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DIGIT;
331 public final boolean atEnd() {
332 // This code is not relevant if source is
333 // Only a part of the real stream input
334 return source.length == currentPosition;
337 public char[] getCurrentIdentifierSource() {
338 // return the token REAL source (aka unicodes are precomputed)
340 // if (withoutUnicodePtr != 0)
341 // //0 is used as a fast test flag so the real first char is in position 1
343 // withoutUnicodeBuffer,
345 // result = new char[withoutUnicodePtr],
347 // withoutUnicodePtr);
349 int length = currentPosition - startPosition;
350 switch (length) { // see OptimizedLength
352 return optimizedCurrentTokenSource1();
354 return optimizedCurrentTokenSource2();
356 return optimizedCurrentTokenSource3();
358 return optimizedCurrentTokenSource4();
360 return optimizedCurrentTokenSource5();
362 return optimizedCurrentTokenSource6();
365 System.arraycopy(source, startPosition, result = new char[length], 0, length);
370 public int getCurrentTokenEndPosition() {
371 return this.currentPosition - 1;
374 public final char[] getCurrentTokenSource() {
375 // Return the token REAL source (aka unicodes are precomputed)
377 // if (withoutUnicodePtr != 0)
378 // // 0 is used as a fast test flag so the real first char is in position 1
380 // withoutUnicodeBuffer,
382 // result = new char[withoutUnicodePtr],
384 // withoutUnicodePtr);
387 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
392 public final char[] getCurrentTokenSource(int startPos) {
393 // Return the token REAL source (aka unicodes are precomputed)
395 // if (withoutUnicodePtr != 0)
396 // // 0 is used as a fast test flag so the real first char is in position 1
398 // withoutUnicodeBuffer,
400 // result = new char[withoutUnicodePtr],
402 // withoutUnicodePtr);
405 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
410 public final char[] getCurrentTokenSourceString() {
411 // return the token REAL source (aka unicodes are precomputed).
412 // REMOVE the two " that are at the beginning and the end.
414 if (withoutUnicodePtr != 0)
415 // 0 is used as a fast test flag so the real first char is in position 1
416 System.arraycopy(withoutUnicodeBuffer, 2,
417 // 2 is 1 (real start) + 1 (to jump over the ")
418 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
421 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
426 public final boolean equalsCurrentTokenSource(char[] word) {
427 if (word.length != currentPosition - startPosition) {
430 for (int i = 0; i < word.length; i++) {
431 if (word[i] != source[startPosition + i]) {
438 public final char[] getRawTokenSourceEnd() {
439 int length = this.eofPosition - this.currentPosition - 1;
440 char[] sourceEnd = new char[length];
441 System.arraycopy(this.source, this.currentPosition, sourceEnd, 0, length);
445 public int getCurrentTokenStartPosition() {
446 return this.startPosition;
449 public final String getCurrentStringLiteral() {
450 char[] result = getCurrentStringLiteralSource();
451 return new String(result);
454 public final char[] getCurrentStringLiteralSource() {
455 // Return the token REAL source (aka unicodes are precomputed)
456 if (startPosition + 1 >= currentPosition) {
461 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
466 public final char[] getCurrentStringLiteralSource(int startPos) {
467 // Return the token REAL source (aka unicodes are precomputed)
470 System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
476 * Search the source position corresponding to the end of a given line number
478 * Line numbers are 1-based, and relative to the scanner initialPosition.
479 * Character positions are 0-based.
481 * In case the given line number is inconsistent, answers -1.
483 public final int getLineEnd(int lineNumber) {
484 if (lineEnds == null)
486 if (lineNumber >= lineEnds.length)
490 if (lineNumber == lineEnds.length - 1)
492 return lineEnds[lineNumber - 1];
493 // next line start one character behind the lineEnd of the previous line
497 * Search the source position corresponding to the beginning of a given line
500 * Line numbers are 1-based, and relative to the scanner initialPosition.
501 * Character positions are 0-based.
503 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
505 * In case the given line number is inconsistent, answers -1.
507 public final int getLineStart(int lineNumber) {
508 if (lineEnds == null)
510 if (lineNumber >= lineEnds.length)
515 return initialPosition;
516 return lineEnds[lineNumber - 2] + 1;
517 // next line start one character behind the lineEnd of the previous line
520 public final boolean getNextChar(char testedChar) {
522 // handle the case of unicode.
523 // when a unicode appears then we must use a buffer that holds char
525 // At the end of this method currentCharacter holds the new visited char
526 // and currentPosition points right next after it
527 // Both previous lines are true if the currentCharacter is == to the
529 // On false, no side effect has occured.
530 // ALL getNextChar.... ARE OPTIMIZED COPIES
531 int temp = currentPosition;
533 currentCharacter = source[currentPosition++];
534 // if (((currentCharacter = source[currentPosition++]) == '\\')
535 // && (source[currentPosition] == 'u')) {
536 // //-------------unicode traitement ------------
537 // int c1, c2, c3, c4;
538 // int unicodeSize = 6;
539 // currentPosition++;
540 // while (source[currentPosition] == 'u') {
541 // currentPosition++;
545 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
547 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
549 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
551 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
553 // currentPosition = temp;
557 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
558 // if (currentCharacter != testedChar) {
559 // currentPosition = temp;
562 // unicodeAsBackSlash = currentCharacter == '\\';
564 // //need the unicode buffer
565 // if (withoutUnicodePtr == 0) {
566 // //buffer all the entries that have been left aside....
567 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
571 // withoutUnicodeBuffer,
573 // withoutUnicodePtr);
575 // //fill the buffer with the char
576 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
579 // } //-------------end unicode traitement--------------
581 if (currentCharacter != testedChar) {
582 currentPosition = temp;
585 unicodeAsBackSlash = false;
586 // if (withoutUnicodePtr != 0)
587 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
590 } catch (IndexOutOfBoundsException e) {
591 unicodeAsBackSlash = false;
592 currentPosition = temp;
597 public final int getNextChar(char testedChar1, char testedChar2) {
598 // INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
599 // test can be done with (x==0) for the first and (x>0) for the second
600 // handle the case of unicode.
601 // when a unicode appears then we must use a buffer that holds char
603 // At the end of this method currentCharacter holds the new visited char
604 // and currentPosition points right next after it
605 // Both previous lines are true if the currentCharacter is == to the
607 // On false, no side effect has occured.
608 // ALL getNextChar.... ARE OPTIMIZED COPIES
609 int temp = currentPosition;
612 currentCharacter = source[currentPosition++];
613 // if (((currentCharacter = source[currentPosition++]) == '\\')
614 // && (source[currentPosition] == 'u')) {
615 // //-------------unicode traitement ------------
616 // int c1, c2, c3, c4;
617 // int unicodeSize = 6;
618 // currentPosition++;
619 // while (source[currentPosition] == 'u') {
620 // currentPosition++;
624 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
626 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
628 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
630 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
632 // currentPosition = temp;
636 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
637 // if (currentCharacter == testedChar1)
639 // else if (currentCharacter == testedChar2)
642 // currentPosition = temp;
646 // //need the unicode buffer
647 // if (withoutUnicodePtr == 0) {
648 // //buffer all the entries that have been left aside....
649 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
653 // withoutUnicodeBuffer,
655 // withoutUnicodePtr);
657 // //fill the buffer with the char
658 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
660 // } //-------------end unicode traitement--------------
662 if (currentCharacter == testedChar1)
664 else if (currentCharacter == testedChar2)
667 currentPosition = temp;
670 // if (withoutUnicodePtr != 0)
671 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
674 } catch (IndexOutOfBoundsException e) {
675 currentPosition = temp;
680 public final boolean getNextCharAsDigit() {
682 // handle the case of unicode.
683 // when a unicode appears then we must use a buffer that holds char
685 // At the end of this method currentCharacter holds the new visited char
686 // and currentPosition points right next after it
687 // Both previous lines are true if the currentCharacter is a digit
688 // On false, no side effect has occured.
689 // ALL getNextChar.... ARE OPTIMIZED COPIES
690 int temp = currentPosition;
692 currentCharacter = source[currentPosition++];
693 // if (((currentCharacter = source[currentPosition++]) == '\\')
694 // && (source[currentPosition] == 'u')) {
695 // //-------------unicode traitement ------------
696 // int c1, c2, c3, c4;
697 // int unicodeSize = 6;
698 // currentPosition++;
699 // while (source[currentPosition] == 'u') {
700 // currentPosition++;
704 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
706 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
708 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
710 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
712 // currentPosition = temp;
716 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
717 // if (!Character.isDigit(currentCharacter)) {
718 // currentPosition = temp;
722 // //need the unicode buffer
723 // if (withoutUnicodePtr == 0) {
724 // //buffer all the entries that have been left aside....
725 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
729 // withoutUnicodeBuffer,
731 // withoutUnicodePtr);
733 // //fill the buffer with the char
734 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
736 // } //-------------end unicode traitement--------------
738 if (!Character.isDigit(currentCharacter)) {
739 currentPosition = temp;
742 // if (withoutUnicodePtr != 0)
743 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
746 } catch (IndexOutOfBoundsException e) {
747 currentPosition = temp;
752 public final boolean getNextCharAsDigit(int radix) {
754 // handle the case of unicode.
755 // when a unicode appears then we must use a buffer that holds char
757 // At the end of this method currentCharacter holds the new visited char
758 // and currentPosition points right next after it
759 // Both previous lines are true if the currentCharacter is a digit base on
761 // On false, no side effect has occured.
762 // ALL getNextChar.... ARE OPTIMIZED COPIES
763 int temp = currentPosition;
765 currentCharacter = source[currentPosition++];
766 // if (((currentCharacter = source[currentPosition++]) == '\\')
767 // && (source[currentPosition] == 'u')) {
768 // //-------------unicode traitement ------------
769 // int c1, c2, c3, c4;
770 // int unicodeSize = 6;
771 // currentPosition++;
772 // while (source[currentPosition] == 'u') {
773 // currentPosition++;
777 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
779 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
781 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
783 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
785 // currentPosition = temp;
789 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
790 // if (Character.digit(currentCharacter, radix) == -1) {
791 // currentPosition = temp;
795 // //need the unicode buffer
796 // if (withoutUnicodePtr == 0) {
797 // //buffer all the entries that have been left aside....
798 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
802 // withoutUnicodeBuffer,
804 // withoutUnicodePtr);
806 // //fill the buffer with the char
807 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
809 // } //-------------end unicode traitement--------------
811 if (Character.digit(currentCharacter, radix) == -1) {
812 currentPosition = temp;
815 // if (withoutUnicodePtr != 0)
816 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
819 } catch (IndexOutOfBoundsException e) {
820 currentPosition = temp;
825 public boolean getNextCharAsJavaIdentifierPart() {
827 // handle the case of unicode.
828 // when a unicode appears then we must use a buffer that holds char
830 // At the end of this method currentCharacter holds the new visited char
831 // and currentPosition points right next after it
832 // Both previous lines are true if the currentCharacter is a
833 // JavaIdentifierPart
834 // On false, no side effect has occured.
835 // ALL getNextChar.... ARE OPTIMIZED COPIES
836 int temp = currentPosition;
838 currentCharacter = source[currentPosition++];
839 // if (((currentCharacter = source[currentPosition++]) == '\\')
840 // && (source[currentPosition] == 'u')) {
841 // //-------------unicode traitement ------------
842 // int c1, c2, c3, c4;
843 // int unicodeSize = 6;
844 // currentPosition++;
845 // while (source[currentPosition] == 'u') {
846 // currentPosition++;
850 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
852 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
854 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
856 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
858 // currentPosition = temp;
862 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
863 // if (!isPHPIdentifierPart(currentCharacter)) {
864 // currentPosition = temp;
868 // //need the unicode buffer
869 // if (withoutUnicodePtr == 0) {
870 // //buffer all the entries that have been left aside....
871 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
875 // withoutUnicodeBuffer,
877 // withoutUnicodePtr);
879 // //fill the buffer with the char
880 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
882 // } //-------------end unicode traitement--------------
884 if (!isPHPIdentifierPart(currentCharacter)) {
885 currentPosition = temp;
888 // if (withoutUnicodePtr != 0)
889 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
892 } catch (IndexOutOfBoundsException e) {
893 currentPosition = temp;
898 public int getCastOrParen() {
899 int tempPosition = currentPosition;
900 char tempCharacter = currentCharacter;
901 int tempToken = TokenNameLPAREN;
902 boolean found = false;
903 StringBuffer buf = new StringBuffer();
906 currentCharacter = source[currentPosition++];
907 } while (currentCharacter == ' ' || currentCharacter == '\t');
908 while (ObviousIdentCharNatures[currentCharacter] == C_LETTER) {
909 // while((currentCharacter >= 'a' && currentCharacter <= 'z') ||
910 // (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
911 buf.append(currentCharacter);
912 currentCharacter = source[currentPosition++];
914 if (buf.length() >= 3 && buf.length() <= 7) {
915 char[] data = buf.toString().toCharArray();
917 switch (data.length) {
920 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
922 tempToken = TokenNameintCAST;
927 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
929 tempToken = TokenNameboolCAST;
932 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
934 tempToken = TokenNamedoubleCAST;
940 if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
941 && (data[++index] == 'y')) {
943 tempToken = TokenNamearrayCAST;
946 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
947 && (data[++index] == 't')) {
949 tempToken = TokenNameunsetCAST;
952 if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
953 && (data[++index] == 't')) {
955 tempToken = TokenNamedoubleCAST;
961 // object string double
962 if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
963 && (data[++index] == 'c') && (data[++index] == 't')) {
965 tempToken = TokenNameobjectCAST;
968 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
969 && (data[++index] == 'n') && (data[++index] == 'g')) {
971 tempToken = TokenNamestringCAST;
974 if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
975 && (data[++index] == 'l') && (data[++index] == 'e')) {
977 tempToken = TokenNamedoubleCAST;
984 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
985 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
987 tempToken = TokenNameboolCAST;
990 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
991 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
993 tempToken = TokenNameintCAST;
999 while (currentCharacter == ' ' || currentCharacter == '\t') {
1000 currentCharacter = source[currentPosition++];
1002 if (currentCharacter == ')') {
1007 } catch (IndexOutOfBoundsException e) {
1009 currentCharacter = tempCharacter;
1010 currentPosition = tempPosition;
1011 return TokenNameLPAREN;
1014 public void consumeStringInterpolated() throws InvalidInputException {
1016 // consume next character
1017 unicodeAsBackSlash = false;
1018 currentCharacter = source[currentPosition++];
1019 // if (((currentCharacter = source[currentPosition++]) == '\\')
1020 // && (source[currentPosition] == 'u')) {
1021 // getNextUnicodeChar();
1023 // if (withoutUnicodePtr != 0) {
1024 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1025 // currentCharacter;
1028 while (currentCharacter != '`') {
1029 /** ** in PHP \r and \n are valid in string literals *** */
1030 // if ((currentCharacter == '\n')
1031 // || (currentCharacter == '\r')) {
1032 // // relocate if finding another quote fairly close: thus unicode
1033 // '/u000D' will be fully consumed
1034 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1035 // if (currentPosition + lookAhead == source.length)
1037 // if (source[currentPosition + lookAhead] == '\n')
1039 // if (source[currentPosition + lookAhead] == '\"') {
1040 // currentPosition += lookAhead + 1;
1044 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1046 if (currentCharacter == '\\') {
1047 int escapeSize = currentPosition;
1048 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1049 // scanEscapeCharacter make a side effect on this value and we need
1050 // the previous value few lines down this one
1051 scanDoubleQuotedEscapeCharacter();
1052 escapeSize = currentPosition - escapeSize;
1053 if (withoutUnicodePtr == 0) {
1054 // buffer all the entries that have been left aside....
1055 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1056 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1057 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1058 } else { // overwrite the / in the buffer
1059 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1060 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1061 // where only one is correct
1062 withoutUnicodePtr--;
1065 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1066 if (recordLineSeparator) {
1067 pushLineSeparator();
1070 // consume next character
1071 unicodeAsBackSlash = false;
1072 currentCharacter = source[currentPosition++];
1073 // if (((currentCharacter = source[currentPosition++]) == '\\')
1074 // && (source[currentPosition] == 'u')) {
1075 // getNextUnicodeChar();
1077 if (withoutUnicodePtr != 0) {
1078 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1082 } catch (IndexOutOfBoundsException e) {
1083 // reset end position for error reporting
1084 currentPosition -= 2;
1085 throw new InvalidInputException(UNTERMINATED_STRING);
1086 } catch (InvalidInputException e) {
1087 if (e.getMessage().equals(INVALID_ESCAPE)) {
1088 // relocate if finding another quote fairly close: thus unicode
1089 // '/u000D' will be fully consumed
1090 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1091 if (currentPosition + lookAhead == source.length)
1093 if (source[currentPosition + lookAhead] == '\n')
1095 if (source[currentPosition + lookAhead] == '`') {
1096 currentPosition += lookAhead + 1;
1103 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1104 // //$NON-NLS-?$ where ? is an
1106 if (currentLine == null) {
1107 currentLine = new NLSLine();
1108 lines.add(currentLine);
1110 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1114 public void consumeStringConstant() throws InvalidInputException {
1116 // consume next character
1117 unicodeAsBackSlash = false;
1118 currentCharacter = source[currentPosition++];
1119 // if (((currentCharacter = source[currentPosition++]) == '\\')
1120 // && (source[currentPosition] == 'u')) {
1121 // getNextUnicodeChar();
1123 // if (withoutUnicodePtr != 0) {
1124 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1125 // currentCharacter;
1128 while (currentCharacter != '\'') {
1129 /** ** in PHP \r and \n are valid in string literals *** */
1130 // if ((currentCharacter == '\n')
1131 // || (currentCharacter == '\r')) {
1132 // // relocate if finding another quote fairly close: thus unicode
1133 // '/u000D' will be fully consumed
1134 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1135 // if (currentPosition + lookAhead == source.length)
1137 // if (source[currentPosition + lookAhead] == '\n')
1139 // if (source[currentPosition + lookAhead] == '\"') {
1140 // currentPosition += lookAhead + 1;
1144 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1146 if (currentCharacter == '\\') {
1147 int escapeSize = currentPosition;
1148 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1149 // scanEscapeCharacter make a side effect on this value and we need
1150 // the previous value few lines down this one
1151 scanSingleQuotedEscapeCharacter();
1152 escapeSize = currentPosition - escapeSize;
1153 if (withoutUnicodePtr == 0) {
1154 // buffer all the entries that have been left aside....
1155 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1156 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1157 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1158 } else { // overwrite the / in the buffer
1159 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1160 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1161 // where only one is correct
1162 withoutUnicodePtr--;
1165 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1166 if (recordLineSeparator) {
1167 pushLineSeparator();
1170 // consume next character
1171 unicodeAsBackSlash = false;
1172 currentCharacter = source[currentPosition++];
1173 // if (((currentCharacter = source[currentPosition++]) == '\\')
1174 // && (source[currentPosition] == 'u')) {
1175 // getNextUnicodeChar();
1177 if (withoutUnicodePtr != 0) {
1178 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1182 } catch (IndexOutOfBoundsException e) {
1183 // reset end position for error reporting
1184 currentPosition -= 2;
1185 throw new InvalidInputException(UNTERMINATED_STRING);
1186 } catch (InvalidInputException e) {
1187 if (e.getMessage().equals(INVALID_ESCAPE)) {
1188 // relocate if finding another quote fairly close: thus unicode
1189 // '/u000D' will be fully consumed
1190 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1191 if (currentPosition + lookAhead == source.length)
1193 if (source[currentPosition + lookAhead] == '\n')
1195 if (source[currentPosition + lookAhead] == '\'') {
1196 currentPosition += lookAhead + 1;
1203 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1204 // //$NON-NLS-?$ where ? is an
1206 if (currentLine == null) {
1207 currentLine = new NLSLine();
1208 lines.add(currentLine);
1210 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1214 public void consumeStringLiteral() throws InvalidInputException {
1216 boolean openDollarBrace = false;
1217 // consume next character
1218 unicodeAsBackSlash = false;
1219 currentCharacter = source[currentPosition++];
1220 while (currentCharacter != '"' || openDollarBrace) {
1221 /** ** in PHP \r and \n are valid in string literals *** */
1222 if (currentCharacter == '\\') {
1223 int escapeSize = currentPosition;
1224 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1225 // scanEscapeCharacter make a side effect on this value and we need
1226 // the previous value few lines down this one
1227 scanDoubleQuotedEscapeCharacter();
1228 escapeSize = currentPosition - escapeSize;
1229 if (withoutUnicodePtr == 0) {
1230 // buffer all the entries that have been left aside....
1231 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1232 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1233 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1234 } else { // overwrite the / in the buffer
1235 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1236 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1237 // where only one is correct
1238 withoutUnicodePtr--;
1241 } else if (currentCharacter == '$' && source[currentPosition] == '{') {
1242 openDollarBrace = true;
1243 } else if (currentCharacter == '{' && source[currentPosition] == '$') {
1244 openDollarBrace = true;
1245 } else if (currentCharacter == '}') {
1246 openDollarBrace = false;
1247 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1248 if (recordLineSeparator) {
1249 pushLineSeparator();
1252 // consume next character
1253 unicodeAsBackSlash = false;
1254 currentCharacter = source[currentPosition++];
1255 if (withoutUnicodePtr != 0) {
1256 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1259 } catch (IndexOutOfBoundsException e) {
1260 // reset end position for error reporting
1261 currentPosition -= 2;
1262 throw new InvalidInputException(UNTERMINATED_STRING);
1263 } catch (InvalidInputException e) {
1264 if (e.getMessage().equals(INVALID_ESCAPE)) {
1265 // relocate if finding another quote fairly close: thus unicode
1266 // '/u000D' will be fully consumed
1267 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1268 if (currentPosition + lookAhead == source.length)
1270 if (source[currentPosition + lookAhead] == '\n')
1272 if (source[currentPosition + lookAhead] == '\"') {
1273 currentPosition += lookAhead + 1;
1280 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1281 // //$NON-NLS-?$ where ? is an
1283 if (currentLine == null) {
1284 currentLine = new NLSLine();
1285 lines.add(currentLine);
1287 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
// Scans and returns the next token, as one of the TokenName* constants.
//
// Overview of what the visible statements do:
// - one early path delegates to getInlinedHTMLToken(currentPosition)
//   (presumably when the scanner is not in PHP mode; the guard is not visible here)
// - a pending fFillerToken (anything other than TokenNameEOF) is copied to a
//   temporary and the field is reset to TokenNameEOF
// - one path calls jumpOverMethodBody() and reports TokenNameEOF or TokenNameRBRACE
// - otherwise white space is skipped, startPosition is set and a switch on
//   currentCharacter recognises operators, strings, heredocs, comments,
//   variables, identifiers and numbers
// - running off the end of the source array ends in TokenNameEOF
//
// Throws InvalidInputException for an unterminated block comment
// (UNTERMINATED_COMMENT) and for the "Ctrl-Z" case below; the string and
// number helpers called from here may raise it as well.
//
// NOTE(review): this listing omits short lines (case labels, closing braces,
// "try {", "break;"), so the case a group of returns belongs to is inferred
// from the token names and hedged accordingly.
1291 public int getNextToken() throws InvalidInputException {
1293 return getInlinedHTMLToken(currentPosition);
// hand out a token queued by a previous call (getInlinedHTMLToken sets fFillerToken to TokenNameecho)
1295 if (fFillerToken!=TokenNameEOF) {
1297 tempToken = fFillerToken;
1298 fFillerToken=TokenNameEOF;
1301 this.wasAcr = false;
// method bodies are skipped wholesale on this path (guard not visible; TODO confirm)
1303 jumpOverMethodBody();
1305 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1309 withoutUnicodePtr = 0;
1310 // start with a new token
1311 char encapsedChar = ' ';
1312 // if (!encapsedStringStack.isEmpty()) {
1313 // encapsedChar = ((Character)
1314 // encapsedStringStack.peek()).charValue();
1316 // if (encapsedChar != '$' && encapsedChar != ' ') {
1317 // currentCharacter = source[currentPosition++];
1318 // if (currentCharacter == encapsedChar) {
1319 // switch (currentCharacter) {
1321 // return TokenNameEncapsedString0;
1323 // return TokenNameEncapsedString1;
1325 // return TokenNameEncapsedString2;
1328 // while (currentCharacter != encapsedChar) {
1329 // /** ** in PHP \r and \n are valid in string literals *** */
1330 // switch (currentCharacter) {
1332 // int escapeSize = currentPosition;
1333 // boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1334 // //scanEscapeCharacter make a side effect on this value and
1335 // // we need the previous value few lines down this one
1336 // scanDoubleQuotedEscapeCharacter();
1337 // escapeSize = currentPosition - escapeSize;
1338 // if (withoutUnicodePtr == 0) {
1339 // //buffer all the entries that have been left aside....
1340 // withoutUnicodePtr = currentPosition - escapeSize - 1 -
1342 // System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1343 // withoutUnicodePtr);
1344 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1345 // } else { //overwrite the / in the buffer
1346 // withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1347 // if (backSlashAsUnicodeInString) { //there are TWO \ in
1348 // withoutUnicodePtr--;
1354 // if (recordLineSeparator) {
1355 // pushLineSeparator();
1359 // if (isPHPIdentifierStart(source[currentPosition]) ||
1360 // source[currentPosition] == '{') {
1361 // currentPosition--;
1362 // encapsedStringStack.push(new Character('$'));
1363 // return TokenNameSTRING;
1367 // if (source[currentPosition] == '$') { // CURLY_OPEN
1368 // currentPosition--;
1369 // encapsedStringStack.push(new Character('$'));
1370 // return TokenNameSTRING;
1373 // // consume next character
1374 // unicodeAsBackSlash = false;
1375 // currentCharacter = source[currentPosition++];
1376 // if (withoutUnicodePtr != 0) {
1377 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1381 // currentPosition--;
1382 // return TokenNameSTRING;
1384 // ---------Consume white space and handles startPosition---------
// whiteStart remembers where the white space run began so it can be reported as one token
1385 int whiteStart = currentPosition;
1386 startPosition = currentPosition;
1387 currentCharacter = source[currentPosition++];
1388 // if (encapsedChar == '$') {
1389 // switch (currentCharacter) {
1391 // currentCharacter = source[currentPosition++];
1392 // return TokenNameSTRING;
1394 // if (encapsedChar == '$') {
1395 // if (getNextChar('$'))
1396 // return TokenNameLBRACE_DOLLAR;
1398 // return TokenNameLBRACE;
1400 // return TokenNameRBRACE;
1402 // return TokenNameLBRACKET;
1404 // return TokenNameRBRACKET;
1406 // if (tokenizeStrings) {
1407 // consumeStringConstant();
1408 // return TokenNameStringSingleQuote;
1410 // return TokenNameEncapsedString1;
1412 // return TokenNameEncapsedString2;
1414 // if (tokenizeStrings) {
1415 // consumeStringInterpolated();
1416 // return TokenNameStringInterpolated;
1418 // return TokenNameEncapsedString0;
1420 // if (getNextChar('>'))
1421 // return TokenNameMINUS_GREATER;
1422 // return TokenNameSTRING;
1424 // if (currentCharacter == '$') {
1425 // int oldPosition = currentPosition;
1427 // currentCharacter = source[currentPosition++];
1428 // if (isPHPIdentifierStart(currentCharacter)) {
1429 // return TokenNameDOLLAR_LBRACE;
1431 // if (isPHPIdentifierStart(currentCharacter)) {
1432 // return scanIdentifierOrKeyword(true);
1434 // currentPosition = oldPosition;
1435 // return TokenNameSTRING;
1437 // } catch (IndexOutOfBoundsException e) {
1438 // currentPosition = oldPosition;
1439 // return TokenNameSTRING;
1442 // if (isPHPIdentifierStart(currentCharacter))
1443 // return scanIdentifierOrKeyword(false);
1444 // if (Character.isDigit(currentCharacter))
1445 // return scanNumber(false);
1446 // return TokenNameERROR;
1449 // boolean isWhiteSpace;
// skip white space; startPosition follows along so it ends up on the first non-blank character
1451 while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1452 startPosition = currentPosition;
1453 currentCharacter = source[currentPosition++];
1454 // if (((currentCharacter = source[currentPosition++]) == '\\')
1455 // && (source[currentPosition] == 'u')) {
1456 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1458 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1459 checkNonExternalizeString();
1460 if (recordLineSeparator) {
1461 pushLineSeparator();
1466 // isWhiteSpace = (currentCharacter == ' ')
1467 // || Character.isWhitespace(currentCharacter);
// when white space is itself a token, report the skipped run starting at whiteStart
1470 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1471 // reposition scanner in case we are interested by spaces as tokens
1473 startPosition = whiteStart;
1474 return TokenNameWHITESPACE;
1476 // little trick to get out in the middle of a source computation
1477 if (currentPosition > eofPosition)
1478 return TokenNameEOF;
1479 // ---------Identify the next token-------------
1480 switch (currentCharacter) {
// presumably '(' : may be a cast such as (int) or a plain parenthesis
1482 return getCastOrParen();
1484 return TokenNameRPAREN;
1486 return TokenNameLBRACE;
1488 return TokenNameRBRACE;
1490 return TokenNameLBRACKET;
1492 return TokenNameRBRACKET;
1494 return TokenNameSEMICOLON;
1496 return TokenNameCOMMA;
// presumably '.' : ".=", a number starting with a dot, or the dot operator
1498 if (getNextChar('='))
1499 return TokenNameDOT_EQUAL;
1500 if (getNextCharAsDigit())
1501 return scanNumber(true);
1502 return TokenNameDOT;
// presumably '+' : "++", "+=" or "+"
1505 if ((test = getNextChar('+', '=')) == 0)
1506 return TokenNamePLUS_PLUS;
1508 return TokenNamePLUS_EQUAL;
1509 return TokenNamePLUS;
// presumably '-' : "--", "-=", "->" or "-"
1513 if ((test = getNextChar('-', '=')) == 0)
1514 return TokenNameMINUS_MINUS;
1516 return TokenNameMINUS_EQUAL;
1517 if (getNextChar('>'))
1518 return TokenNameMINUS_GREATER;
1519 return TokenNameMINUS;
1522 if (getNextChar('='))
1523 return TokenNameTWIDDLE_EQUAL;
1524 return TokenNameTWIDDLE;
// presumably '!' : "!==", "!=" or "!"
1526 if (getNextChar('=')) {
1527 if (getNextChar('=')) {
1528 return TokenNameNOT_EQUAL_EQUAL;
1530 return TokenNameNOT_EQUAL;
1532 return TokenNameNOT;
1534 if (getNextChar('='))
1535 return TokenNameMULTIPLY_EQUAL;
1536 return TokenNameMULTIPLY;
1538 if (getNextChar('='))
1539 return TokenNameREMAINDER_EQUAL;
1540 return TokenNameREMAINDER;
// presumably '<' : read one character ahead; oldPosition allows backing out to a plain TokenNameLESS
1542 int oldPosition = currentPosition;
1544 currentCharacter = source[currentPosition++];
1545 } catch (IndexOutOfBoundsException e) {
1546 currentPosition = oldPosition;
1547 return TokenNameLESS;
1549 switch (currentCharacter) {
1551 return TokenNameLESS_EQUAL;
1553 return TokenNameNOT_EQUAL;
1555 if (getNextChar('='))
1556 return TokenNameLEFT_SHIFT_EQUAL;
// a third '<' starts a heredoc: skip white space, then read the identifier that names the end tag
1557 if (getNextChar('<')) {
1558 currentCharacter = source[currentPosition++];
1559 while (Character.isWhitespace(currentCharacter)) {
1560 currentCharacter = source[currentPosition++];
1562 int heredocStart = currentPosition - 1;
1563 int heredocLength = 0;
1564 if (isPHPIdentifierStart(currentCharacter)) {
1565 currentCharacter = source[currentPosition++];
1567 return TokenNameERROR;
1569 while (isPHPIdentifierPart(currentCharacter)) {
1570 currentCharacter = source[currentPosition++];
1572 heredocLength = currentPosition - heredocStart - 1;
1573 // heredoc end-tag determination
1574 boolean endTag = true;
1577 ch = source[currentPosition++];
// after each line break, compare the following characters with the recorded identifier
1578 if (ch == '\r' || ch == '\n') {
1579 if (recordLineSeparator) {
1580 pushLineSeparator();
1584 for (int i = 0; i < heredocLength; i++) {
1585 if (source[currentPosition + i] != source[heredocStart + i]) {
1591 currentPosition += heredocLength - 1;
1592 currentCharacter = source[currentPosition++];
1593 break; // do...while loop
1599 return TokenNameHEREDOC;
1601 return TokenNameLEFT_SHIFT;
1603 currentPosition = oldPosition;
1604 return TokenNameLESS;
// presumably '>' : ">=", ">>=", ">>" or ">"
1608 if ((test = getNextChar('=', '>')) == 0)
1609 return TokenNameGREATER_EQUAL;
1611 if ((test = getNextChar('=', '>')) == 0)
1612 return TokenNameRIGHT_SHIFT_EQUAL;
1613 return TokenNameRIGHT_SHIFT;
1615 return TokenNameGREATER;
// presumably '=' : "===", "==", "=>" or "="
1618 if (getNextChar('=')) {
1619 if (getNextChar('=')) {
1620 return TokenNameEQUAL_EQUAL_EQUAL;
1622 return TokenNameEQUAL_EQUAL;
1624 if (getNextChar('>'))
1625 return TokenNameEQUAL_GREATER;
1626 return TokenNameEQUAL;
1629 if ((test = getNextChar('&', '=')) == 0)
1630 return TokenNameAND_AND;
1632 return TokenNameAND_EQUAL;
1633 return TokenNameAND;
1637 if ((test = getNextChar('|', '=')) == 0)
1638 return TokenNameOR_OR;
1640 return TokenNameOR_EQUAL;
1644 if (getNextChar('='))
1645 return TokenNameXOR_EQUAL;
1646 return TokenNameXOR;
// presumably '?' : "?>" hands over to inline HTML scanning, starting at the "?>" itself
1648 if (getNextChar('>')) {
1650 if (currentPosition == source.length) {
1652 return TokenNameINLINE_HTML;
1654 return getInlinedHTMLToken(currentPosition - 2);
1656 return TokenNameQUESTION;
1658 if (getNextChar(':'))
1659 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1660 return TokenNameCOLON;
// string literals: each quote style has its own consume method and token
1664 consumeStringConstant();
1665 return TokenNameStringSingleQuote;
1667 // if (tokenizeStrings) {
1668 consumeStringLiteral();
1669 return TokenNameStringDoubleQuote;
1671 // return TokenNameEncapsedString2;
1673 // if (tokenizeStrings) {
1674 consumeStringInterpolated();
1675 return TokenNameStringInterpolated;
1677 // return TokenNameEncapsedString0;
// '/' or '#': "/=", a line comment (# or //), a block or phpdoc comment, or plain division
1680 char startChar = currentCharacter;
1681 if (getNextChar('=') && startChar == '/') {
1682 return TokenNameDIVIDE_EQUAL;
1685 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1687 this.lastCommentLinePosition = this.currentPosition;
1688 int endPositionForLineComment = 0;
1689 try { // get the next char
1690 currentCharacter = source[currentPosition++];
1691 // if (((currentCharacter = source[currentPosition++])
1693 // && (source[currentPosition] == 'u')) {
1694 // //-------------unicode traitement ------------
1695 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1696 // currentPosition++;
1697 // while (source[currentPosition] == 'u') {
1698 // currentPosition++;
1701 // Character.getNumericValue(source[currentPosition++]))
1705 // Character.getNumericValue(source[currentPosition++]))
1709 // Character.getNumericValue(source[currentPosition++]))
1713 // Character.getNumericValue(source[currentPosition++]))
1717 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1719 // currentCharacter =
1720 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1723 // handle the \\u case manually into comment
1724 // if (currentCharacter == '\\') {
1725 // if (source[currentPosition] == '\\')
1726 // currentPosition++;
1727 // } //jump over the \\
1728 boolean isUnicode = false;
// a line comment runs to the end of the line, unless a "?>" closes the PHP section first
1729 while (currentCharacter != '\r' && currentCharacter != '\n') {
1730 this.lastCommentLinePosition = this.currentPosition;
1731 if (currentCharacter == '?') {
1732 if (getNextChar('>')) {
1733 // ?> breaks line comments
1734 startPosition = currentPosition - 2;
1736 return TokenNameINLINE_HTML;
1739 // get the next char
1741 currentCharacter = source[currentPosition++];
1742 // if (((currentCharacter = source[currentPosition++])
1744 // && (source[currentPosition] == 'u')) {
1745 // isUnicode = true;
1746 // //-------------unicode traitement ------------
1747 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1748 // currentPosition++;
1749 // while (source[currentPosition] == 'u') {
1750 // currentPosition++;
1753 // Character.getNumericValue(source[currentPosition++]))
1757 // Character.getNumericValue(
1758 // source[currentPosition++]))
1762 // Character.getNumericValue(
1763 // source[currentPosition++]))
1767 // Character.getNumericValue(
1768 // source[currentPosition++]))
1772 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1774 // currentCharacter =
1775 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1778 // handle the \\u case manually into comment
1779 // if (currentCharacter == '\\') {
1780 // if (source[currentPosition] == '\\')
1781 // currentPosition++;
1782 // } //jump over the \\
// two candidate end positions follow; presumably selected by isUnicode (guard not visible)
1785 endPositionForLineComment = currentPosition - 6;
1787 endPositionForLineComment = currentPosition - 1;
1789 // recordComment(false);
1790 recordComment(TokenNameCOMMENT_LINE);
1791 if (this.taskTags != null)
1792 checkTaskTag(this.startPosition, this.currentPosition);
1793 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1794 checkNonExternalizeString();
1795 if (recordLineSeparator) {
1797 pushUnicodeLineSeparator();
1799 pushLineSeparator();
// comments are only returned as tokens when tokenizeComments is set
1805 if (tokenizeComments) {
1807 currentPosition = endPositionForLineComment;
1808 // reset one character behind
1810 return TokenNameCOMMENT_LINE;
1812 } catch (IndexOutOfBoundsException e) { // an eof will them
1814 if (tokenizeComments) {
1816 // reset one character behind
1817 return TokenNameCOMMENT_LINE;
1823 // traditional and annotation comment
// isJavadoc marks a phpdoc comment; star records whether the previous character was '*'
1824 boolean isJavadoc = false, star = false;
1825 // consume next character
1826 unicodeAsBackSlash = false;
1827 currentCharacter = source[currentPosition++];
1828 // if (((currentCharacter = source[currentPosition++]) ==
1830 // && (source[currentPosition] == 'u')) {
1831 // getNextUnicodeChar();
1833 // if (withoutUnicodePtr != 0) {
1834 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1835 // currentCharacter;
1838 if (currentCharacter == '*') {
1842 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1843 checkNonExternalizeString();
1844 if (recordLineSeparator) {
1845 pushLineSeparator();
1850 try { // get the next char
1851 currentCharacter = source[currentPosition++];
1852 // if (((currentCharacter = source[currentPosition++])
1854 // && (source[currentPosition] == 'u')) {
1855 // //-------------unicode traitement ------------
1856 // getNextUnicodeChar();
1858 // handle the \\u case manually into comment
1859 // if (currentCharacter == '\\') {
1860 // if (source[currentPosition] == '\\')
1861 // currentPosition++;
1862 // //jump over the \\
1864 // empty comment is not a javadoc /**/
1865 if (currentCharacter == '/') {
1868 // loop until end of comment */
1869 while ((currentCharacter != '/') || (!star)) {
1870 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1871 checkNonExternalizeString();
1872 if (recordLineSeparator) {
1873 pushLineSeparator();
1878 star = currentCharacter == '*';
1880 currentCharacter = source[currentPosition++];
1881 // if (((currentCharacter = source[currentPosition++])
1883 // && (source[currentPosition] == 'u')) {
1884 // //-------------unicode traitement ------------
1885 // getNextUnicodeChar();
1887 // handle the \\u case manually into comment
1888 // if (currentCharacter == '\\') {
1889 // if (source[currentPosition] == '\\')
1890 // currentPosition++;
1891 // } //jump over the \\
1893 // recordComment(isJavadoc);
1895 recordComment(TokenNameCOMMENT_PHPDOC);
1897 recordComment(TokenNameCOMMENT_BLOCK);
1900 if (tokenizeComments) {
1902 return TokenNameCOMMENT_PHPDOC;
1903 return TokenNameCOMMENT_BLOCK;
1906 if (this.taskTags != null) {
1907 checkTaskTag(this.startPosition, this.currentPosition);
1909 } catch (IndexOutOfBoundsException e) {
1910 // reset end position for error reporting
1911 currentPosition -= 2;
1912 throw new InvalidInputException(UNTERMINATED_COMMENT);
1916 return TokenNameDIVIDE;
1920 return TokenNameEOF;
1921 // the atEnd may not be <currentPosition == source.length> if
1922 // source is only some part of a real (external) stream
1923 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// '$' followed by an identifier start is scanned as a variable; a lone '$' is TokenNameDOLLAR
1925 if (currentCharacter == '$') {
1926 int oldPosition = currentPosition;
1928 currentCharacter = source[currentPosition++];
1929 if (isPHPIdentifierStart(currentCharacter)) {
1930 return scanIdentifierOrKeyword(true);
1932 currentPosition = oldPosition;
1933 return TokenNameDOLLAR;
1935 } catch (IndexOutOfBoundsException e) {
1936 currentPosition = oldPosition;
1937 return TokenNameDOLLAR;
// anything else: identifier or keyword, number, or an error token
1940 if (isPHPIdentifierStart(currentCharacter))
1941 return scanIdentifierOrKeyword(false);
1942 if (Character.isDigit(currentCharacter))
1943 return scanNumber(false);
1944 return TokenNameERROR;
1947 } // -----------------end switch while try--------------------
// reading past the end of source is how the scanner detects end of input
1948 catch (IndexOutOfBoundsException e) {
1951 return TokenNameEOF;
1956 * @throws InvalidInputException
// Scans text outside PHP code, looking for a PHP opening tag.
//
// start: source position stored in startPosition for the returned token.
// Returns TokenNameEOF when currentPosition is already beyond source.length
// (after clamping it to source.length); TokenNameINLINE_HTML on every other
// path visible here, including when the end of source is reached.
//
// NOTE(review): this listing omits short lines (loop header, braces, short
// assignments), so some guards and side effects are not visible.
1958 private int getInlinedHTMLToken(int start) throws InvalidInputException {
1959 boolean phpShortTag = false; // true, if <?= detected
1960 if (currentPosition > source.length) {
1961 currentPosition = source.length;
1962 return TokenNameEOF;
1964 startPosition = start;
1967 currentCharacter = source[currentPosition++];
// "<?" found: decide between a short tag ("<?" or "<?=") and the long "<?php" form
1968 if (currentCharacter == '<') {
1969 if (getNextChar('?')) {
1970 currentCharacter = source[currentPosition++];
1971 if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
1972 if (currentCharacter != '=') { // <?=
1974 phpShortTag = false;
// with ignorePHPOneLiner set, the tag only counts if lookAheadLinePHPTag() reports TokenNameINLINE_HTML
1979 if (ignorePHPOneLiner) { // for CodeFormatter
1980 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
// queue an echo token for the next getNextToken() call (presumably only for "<?=" - guard not visible)
1983 fFillerToken = TokenNameecho;
1985 return TokenNameINLINE_HTML;
1990 fFillerToken = TokenNameecho;
1992 return TokenNameINLINE_HTML;
// long form: match the remaining letters of "php" case-insensitively
1995 int test = getNextChar('H', 'h');
1997 test = getNextChar('P', 'p');
2000 if (ignorePHPOneLiner) {
2001 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
2003 return TokenNameINLINE_HTML;
2007 return TokenNameINLINE_HTML;
// keep line bookkeeping up to date while skipping HTML
2015 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
2016 if (recordLineSeparator) {
2017 pushLineSeparator();
2022 } // -----------------while--------------------
2024 return TokenNameINLINE_HTML;
2025 } // -----------------try--------------------
// end of source reached without finding an opening tag
2026 catch (IndexOutOfBoundsException e) {
2027 startPosition = start;
2031 return TokenNameINLINE_HTML;
2035 * check if the PHP is only in this line (for CodeFormatter)
// Looks ahead from currentPosition using a private index (currentPositionInLine)
// while tracking whether the scan is inside a single- or double-quoted string.
//
// Returns TokenNameEOF as a "dummy" result after moving currentPosition behind
// a character that directly follows a '?' (presumably the '>' of "?>" - the
// case label is not visible here); returns TokenNameINLINE_HTML on every other
// exit visible here, including when the end of source is reached.
//
// NOTE(review): this listing omits the case labels and short lines of the
// switch, so which character triggers each branch is inferred, not shown.
2039 private int lookAheadLinePHPTag() {
2040 int currentPositionInLine = currentPosition;
2041 char previousCharInLine = ' ';
2042 char currentCharInLine = ' ';
2043 boolean singleQuotedStringActive = false;
2044 boolean doubleQuotedStringActive = false;
2047 // look ahead in this line
2049 previousCharInLine = currentCharInLine;
2050 currentCharInLine = source[currentPositionInLine++];
2051 switch (currentCharInLine) {
2053 if (previousCharInLine == '?') {
2054 // update the scanner's current Position in the source
2055 currentPosition = currentPositionInLine;
2056 // use as "dummy" token
2057 return TokenNameEOF;
// inside a double-quoted string one extra character is consumed here, so it is never examined
2061 if (doubleQuotedStringActive) {
2062 // ignore escaped characters in double quoted strings
2063 previousCharInLine = currentCharInLine;
2064 currentCharInLine = source[currentPositionInLine++];
// toggle the double-quoted state; a double-quoted string is not opened inside a single-quoted one
2067 if (doubleQuotedStringActive) {
2068 doubleQuotedStringActive = false;
2070 if (!singleQuotedStringActive) {
2071 doubleQuotedStringActive = true;
// single-quoted state: closed only when the previous character is not a backslash
2076 if (singleQuotedStringActive) {
2077 if (previousCharInLine != '\\') {
2078 singleQuotedStringActive = false;
2081 if (!doubleQuotedStringActive) {
2082 singleQuotedStringActive = true;
2088 return TokenNameINLINE_HTML;
// the remaining exits apply only outside of string literals
2090 if (!singleQuotedStringActive && !doubleQuotedStringActive) {
2092 return TokenNameINLINE_HTML;
// previous character '/' outside a string: presumably the start of a comment (case labels not visible)
2096 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2098 return TokenNameINLINE_HTML;
2102 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2104 return TokenNameINLINE_HTML;
// end of source reached: the scanner position is advanced to where the look-ahead stopped
2109 } catch (IndexOutOfBoundsException e) {
2111 currentPosition = currentPositionInLine;
2112 return TokenNameINLINE_HTML;
2116 // public final void getNextUnicodeChar()
2117 // throws IndexOutOfBoundsException, InvalidInputException {
2119 // //handle the case of unicode.
2120 // //when a unicode appears then we must use a buffer that holds char
2122 // //At the end of this method currentCharacter holds the new visited char
2123 // //and currentPosition points right next after it
2125 // //ALL getNextChar.... ARE OPTIMIZED COPIES
2127 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2128 // currentPosition++;
2129 // while (source[currentPosition] == 'u') {
2130 // currentPosition++;
2134 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2136 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2138 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2140 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2142 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2144 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2145 // //need the unicode buffer
2146 // if (withoutUnicodePtr == 0) {
2147 // //buffer all the entries that have been left aside....
2148 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2149 // System.arraycopy(
2152 // withoutUnicodeBuffer,
2154 // withoutUnicodePtr);
2156 // //fill the buffer with the char
2157 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2159 // unicodeAsBackSlash = currentCharacter == '\\';
2162 * Tokenize a method body, assuming that curly brackets are properly balanced.
2164 public final void jumpOverMethodBody() {
2165 this.wasAcr = false;
2168 while (true) { // loop for jumping over comments
2169 // ---------Consume white space and handles startPosition---------
2170 boolean isWhiteSpace;
2172 startPosition = currentPosition;
2173 currentCharacter = source[currentPosition++];
2174 // if (((currentCharacter = source[currentPosition++]) == '\\')
2175 // && (source[currentPosition] == 'u')) {
2176 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2178 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2179 pushLineSeparator();
2180 isWhiteSpace = Character.isWhitespace(currentCharacter);
2182 } while (isWhiteSpace);
2183 // -------consume token until } is found---------
2184 switch (currentCharacter) {
2195 test = getNextChar('\\');
2198 scanDoubleQuotedEscapeCharacter();
2199 } catch (InvalidInputException ex) {
2203 // try { // consume next character
2204 unicodeAsBackSlash = false;
2205 currentCharacter = source[currentPosition++];
2206 // if (((currentCharacter = source[currentPosition++]) == '\\')
2207 // && (source[currentPosition] == 'u')) {
2208 // getNextUnicodeChar();
2210 if (withoutUnicodePtr != 0) {
2211 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2214 // } catch (InvalidInputException ex) {
2222 // try { // consume next character
2223 unicodeAsBackSlash = false;
2224 currentCharacter = source[currentPosition++];
2225 // if (((currentCharacter = source[currentPosition++]) == '\\')
2226 // && (source[currentPosition] == 'u')) {
2227 // getNextUnicodeChar();
2229 if (withoutUnicodePtr != 0) {
2230 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2233 // } catch (InvalidInputException ex) {
2235 while (currentCharacter != '"') {
2236 if (currentCharacter == '\r') {
2237 if (source[currentPosition] == '\n')
2240 // the string cannot go further that the line
2242 if (currentCharacter == '\n') {
2244 // the string cannot go further that the line
2246 if (currentCharacter == '\\') {
2248 scanDoubleQuotedEscapeCharacter();
2249 } catch (InvalidInputException ex) {
2253 // try { // consume next character
2254 unicodeAsBackSlash = false;
2255 currentCharacter = source[currentPosition++];
2256 // if (((currentCharacter = source[currentPosition++]) == '\\')
2257 // && (source[currentPosition] == 'u')) {
2258 // getNextUnicodeChar();
2260 if (withoutUnicodePtr != 0) {
2261 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2264 // } catch (InvalidInputException ex) {
2267 } catch (IndexOutOfBoundsException e) {
2273 if ((test = getNextChar('/', '*')) == 0) {
2276 // get the next char
2277 currentCharacter = source[currentPosition++];
2278 // if (((currentCharacter = source[currentPosition++]) ==
2280 // && (source[currentPosition] == 'u')) {
2281 // //-------------unicode traitement ------------
2282 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2283 // currentPosition++;
2284 // while (source[currentPosition] == 'u') {
2285 // currentPosition++;
2288 // Character.getNumericValue(source[currentPosition++]))
2292 // Character.getNumericValue(source[currentPosition++]))
2296 // Character.getNumericValue(source[currentPosition++]))
2300 // Character.getNumericValue(source[currentPosition++]))
2303 // //error don't care of the value
2304 // currentCharacter = 'A';
2305 // } //something different from \n and \r
2307 // currentCharacter =
2308 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2311 while (currentCharacter != '\r' && currentCharacter != '\n') {
2312 // get the next char
2313 currentCharacter = source[currentPosition++];
2314 // if (((currentCharacter = source[currentPosition++])
2316 // && (source[currentPosition] == 'u')) {
2317 // //-------------unicode traitement ------------
2318 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2319 // currentPosition++;
2320 // while (source[currentPosition] == 'u') {
2321 // currentPosition++;
2324 // Character.getNumericValue(source[currentPosition++]))
2328 // Character.getNumericValue(source[currentPosition++]))
2332 // Character.getNumericValue(source[currentPosition++]))
2336 // Character.getNumericValue(source[currentPosition++]))
2339 // //error don't care of the value
2340 // currentCharacter = 'A';
2341 // } //something different from \n and \r
2343 // currentCharacter =
2344 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2348 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2349 pushLineSeparator();
2350 } catch (IndexOutOfBoundsException e) {
2351 } // an eof will them be generated
2355 // traditional and annotation comment
2356 boolean star = false;
2357 // try { // consume next character
2358 unicodeAsBackSlash = false;
2359 currentCharacter = source[currentPosition++];
2360 // if (((currentCharacter = source[currentPosition++]) == '\\')
2361 // && (source[currentPosition] == 'u')) {
2362 // getNextUnicodeChar();
2364 if (withoutUnicodePtr != 0) {
2365 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2368 // } catch (InvalidInputException ex) {
2370 if (currentCharacter == '*') {
2373 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2374 pushLineSeparator();
2375 try { // get the next char
2376 currentCharacter = source[currentPosition++];
2377 // if (((currentCharacter = source[currentPosition++]) ==
2379 // && (source[currentPosition] == 'u')) {
2380 // //-------------unicode traitement ------------
2381 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2382 // currentPosition++;
2383 // while (source[currentPosition] == 'u') {
2384 // currentPosition++;
2387 // Character.getNumericValue(source[currentPosition++]))
2391 // Character.getNumericValue(source[currentPosition++]))
2395 // Character.getNumericValue(source[currentPosition++]))
2399 // Character.getNumericValue(source[currentPosition++]))
2402 // //error don't care of the value
2403 // currentCharacter = 'A';
2404 // } //something different from * and /
2406 // currentCharacter =
2407 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2410 // loop until end of comment */
2411 while ((currentCharacter != '/') || (!star)) {
2412 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2413 pushLineSeparator();
2414 star = currentCharacter == '*';
2416 currentCharacter = source[currentPosition++];
2417 // if (((currentCharacter = source[currentPosition++])
2419 // && (source[currentPosition] == 'u')) {
2420 // //-------------unicode traitement ------------
2421 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2422 // currentPosition++;
2423 // while (source[currentPosition] == 'u') {
2424 // currentPosition++;
2427 // Character.getNumericValue(source[currentPosition++]))
2431 // Character.getNumericValue(source[currentPosition++]))
2435 // Character.getNumericValue(source[currentPosition++]))
2439 // Character.getNumericValue(source[currentPosition++]))
2442 // //error don't care of the value
2443 // currentCharacter = 'A';
2444 // } //something different from * and /
2446 // currentCharacter =
2447 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2451 } catch (IndexOutOfBoundsException e) {
2459 if (isPHPIdentOrVarStart(currentCharacter)) {
2461 scanIdentifierOrKeyword((currentCharacter == '$'));
2462 } catch (InvalidInputException ex) {
2467 if (ObviousIdentCharNatures[currentCharacter] == C_DIGIT) {
2468 // if (Character.isDigit(currentCharacter)) {
2471 } catch (InvalidInputException ex) {
2478 // -----------------end switch while try--------------------
2479 } catch (IndexOutOfBoundsException e) {
2480 } catch (InvalidInputException e) {
2485 // public final boolean jumpOverUnicodeWhiteSpace()
2486 // throws InvalidInputException {
2488 // //handle the case of unicode. Jump over the next whiteSpace
2489 // //making startPosition pointing on the next available char
2490 // //On false, the currentCharacter is filled up with a potential
2494 // this.wasAcr = false;
2495 // int c1, c2, c3, c4;
2496 // int unicodeSize = 6;
2497 // currentPosition++;
2498 // while (source[currentPosition] == 'u') {
2499 // currentPosition++;
2503 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2505 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2507 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2509 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2511 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2514 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2515 // if (recordLineSeparator
2516 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2517 // pushLineSeparator();
2518 // if (Character.isWhitespace(currentCharacter))
2521 // //buffer the new char which is not a white space
2522 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2523 // //withoutUnicodePtr == 1 is true here
2525 // } catch (IndexOutOfBoundsException e) {
2526 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2529 public final int[] getLineEnds() {
2530 // return a bounded copy of this.lineEnds
2532 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2536 public char[] getSource() {
2540 public static boolean isIdentifierOrKeyword(int token) {
2541 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
2544 final char[] optimizedCurrentTokenSource1() {
2545 // return always the same char[] build only once
2546 // optimization at no speed cost of 99.5 % of the singleCharIdentifier
2547 char charOne = source[startPosition];
2602 return new char[] { charOne };
2606 final char[] optimizedCurrentTokenSource2() {
2608 c0 = source[startPosition];
2609 c1 = source[startPosition + 1];
2611 // return always the same char[] build only once
2612 // optimization at no speed cost of 99.5 % of the singleCharIdentifier
2615 return charArray_va;
2617 return charArray_vb;
2619 return charArray_vc;
2621 return charArray_vd;
2623 return charArray_ve;
2625 return charArray_vf;
2627 return charArray_vg;
2629 return charArray_vh;
2631 return charArray_vi;
2633 return charArray_vj;
2635 return charArray_vk;
2637 return charArray_vl;
2639 return charArray_vm;
2641 return charArray_vn;
2643 return charArray_vo;
2645 return charArray_vp;
2647 return charArray_vq;
2649 return charArray_vr;
2651 return charArray_vs;
2653 return charArray_vt;
2655 return charArray_vu;
2657 return charArray_vv;
2659 return charArray_vw;
2661 return charArray_vx;
2663 return charArray_vy;
2665 return charArray_vz;
2668 // try to return the same char[] build only once
2669 int hash = ((c0 << 6) + c1) % TableSize;
2670 char[][] table = charArray_length[0][hash];
2672 while (++i < InternalTableSize) {
2673 char[] charArray = table[i];
2674 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2677 // ---------other side---------
2679 int max = newEntry2;
2680 while (++i <= max) {
2681 char[] charArray = table[i];
2682 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2685 // --------add the entry-------
2686 if (++max >= InternalTableSize)
2689 table[max] = (r = new char[] { c0, c1 });
2694 final char[] optimizedCurrentTokenSource3() {
2695 // try to return the same char[] build only once
2697 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2699 char[][] table = charArray_length[1][hash];
2701 while (++i < InternalTableSize) {
2702 char[] charArray = table[i];
2703 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2706 // ---------other side---------
2708 int max = newEntry3;
2709 while (++i <= max) {
2710 char[] charArray = table[i];
2711 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2714 // --------add the entry-------
2715 if (++max >= InternalTableSize)
2718 table[max] = (r = new char[] { c0, c1, c2 });
2723 final char[] optimizedCurrentTokenSource4() {
2724 // try to return the same char[] build only once
2725 char c0, c1, c2, c3;
2726 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2727 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2729 char[][] table = charArray_length[2][(int) hash];
2731 while (++i < InternalTableSize) {
2732 char[] charArray = table[i];
2733 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2736 // ---------other side---------
2738 int max = newEntry4;
2739 while (++i <= max) {
2740 char[] charArray = table[i];
2741 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2744 // --------add the entry-------
2745 if (++max >= InternalTableSize)
2748 table[max] = (r = new char[] { c0, c1, c2, c3 });
2753 final char[] optimizedCurrentTokenSource5() {
2754 // try to return the same char[] build only once
2755 char c0, c1, c2, c3, c4;
2756 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2757 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2759 char[][] table = charArray_length[3][(int) hash];
2761 while (++i < InternalTableSize) {
2762 char[] charArray = table[i];
2763 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2766 // ---------other side---------
2768 int max = newEntry5;
2769 while (++i <= max) {
2770 char[] charArray = table[i];
2771 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2774 // --------add the entry-------
2775 if (++max >= InternalTableSize)
2778 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2783 final char[] optimizedCurrentTokenSource6() {
2784 // try to return the same char[] build only once
2785 char c0, c1, c2, c3, c4, c5;
2786 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2787 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2788 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2790 char[][] table = charArray_length[4][(int) hash];
2792 while (++i < InternalTableSize) {
2793 char[] charArray = table[i];
2794 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2795 && (c5 == charArray[5]))
2798 // ---------other side---------
2800 int max = newEntry6;
2801 while (++i <= max) {
2802 char[] charArray = table[i];
2803 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2804 && (c5 == charArray[5]))
2807 // --------add the entry-------
2808 if (++max >= InternalTableSize)
2811 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2816 public final void pushLineSeparator() throws InvalidInputException {
2817 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2818 final int INCREMENT = 250;
2819 if (this.checkNonExternalizedStringLiterals) {
2820 // reinitialize the current line for non externalize strings purpose
2823 // currentCharacter is at position currentPosition-1
2825 if (currentCharacter == '\r') {
2826 int separatorPos = currentPosition - 1;
2827 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2829 // System.out.println("CR-" + separatorPos);
2831 lineEnds[++linePtr] = separatorPos;
2832 } catch (IndexOutOfBoundsException e) {
2833 // linePtr value is correct
2834 int oldLength = lineEnds.length;
2835 int[] old = lineEnds;
2836 lineEnds = new int[oldLength + INCREMENT];
2837 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2838 lineEnds[linePtr] = separatorPos;
2840 // look-ahead for merged cr+lf
2842 if (source[currentPosition] == '\n') {
2843 // System.out.println("look-ahead LF-" + currentPosition);
2844 lineEnds[linePtr] = currentPosition;
2850 } catch (IndexOutOfBoundsException e) {
2855 if (currentCharacter == '\n') {
2856 // must merge eventual cr followed by lf
2857 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2858 // System.out.println("merge LF-" + (currentPosition - 1));
2859 lineEnds[linePtr] = currentPosition - 1;
2861 int separatorPos = currentPosition - 1;
2862 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2864 // System.out.println("LF-" + separatorPos);
2866 lineEnds[++linePtr] = separatorPos;
2867 } catch (IndexOutOfBoundsException e) {
2868 // linePtr value is correct
2869 int oldLength = lineEnds.length;
2870 int[] old = lineEnds;
2871 lineEnds = new int[oldLength + INCREMENT];
2872 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2873 lineEnds[linePtr] = separatorPos;
2881 public final void pushUnicodeLineSeparator() {
2882 // isUnicode means that the \r or \n has been read as a unicode character
2883 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2884 final int INCREMENT = 250;
2885 // currentCharacter is at position currentPosition-1
2886 if (this.checkNonExternalizedStringLiterals) {
2887 // reinitialize the current line for non externalize strings purpose
2891 if (currentCharacter == '\r') {
2892 int separatorPos = currentPosition - 6;
2893 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2895 // System.out.println("CR-" + separatorPos);
2897 lineEnds[++linePtr] = separatorPos;
2898 } catch (IndexOutOfBoundsException e) {
2899 // linePtr value is correct
2900 int oldLength = lineEnds.length;
2901 int[] old = lineEnds;
2902 lineEnds = new int[oldLength + INCREMENT];
2903 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2904 lineEnds[linePtr] = separatorPos;
2906 // look-ahead for merged cr+lf
2907 if (source[currentPosition] == '\n') {
2908 // System.out.println("look-ahead LF-" + currentPosition);
2909 lineEnds[linePtr] = currentPosition;
2917 if (currentCharacter == '\n') {
2918 // must merge eventual cr followed by lf
2919 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2920 // System.out.println("merge LF-" + (currentPosition - 1));
2921 lineEnds[linePtr] = currentPosition - 6;
2923 int separatorPos = currentPosition - 6;
2924 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2926 // System.out.println("LF-" + separatorPos);
2928 lineEnds[++linePtr] = separatorPos;
2929 } catch (IndexOutOfBoundsException e) {
2930 // linePtr value is correct
2931 int oldLength = lineEnds.length;
2932 int[] old = lineEnds;
2933 lineEnds = new int[oldLength + INCREMENT];
2934 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2935 lineEnds[linePtr] = separatorPos;
2943 public void recordComment(int token) {
2945 int stopPosition = this.currentPosition;
2947 case TokenNameCOMMENT_LINE:
2948 stopPosition = -this.lastCommentLinePosition;
2950 case TokenNameCOMMENT_BLOCK:
2951 stopPosition = -this.currentPosition;
2955 // a new comment is recorded
2956 int length = this.commentStops.length;
2957 if (++this.commentPtr >= length) {
2958 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2959 // grows the positions buffers too
2960 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2962 this.commentStops[this.commentPtr] = stopPosition;
2963 this.commentStarts[this.commentPtr] = this.startPosition;
2966 // public final void recordComment(boolean isJavadoc) {
2967 // // a new annotation comment is recorded
2969 // commentStops[++commentPtr] = isJavadoc
2970 // ? currentPosition
2971 // : -currentPosition;
2972 // } catch (IndexOutOfBoundsException e) {
2973 // int oldStackLength = commentStops.length;
2974 // int[] oldStack = commentStops;
2975 // commentStops = new int[oldStackLength + 30];
2976 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2977 // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2978 // //grows the positions buffers too
2979 // int[] old = commentStarts;
2980 // commentStarts = new int[oldStackLength + 30];
2981 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2983 // //the buffer is of a correct size here
2984 // commentStarts[commentPtr] = startPosition;
2986 public void resetTo(int begin, int end) {
2987 // reset the scanner to a given position where it may rescan again
2989 initialPosition = startPosition = currentPosition = begin;
2990 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2991 commentPtr = -1; // reset comment stack
2994 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2995 // the string with "\\u" is a legal string of two chars \ and u
2996 // thus we use a direct access to the source (for regular cases).
2997 // if (unicodeAsBackSlash) {
2998 // // consume next character
2999 // unicodeAsBackSlash = false;
3000 // if (((currentCharacter = source[currentPosition++]) == '\\')
3001 // && (source[currentPosition] == 'u')) {
3002 // getNextUnicodeChar();
3004 // if (withoutUnicodePtr != 0) {
3005 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3009 currentCharacter = source[currentPosition++];
3010 switch (currentCharacter) {
3012 currentCharacter = '\'';
3015 currentCharacter = '\\';
3018 currentCharacter = '\\';
3023 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
3024 currentCharacter = source[currentPosition++];
3025 switch (currentCharacter) {
3027 // currentCharacter = '\b';
3030 currentCharacter = '\t';
3033 currentCharacter = '\n';
3036 // currentCharacter = '\f';
3039 currentCharacter = '\r';
3042 currentCharacter = '\"';
3045 currentCharacter = '\'';
3048 currentCharacter = '\\';
3051 currentCharacter = '$';
3054 // -----------octal escape--------------
3056 // OctalDigit OctalDigit
3057 // ZeroToThree OctalDigit OctalDigit
3058 int number = Character.getNumericValue(currentCharacter);
3059 if (number >= 0 && number <= 7) {
3060 boolean zeroToThreeNot = number > 3;
3061 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3062 int digit = Character.getNumericValue(currentCharacter);
3063 if (digit >= 0 && digit <= 7) {
3064 number = (number * 8) + digit;
3065 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3066 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3067 // Digit --> ignore last character
3070 digit = Character.getNumericValue(currentCharacter);
3071 if (digit >= 0 && digit <= 7) {
3072 // has read \ZeroToThree OctalDigit OctalDigit
3073 number = (number * 8) + digit;
3074 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3075 // --> ignore last character
3079 } else { // has read \OctalDigit NonDigit--> ignore last
3083 } else { // has read \OctalDigit NonOctalDigit--> ignore last
3087 } else { // has read \OctalDigit --> ignore last character
3091 throw new InvalidInputException(INVALID_ESCAPE);
3092 currentCharacter = (char) number;
3095 // throw new InvalidInputException(INVALID_ESCAPE);
3099 // public int scanIdentifierOrKeyword() throws InvalidInputException {
3100 // return scanIdentifierOrKeyword( false );
3102 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
3104 // first dispatch on the first char.
3105 // then the length. If there are several
3106 // keywors with the same length AND the same first char, then do another
3107 // disptach on the second char :-)...cool....but fast !
3108 useAssertAsAnIndentifier = false;
3109 while (getNextCharAsJavaIdentifierPart()) {
3113 // if (new String(getCurrentTokenSource()).equals("$this")) {
3114 // return TokenNamethis;
3116 return TokenNameVariable;
3121 // if (withoutUnicodePtr == 0)
3122 // quick test on length == 1 but not on length > 12 while most identifier
3123 // have a length which is <= 12...but there are lots of identifier with
3124 // only one char....
3126 if ((length = currentPosition - startPosition) == 1)
3127 return TokenNameIdentifier;
3129 data = new char[length];
3130 index = startPosition;
3131 for (int i = 0; i < length; i++) {
3132 data[i] = Character.toLowerCase(source[index + i]);
3136 // if ((length = withoutUnicodePtr) == 1)
3137 // return TokenNameIdentifier;
3138 // // data = withoutUnicodeBuffer;
3139 // data = new char[withoutUnicodeBuffer.length];
3140 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3141 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3145 firstLetter = data[index];
3146 switch (firstLetter) {
3151 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3152 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3153 return TokenNameFILE;
3154 index = 0; // __LINE__
3155 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3156 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3157 return TokenNameLINE;
3161 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3162 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3163 return TokenNameCLASS_C;
3167 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3168 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3169 && (data[++index] == '_'))
3170 return TokenNameMETHOD_C;
3174 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3175 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3176 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3177 return TokenNameFUNC_C;
3180 return TokenNameIdentifier;
3182 // as and array abstract
3186 if ((data[++index] == 's')) {
3189 return TokenNameIdentifier;
3193 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3194 return TokenNameand;
3196 return TokenNameIdentifier;
3200 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3201 return TokenNamearray;
3203 return TokenNameIdentifier;
3205 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3206 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3207 return TokenNameabstract;
3209 return TokenNameIdentifier;
3211 return TokenNameIdentifier;
3217 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3218 return TokenNamebreak;
3220 return TokenNameIdentifier;
3222 return TokenNameIdentifier;
3225 // case catch class clone const continue
3228 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3229 return TokenNamecase;
3231 return TokenNameIdentifier;
3233 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3234 return TokenNamecatch;
3236 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3237 return TokenNameclass;
3239 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3240 return TokenNameclone;
3242 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3243 return TokenNameconst;
3245 return TokenNameIdentifier;
3247 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3248 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3249 return TokenNamecontinue;
3251 return TokenNameIdentifier;
3253 return TokenNameIdentifier;
3256 // declare default do die
3257 // TODO delete define ==> no keyword !
3260 if ((data[++index] == 'o'))
3263 return TokenNameIdentifier;
3265 // if ((data[++index] == 'e')
3266 // && (data[++index] == 'f')
3267 // && (data[++index] == 'i')
3268 // && (data[++index] == 'n')
3269 // && (data[++index] == 'e'))
3270 // return TokenNamedefine;
3272 // return TokenNameIdentifier;
3274 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3275 && (data[++index] == 'r') && (data[++index] == 'e'))
3276 return TokenNamedeclare;
3278 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3279 && (data[++index] == 'l') && (data[++index] == 't'))
3280 return TokenNamedefault;
3282 return TokenNameIdentifier;
3284 return TokenNameIdentifier;
3287 // echo else exit elseif extends eval
3290 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3291 return TokenNameecho;
3292 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3293 return TokenNameelse;
3294 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3295 return TokenNameexit;
3296 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3297 return TokenNameeval;
3299 return TokenNameIdentifier;
3302 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3303 return TokenNameendif;
3304 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3305 return TokenNameempty;
3307 return TokenNameIdentifier;
3310 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3311 && (data[++index] == 'r'))
3312 return TokenNameendfor;
3313 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3314 && (data[++index] == 'f'))
3315 return TokenNameelseif;
3317 return TokenNameIdentifier;
3319 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3320 && (data[++index] == 'd') && (data[++index] == 's'))
3321 return TokenNameextends;
3323 return TokenNameIdentifier;
3326 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3327 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3328 return TokenNameendwhile;
3330 return TokenNameIdentifier;
3333 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3334 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3335 return TokenNameendswitch;
3337 return TokenNameIdentifier;
3340 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3341 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3342 && (data[++index] == 'e'))
3343 return TokenNameenddeclare;
3345 if ((data[++index] == 'n') // endforeach
3346 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3347 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3348 return TokenNameendforeach;
3350 return TokenNameIdentifier;
3352 return TokenNameIdentifier;
3355 // for false final function
3358 if ((data[++index] == 'o') && (data[++index] == 'r'))
3359 return TokenNamefor;
3361 return TokenNameIdentifier;
3363 // if ((data[++index] == 'a') && (data[++index] == 'l')
3364 // && (data[++index] == 's') && (data[++index] == 'e'))
3365 // return TokenNamefalse;
3366 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3367 return TokenNamefinal;
3369 return TokenNameIdentifier;
3372 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3373 && (data[++index] == 'c') && (data[++index] == 'h'))
3374 return TokenNameforeach;
3376 return TokenNameIdentifier;
3379 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3380 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3381 return TokenNamefunction;
3383 return TokenNameIdentifier;
3385 return TokenNameIdentifier;
3390 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3391 && (data[++index] == 'l')) {
3392 return TokenNameglobal;
3395 return TokenNameIdentifier;
3397 // if int isset include include_once instanceof interface implements
3400 if (data[++index] == 'f')
3403 return TokenNameIdentifier;
3405 // if ((data[++index] == 'n') && (data[++index] == 't'))
3406 // return TokenNameint;
3408 // return TokenNameIdentifier;
3410 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3411 return TokenNameisset;
3413 return TokenNameIdentifier;
3415 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3416 && (data[++index] == 'd') && (data[++index] == 'e'))
3417 return TokenNameinclude;
3419 return TokenNameIdentifier;
3422 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3423 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3424 return TokenNameinterface;
3426 return TokenNameIdentifier;
3429 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3430 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3431 && (data[++index] == 'f'))
3432 return TokenNameinstanceof;
3433 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3434 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3435 && (data[++index] == 's'))
3436 return TokenNameimplements;
3438 return TokenNameIdentifier;
3440 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3441 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3442 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3443 return TokenNameinclude_once;
3445 return TokenNameIdentifier;
3447 return TokenNameIdentifier;
3452 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3453 return TokenNamelist;
3456 return TokenNameIdentifier;
3461 if ((data[++index] == 'e') && (data[++index] == 'w'))
3462 return TokenNamenew;
3464 return TokenNameIdentifier;
3466 // if ((data[++index] == 'u') && (data[++index] == 'l')
3467 // && (data[++index] == 'l'))
3468 // return TokenNamenull;
3470 // return TokenNameIdentifier;
3472 return TokenNameIdentifier;
3477 if (data[++index] == 'r') {
3481 // if (length == 12) {
3482 // if ((data[++index] == 'l')
3483 // && (data[++index] == 'd')
3484 // && (data[++index] == '_')
3485 // && (data[++index] == 'f')
3486 // && (data[++index] == 'u')
3487 // && (data[++index] == 'n')
3488 // && (data[++index] == 'c')
3489 // && (data[++index] == 't')
3490 // && (data[++index] == 'i')
3491 // && (data[++index] == 'o')
3492 // && (data[++index] == 'n')) {
3493 // return TokenNameold_function;
3496 return TokenNameIdentifier;
3498 // print public private protected
3501 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3502 return TokenNameprint;
3504 return TokenNameIdentifier;
3506 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3507 && (data[++index] == 'c')) {
3508 return TokenNamepublic;
3510 return TokenNameIdentifier;
3512 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3513 && (data[++index] == 't') && (data[++index] == 'e')) {
3514 return TokenNameprivate;
3516 return TokenNameIdentifier;
3518 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3519 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3520 return TokenNameprotected;
3522 return TokenNameIdentifier;
3524 return TokenNameIdentifier;
3526 // return require require_once
3528 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3529 && (data[++index] == 'n')) {
3530 return TokenNamereturn;
3532 } else if (length == 7) {
3533 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3534 && (data[++index] == 'r') && (data[++index] == 'e')) {
3535 return TokenNamerequire;
3537 } else if (length == 12) {
3538 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3539 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3540 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3541 return TokenNamerequire_once;
3544 return TokenNameIdentifier;
3546 // self static switch
3549 // if ((data[++index] == 'e') && (data[++index] == 'l') && (data[++index] == 'f')) {
3550 // return TokenNameself;
3552 // return TokenNameIdentifier;
3554 if (data[++index] == 't')
3555 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3556 return TokenNamestatic;
3558 return TokenNameIdentifier;
3559 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3560 && (data[++index] == 'h'))
3561 return TokenNameswitch;
3563 return TokenNameIdentifier;
3565 return TokenNameIdentifier;
3571 if ((data[++index] == 'r') && (data[++index] == 'y'))
3572 return TokenNametry;
3574 return TokenNameIdentifier;
3576 // if ((data[++index] == 'r') && (data[++index] == 'u')
3577 // && (data[++index] == 'e'))
3578 // return TokenNametrue;
3580 // return TokenNameIdentifier;
3582 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3583 return TokenNamethrow;
3585 return TokenNameIdentifier;
3587 return TokenNameIdentifier;
3593 if ((data[++index] == 's') && (data[++index] == 'e'))
3594 return TokenNameuse;
3596 return TokenNameIdentifier;
3598 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3599 return TokenNameunset;
3601 return TokenNameIdentifier;
3603 return TokenNameIdentifier;
3609 if ((data[++index] == 'a') && (data[++index] == 'r'))
3610 return TokenNamevar;
3612 return TokenNameIdentifier;
3614 return TokenNameIdentifier;
3620 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3621 return TokenNamewhile;
3623 return TokenNameIdentifier;
3624 // case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3625 // (data[++index]=='e') && (data[++index]=='f')&&
3626 // (data[++index]=='p'))
3627 // return TokenNamewidefp ;
3629 // return TokenNameIdentifier;
3631 return TokenNameIdentifier;
3637 if ((data[++index] == 'o') && (data[++index] == 'r'))
3638 return TokenNamexor;
3640 return TokenNameIdentifier;
3642 return TokenNameIdentifier;
3645 return TokenNameIdentifier;
3649 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3650 // when entering this method the currentCharacter is the firt
3651 // digit of the number , i.e. it may be preceeded by a . when
3652 // dotPrefix is true
3653 boolean floating = dotPrefix;
3654 if ((!dotPrefix) && (currentCharacter == '0')) {
3655 if (getNextChar('x', 'X') >= 0) { // ----------hexa-----------------
3656 // force the first char of the hexa number do exist...
3657 // consume next character
3658 unicodeAsBackSlash = false;
3659 currentCharacter = source[currentPosition++];
3660 // if (((currentCharacter = source[currentPosition++]) == '\\')
3661 // && (source[currentPosition] == 'u')) {
3662 // getNextUnicodeChar();
3664 // if (withoutUnicodePtr != 0) {
3665 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3668 if (Character.digit(currentCharacter, 16) == -1)
3669 throw new InvalidInputException(INVALID_HEXA);
3671 while (getNextCharAsDigit(16)) {
3674 // if (getNextChar('l', 'L') >= 0)
3675 // return TokenNameLongLiteral;
3677 return TokenNameIntegerLiteral;
3679 // there is x or X in the number
3680 // potential octal ! ... some one may write 000099.0 ! thus 00100 <
3681 // 00078.0 is true !!!!! crazy language
3682 if (getNextCharAsDigit()) {
3683 // -------------potential octal-----------------
3684 while (getNextCharAsDigit()) {
3687 // if (getNextChar('l', 'L') >= 0) {
3688 // return TokenNameLongLiteral;
3691 // if (getNextChar('f', 'F') >= 0) {
3692 // return TokenNameFloatingPointLiteral;
3694 if (getNextChar('d', 'D') >= 0) {
3695 return TokenNameDoubleLiteral;
3696 } else { // make the distinction between octal and float ....
3697 if (getNextChar('.')) { // bingo ! ....
3698 while (getNextCharAsDigit()) {
3701 if (getNextChar('e', 'E') >= 0) {
3702 // consume next character
3703 unicodeAsBackSlash = false;
3704 currentCharacter = source[currentPosition++];
3705 // if (((currentCharacter = source[currentPosition++]) == '\\')
3706 // && (source[currentPosition] == 'u')) {
3707 // getNextUnicodeChar();
3709 // if (withoutUnicodePtr != 0) {
3710 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3713 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3714 // consume next character
3715 unicodeAsBackSlash = false;
3716 currentCharacter = source[currentPosition++];
3717 // if (((currentCharacter = source[currentPosition++]) == '\\')
3718 // && (source[currentPosition] == 'u')) {
3719 // getNextUnicodeChar();
3721 // if (withoutUnicodePtr != 0) {
3722 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3723 // currentCharacter;
3727 if (!Character.isDigit(currentCharacter))
3728 throw new InvalidInputException(INVALID_FLOAT);
3729 while (getNextCharAsDigit()) {
3733 // if (getNextChar('f', 'F') >= 0)
3734 // return TokenNameFloatingPointLiteral;
3735 getNextChar('d', 'D'); // jump over potential d or D
3736 return TokenNameDoubleLiteral;
3738 return TokenNameIntegerLiteral;
3745 while (getNextCharAsDigit()) {
3748 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3749 // return TokenNameLongLiteral;
3750 if ((!dotPrefix) && (getNextChar('.'))) { // decimal part that can be empty
3751 while (getNextCharAsDigit()) {
3756 // if floating is true both exponant and suffix may be optional
3757 if (getNextChar('e', 'E') >= 0) {
3759 // consume next character
3760 unicodeAsBackSlash = false;
3761 currentCharacter = source[currentPosition++];
3762 // if (((currentCharacter = source[currentPosition++]) == '\\')
3763 // && (source[currentPosition] == 'u')) {
3764 // getNextUnicodeChar();
3766 // if (withoutUnicodePtr != 0) {
3767 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3770 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3773 unicodeAsBackSlash = false;
3774 currentCharacter = source[currentPosition++];
3775 // if (((currentCharacter = source[currentPosition++]) == '\\')
3776 // && (source[currentPosition] == 'u')) {
3777 // getNextUnicodeChar();
3779 // if (withoutUnicodePtr != 0) {
3780 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3784 if (!Character.isDigit(currentCharacter))
3785 throw new InvalidInputException(INVALID_FLOAT);
3786 while (getNextCharAsDigit()) {
3790 if (getNextChar('d', 'D') >= 0)
3791 return TokenNameDoubleLiteral;
3792 // if (getNextChar('f', 'F') >= 0)
3793 // return TokenNameFloatingPointLiteral;
3794 // the long flag has been tested before
3795 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3799 * Search the line number corresponding to a specific position
3802 public final int getLineNumber(int position) {
3803 if (lineEnds == null)
3805 int length = linePtr + 1;
3808 int g = 0, d = length - 1;
3812 if (position < lineEnds[m]) {
3814 } else if (position > lineEnds[m]) {
3820 if (position < lineEnds[m]) {
3826 public void setPHPMode(boolean mode) {
3830 public final void setSource(char[] source) {
3831 setSource(null, source);
3834 public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3835 // the source-buffer is set to sourceString
3836 this.compilationUnit = compilationUnit;
3837 if (source == null) {
3838 this.source = new char[0];
3840 this.source = source;
3843 initialPosition = currentPosition = 0;
3844 containsAssertKeyword = false;
3845 withoutUnicodeBuffer = new char[this.source.length];
3846 // encapsedStringStack = new Stack();
3849 public String toString() {
3850 if (startPosition == source.length)
3851 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3852 if (currentPosition > source.length)
3853 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3854 char front[] = new char[startPosition];
3855 System.arraycopy(source, 0, front, 0, startPosition);
3856 int middleLength = (currentPosition - 1) - startPosition + 1;
3858 if (middleLength > -1) {
3859 middle = new char[middleLength];
3860 System.arraycopy(source, startPosition, middle, 0, middleLength);
3862 middle = new char[0];
3864 char end[] = new char[source.length - (currentPosition - 1)];
3865 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3866 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3867 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable representation of the token kind <code>act</code>.
 * Keywords and operators are rendered as fixed text; tokens that carry text
 * (identifiers, variables, literals, comments, whitespace, inline HTML) are
 * rendered as a label followed by the current token source in parentheses.
 * Any other value is rendered as "not-a-token(act)" followed by the current
 * token source.
 *
 * NOTE(review): several case labels and a few return statements of this
 * method are not visible in this view of the file.
 *
 * @param act the token kind to describe
 * @return the printable representation of the token kind
 */
3871 public final String toStringAction(int act) {
3873 case TokenNameERROR:
3874 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3876 case TokenNameINLINE_HTML:
3877 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3878 case TokenNameIdentifier:
3879 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3880 case TokenNameVariable:
3881 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// ---- keywords ----
3882 case TokenNameabstract:
3883 return "abstract"; //$NON-NLS-1$
3885 return "AND"; //$NON-NLS-1$
3886 case TokenNamearray:
3887 return "array"; //$NON-NLS-1$
3889 return "as"; //$NON-NLS-1$
3890 case TokenNamebreak:
3891 return "break"; //$NON-NLS-1$
3893 return "case"; //$NON-NLS-1$
3894 case TokenNameclass:
3895 return "class"; //$NON-NLS-1$
3896 case TokenNamecatch:
3897 return "catch"; //$NON-NLS-1$
3898 case TokenNameclone:
3901 case TokenNameconst:
3904 case TokenNamecontinue:
3905 return "continue"; //$NON-NLS-1$
3906 case TokenNamedefault:
3907 return "default"; //$NON-NLS-1$
3908 // case TokenNamedefine :
3909 // return "define"; //$NON-NLS-1$
3911 return "do"; //$NON-NLS-1$
3913 return "echo"; //$NON-NLS-1$
3915 return "else"; //$NON-NLS-1$
3916 case TokenNameelseif:
3917 return "elseif"; //$NON-NLS-1$
3918 case TokenNameendfor:
3919 return "endfor"; //$NON-NLS-1$
3920 case TokenNameendforeach:
3921 return "endforeach"; //$NON-NLS-1$
3922 case TokenNameendif:
3923 return "endif"; //$NON-NLS-1$
3924 case TokenNameendswitch:
3925 return "endswitch"; //$NON-NLS-1$
3926 case TokenNameendwhile:
3927 return "endwhile"; //$NON-NLS-1$
3930 case TokenNameextends:
3931 return "extends"; //$NON-NLS-1$
3932 // case TokenNamefalse :
3933 // return "false"; //$NON-NLS-1$
3934 case TokenNamefinal:
3935 return "final"; //$NON-NLS-1$
3937 return "for"; //$NON-NLS-1$
3938 case TokenNameforeach:
3939 return "foreach"; //$NON-NLS-1$
3940 case TokenNamefunction:
3941 return "function"; //$NON-NLS-1$
3942 case TokenNameglobal:
3943 return "global"; //$NON-NLS-1$
3945 return "if"; //$NON-NLS-1$
3946 case TokenNameimplements:
3947 return "implements"; //$NON-NLS-1$
3948 case TokenNameinclude:
3949 return "include"; //$NON-NLS-1$
3950 case TokenNameinclude_once:
3951 return "include_once"; //$NON-NLS-1$
3952 case TokenNameinstanceof:
3953 return "instanceof"; //$NON-NLS-1$
3954 case TokenNameinterface:
3955 return "interface"; //$NON-NLS-1$
3956 case TokenNameisset:
3957 return "isset"; //$NON-NLS-1$
3959 return "list"; //$NON-NLS-1$
3961 return "new"; //$NON-NLS-1$
3962 // case TokenNamenull :
3963 // return "null"; //$NON-NLS-1$
3965 return "OR"; //$NON-NLS-1$
3966 case TokenNameprint:
3967 return "print"; //$NON-NLS-1$
3968 case TokenNameprivate:
3969 return "private"; //$NON-NLS-1$
3970 case TokenNameprotected:
3971 return "protected"; //$NON-NLS-1$
3972 case TokenNamepublic:
3973 return "public"; //$NON-NLS-1$
3974 case TokenNamerequire:
3975 return "require"; //$NON-NLS-1$
3976 case TokenNamerequire_once:
3977 return "require_once"; //$NON-NLS-1$
3978 case TokenNamereturn:
3979 return "return"; //$NON-NLS-1$
3980 // case TokenNameself:
3981 // return "self"; //$NON-NLS-1$
3982 case TokenNamestatic:
3983 return "static"; //$NON-NLS-1$
3984 case TokenNameswitch:
3985 return "switch"; //$NON-NLS-1$
3986 // case TokenNametrue :
3987 // return "true"; //$NON-NLS-1$
3988 case TokenNameunset:
3989 return "unset"; //$NON-NLS-1$
3991 return "var"; //$NON-NLS-1$
3992 case TokenNamewhile:
3993 return "while"; //$NON-NLS-1$
3995 return "XOR"; //$NON-NLS-1$
3996 // case TokenNamethis :
3997 // return "$this"; //$NON-NLS-1$
// ---- literals: the label is followed by the current token source ----
3998 case TokenNameIntegerLiteral:
3999 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4000 case TokenNameDoubleLiteral:
4001 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4002 case TokenNameStringDoubleQuote:
4003 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4004 case TokenNameStringSingleQuote:
4005 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4006 case TokenNameStringInterpolated:
4007 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4008 case TokenNameEncapsedString0:
4009 return "`"; //$NON-NLS-1$
4010 // case TokenNameEncapsedString1:
4011 // return "\'"; //$NON-NLS-1$
4012 // case TokenNameEncapsedString2:
4013 // return "\""; //$NON-NLS-1$
4014 case TokenNameSTRING:
4015 return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4016 case TokenNameHEREDOC:
4017 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// ---- operators and punctuation ----
4018 case TokenNamePLUS_PLUS:
4019 return "++"; //$NON-NLS-1$
4020 case TokenNameMINUS_MINUS:
4021 return "--"; //$NON-NLS-1$
4022 case TokenNameEQUAL_EQUAL:
4023 return "=="; //$NON-NLS-1$
4024 case TokenNameEQUAL_EQUAL_EQUAL:
4025 return "==="; //$NON-NLS-1$
4026 case TokenNameEQUAL_GREATER:
4027 return "=>"; //$NON-NLS-1$
4028 case TokenNameLESS_EQUAL:
4029 return "<="; //$NON-NLS-1$
4030 case TokenNameGREATER_EQUAL:
4031 return ">="; //$NON-NLS-1$
4032 case TokenNameNOT_EQUAL:
4033 return "!="; //$NON-NLS-1$
4034 case TokenNameNOT_EQUAL_EQUAL:
4035 return "!=="; //$NON-NLS-1$
4036 case TokenNameLEFT_SHIFT:
4037 return "<<"; //$NON-NLS-1$
4038 case TokenNameRIGHT_SHIFT:
4039 return ">>"; //$NON-NLS-1$
4040 case TokenNamePLUS_EQUAL:
4041 return "+="; //$NON-NLS-1$
4042 case TokenNameMINUS_EQUAL:
4043 return "-="; //$NON-NLS-1$
4044 case TokenNameMULTIPLY_EQUAL:
4045 return "*="; //$NON-NLS-1$
4046 case TokenNameDIVIDE_EQUAL:
4047 return "/="; //$NON-NLS-1$
4048 case TokenNameAND_EQUAL:
4049 return "&="; //$NON-NLS-1$
4050 case TokenNameOR_EQUAL:
4051 return "|="; //$NON-NLS-1$
4052 case TokenNameXOR_EQUAL:
4053 return "^="; //$NON-NLS-1$
4054 case TokenNameREMAINDER_EQUAL:
4055 return "%="; //$NON-NLS-1$
4056 case TokenNameDOT_EQUAL:
4057 return ".="; //$NON-NLS-1$
4058 case TokenNameLEFT_SHIFT_EQUAL:
4059 return "<<="; //$NON-NLS-1$
4060 case TokenNameRIGHT_SHIFT_EQUAL:
4061 return ">>="; //$NON-NLS-1$
4062 case TokenNameOR_OR:
4063 return "||"; //$NON-NLS-1$
4064 case TokenNameAND_AND:
4065 return "&&"; //$NON-NLS-1$
4067 return "+"; //$NON-NLS-1$
4068 case TokenNameMINUS:
4069 return "-"; //$NON-NLS-1$
4070 case TokenNameMINUS_GREATER:
4073 return "!"; //$NON-NLS-1$
4074 case TokenNameREMAINDER:
4075 return "%"; //$NON-NLS-1$
4077 return "^"; //$NON-NLS-1$
4079 return "&"; //$NON-NLS-1$
4080 case TokenNameMULTIPLY:
4081 return "*"; //$NON-NLS-1$
4083 return "|"; //$NON-NLS-1$
4084 case TokenNameTWIDDLE:
4085 return "~"; //$NON-NLS-1$
4086 case TokenNameTWIDDLE_EQUAL:
4087 return "~="; //$NON-NLS-1$
4088 case TokenNameDIVIDE:
4089 return "/"; //$NON-NLS-1$
4090 case TokenNameGREATER:
4091 return ">"; //$NON-NLS-1$
4093 return "<"; //$NON-NLS-1$
4094 case TokenNameLPAREN:
4095 return "("; //$NON-NLS-1$
4096 case TokenNameRPAREN:
4097 return ")"; //$NON-NLS-1$
4098 case TokenNameLBRACE:
4099 return "{"; //$NON-NLS-1$
4100 case TokenNameRBRACE:
4101 return "}"; //$NON-NLS-1$
4102 case TokenNameLBRACKET:
4103 return "["; //$NON-NLS-1$
4104 case TokenNameRBRACKET:
4105 return "]"; //$NON-NLS-1$
4106 case TokenNameSEMICOLON:
4107 return ";"; //$NON-NLS-1$
4108 case TokenNameQUESTION:
4109 return "?"; //$NON-NLS-1$
4110 case TokenNameCOLON:
4111 return ":"; //$NON-NLS-1$
4112 case TokenNameCOMMA:
4113 return ","; //$NON-NLS-1$
4115 return "."; //$NON-NLS-1$
4116 case TokenNameEQUAL:
4117 return "="; //$NON-NLS-1$
4120 case TokenNameDOLLAR:
4122 case TokenNameDOLLAR_LBRACE:
4124 case TokenNameLBRACE_DOLLAR:
4127 return "EOF"; //$NON-NLS-1$
// ---- whitespace and comments: the label is followed by the current token source ----
4128 case TokenNameWHITESPACE:
4129 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4130 case TokenNameCOMMENT_LINE:
4131 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4132 case TokenNameCOMMENT_BLOCK:
4133 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4134 case TokenNameCOMMENT_PHPDOC:
4135 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4136 // case TokenNameHTML :
4137 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
// ---- magic constants ----
4140 return "__FILE__"; //$NON-NLS-1$
4142 return "__LINE__"; //$NON-NLS-1$
4143 case TokenNameCLASS_C:
4144 return "__CLASS__"; //$NON-NLS-1$
4145 case TokenNameMETHOD_C:
4146 return "__METHOD__"; //$NON-NLS-1$
4147 case TokenNameFUNC_C:
4148 return "__FUNCTION__"; //$NON-NLS-1$
// ---- cast operators ----
4149 case TokenNameboolCAST:
4150 return "( bool )"; //$NON-NLS-1$
4151 case TokenNameintCAST:
4152 return "( int )"; //$NON-NLS-1$
4153 case TokenNamedoubleCAST:
4154 return "( double )"; //$NON-NLS-1$
4155 case TokenNameobjectCAST:
4156 return "( object )"; //$NON-NLS-1$
4157 case TokenNamestringCAST:
4158 return "( string )"; //$NON-NLS-1$
// fallback for values that none of the cases above handled
4160 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$ //$NON-NLS-2$
/**
 * Creates a scanner that does not check non-externalized string literals.
 *
 * @param tokenizeComments value for the {@code tokenizeComments} setting
 * @param tokenizeWhiteSpace value for the {@code tokenizeWhiteSpace} setting
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(tokenizeComments, tokenizeWhiteSpace, false);
}

/**
 * Creates a scanner with assert mode switched off.
 *
 * @param tokenizeComments value for the {@code tokenizeComments} setting
 * @param tokenizeWhiteSpace value for the {@code tokenizeWhiteSpace} setting
 * @param checkNonExternalizedStringLiterals value for the
 *          {@code checkNonExternalizedStringLiterals} setting
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
  this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
}

/**
 * Creates a scanner that does not tokenize strings, has no task tags and
 * matches task tags case sensitively.
 *
 * @param tokenizeComments value for the {@code tokenizeComments} setting
 * @param tokenizeWhiteSpace value for the {@code tokenizeWhiteSpace} setting
 * @param checkNonExternalizedStringLiterals value for the
 *          {@code checkNonExternalizedStringLiterals} setting
 * @param assertMode currently unused
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode) {
  this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null, true);
}

/**
 * Creates a fully configured scanner. The end-of-file position starts out as
 * {@code Integer.MAX_VALUE}.
 *
 * @param tokenizeComments value for the {@code tokenizeComments} setting
 * @param tokenizeWhiteSpace value for the {@code tokenizeWhiteSpace} setting
 * @param checkNonExternalizedStringLiterals value for the
 *          {@code checkNonExternalizedStringLiterals} setting
 * @param assertMode currently unused
 * @param tokenizeStrings value for the {@code tokenizeStrings} setting
 * @param taskTags the task tags to look for in comments; may be null
 * @param taskPriorities the priorities matching {@code taskTags} by index;
 *          may be null
 * @param isTaskCaseSensitive whether task tags are matched case sensitively
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
  this.eofPosition = Integer.MAX_VALUE;
  this.tokenizeComments = tokenizeComments;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeStrings = tokenizeStrings;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.taskTags = taskTags;
  this.taskPriorities = taskPriorities;
  // checkTaskTag() reads this flag, so the caller's choice must be stored
  this.isTaskCaseSensitive = isTaskCaseSensitive;
}
4194 private void checkNonExternalizeString() throws InvalidInputException {
4195 if (currentLine == null)
4197 parseTags(currentLine);
4200 private void parseTags(NLSLine line) throws InvalidInputException {
4201 String s = new String(getCurrentTokenSource());
4202 int pos = s.indexOf(TAG_PREFIX);
4203 int lineLength = line.size();
4205 int start = pos + TAG_PREFIX_LENGTH;
4206 int end = s.indexOf(TAG_POSTFIX, start);
4207 String index = s.substring(start, end);
4210 i = Integer.parseInt(index) - 1;
4211 // Tags are one based not zero based.
4212 } catch (NumberFormatException e) {
4213 i = -1; // we don't want to consider this as a valid NLS tag
4215 if (line.exists(i)) {
4218 pos = s.indexOf(TAG_PREFIX, start);
4220 this.nonNLSStrings = new StringLiteral[lineLength];
4221 int nonNLSCounter = 0;
4222 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4223 StringLiteral literal = (StringLiteral) iterator.next();
4224 if (literal != null) {
4225 this.nonNLSStrings[nonNLSCounter++] = literal;
4228 if (nonNLSCounter == 0) {
4229 this.nonNLSStrings = null;
4233 this.wasNonExternalizedStringLiteral = true;
4234 if (nonNLSCounter != lineLength) {
4235 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
/**
 * Scans the character(s) following a backslash and stores the resulting
 * character in <code>currentCharacter</code>. The switch replaces the escape
 * with '\b', '\t', '\n', '\f', '\r', '\"', '\'' or '\\'; anything else is
 * handled as an octal escape of up to three digits.
 *
 * NOTE(review): the case labels of the switch and some branch lines are not
 * visible in this view of the file.
 *
 * @throws InvalidInputException
 *           with INVALID_ESCAPE when the escape is not valid
 */
4240 public final void scanEscapeCharacter() throws InvalidInputException {
4241 // the string with "\\u" is a legal string of two chars \ and u
4242 // thus we use a direct access to the source (for regular cases).
4243 if (unicodeAsBackSlash) {
4244 // consume next character
4245 unicodeAsBackSlash = false;
4246 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4247 // (source[currentPosition] == 'u')) {
4248 // getNextUnicodeChar();
// record the current character when the unicode-free buffer is in use
4250 if (withoutUnicodePtr != 0) {
4251 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// read the escaped character directly from the source
4255 currentCharacter = source[currentPosition++];
4256 switch (currentCharacter) {
4258 currentCharacter = '\b';
4261 currentCharacter = '\t';
4264 currentCharacter = '\n';
4267 currentCharacter = '\f';
4270 currentCharacter = '\r';
4273 currentCharacter = '\"';
4276 currentCharacter = '\'';
4279 currentCharacter = '\\';
4282 // -----------octal escape--------------
4284 // OctalDigit OctalDigit
4285 // ZeroToThree OctalDigit OctalDigit
4286 int number = Character.getNumericValue(currentCharacter);
4287 if (number >= 0 && number <= 7) {
// a first digit above 3 allows at most one more octal digit
4288 boolean zeroToThreeNot = number > 3;
4289 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4290 int digit = Character.getNumericValue(currentCharacter);
4291 if (digit >= 0 && digit <= 7) {
4292 number = (number * 8) + digit;
4293 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4294 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4295 // Digit --> ignore last character
4298 digit = Character.getNumericValue(currentCharacter);
4299 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4300 // OctalDigit OctalDigit
4301 number = (number * 8) + digit;
4302 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4303 // --> ignore last character
4307 } else { // has read \OctalDigit NonDigit--> ignore last
4311 } else { // has read \OctalDigit NonOctalDigit--> ignore last
4315 } else { // has read \OctalDigit --> ignore last character
4319 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the scanned character
4320 currentCharacter = (char) number;
4322 throw new InvalidInputException(INVALID_ESCAPE);
4326 // chech presence of task: tags
4327 // TODO (frederic) see if we need to take unicode characters into account...
4328 public void checkTaskTag(int commentStart, int commentEnd) {
4329 char[] src = this.source;
4331 // only look for newer task: tags
4332 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4335 int foundTaskIndex = this.foundTaskCount;
4336 char previous = src[commentStart + 1]; // should be '*' or '/'
4337 nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4339 char[] priority = null;
4340 // check for tag occurrence only if not ambiguous with javadoc tag
4341 if (previous != '@') {
4342 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4343 tag = this.taskTags[itag];
4344 int tagLength = tag.length;
4348 // ensure tag is not leaded with letter if tag starts with a letter
4349 if (Scanner.isPHPIdentifierStart(tag[0])) {
4350 if (Scanner.isPHPIdentifierPart(previous)) {
4355 for (int t = 0; t < tagLength; t++) {
4358 if (x >= this.eofPosition || x >= commentEnd)
4360 if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4361 if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case
4368 // ensure tag is not followed with letter if tag finishes with a
4370 if (i + tagLength < commentEnd && Scanner.isPHPIdentifierPart(src[i + tagLength - 1])) {
4371 if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
4374 if (this.foundTaskTags == null) {
4375 this.foundTaskTags = new char[5][];
4376 this.foundTaskMessages = new char[5][];
4377 this.foundTaskPriorities = new char[5][];
4378 this.foundTaskPositions = new int[5][];
4379 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4380 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4381 this.foundTaskCount);
4382 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4383 this.foundTaskCount);
4384 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4385 this.foundTaskCount);
4386 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4387 this.foundTaskCount);
4390 priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4392 this.foundTaskTags[this.foundTaskCount] = tag;
4393 this.foundTaskPriorities[this.foundTaskCount] = priority;
4394 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4395 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4396 this.foundTaskCount++;
4397 i += tagLength - 1; // will be incremented when looping
4403 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4404 // retrieve message start and end positions
4405 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4406 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4407 // at most beginning of next task
4408 if (max_value < msgStart) {
4409 max_value = msgStart; // would only occur if tag is before EOF.
4413 for (int j = msgStart; j < max_value; j++) {
4414 if ((c = src[j]) == '\n' || c == '\r') {
4420 for (int j = max_value; j > msgStart; j--) {
4421 if ((c = src[j]) == '*') {
4429 if (msgStart == end)
4432 while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4434 while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4436 // update the end position of the task
4437 this.foundTaskPositions[i][1] = end;
4438 // get the message source
4439 final int messageLength = end - msgStart + 1;
4440 char[] message = new char[messageLength];
4441 System.arraycopy(src, msgStart, message, 0, messageLength);
4442 this.foundTaskMessages[i] = message;
4446 // check presence of task: tags (older implementation, kept commented out)
4447 // public void checkTaskTag(int commentStart, int commentEnd) {
4448 // // only look for newer task: tags
4449 // if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount
4450 // - 1][0] >= commentStart) {
4453 // int foundTaskIndex = this.foundTaskCount;
4454 // nextChar: for (int i = commentStart; i < commentEnd && i <
4455 // this.eofPosition; i++) {
4456 // char[] tag = null;
4457 // char[] priority = null;
4458 // // check for tag occurrence
4459 // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4460 // tag = this.taskTags[itag];
4461 // priority = this.taskPriorities != null && itag < this.taskPriorities.length
4462 // ? this.taskPriorities[itag] : null;
4463 // int tagLength = tag.length;
4464 // for (int t = 0; t < tagLength; t++) {
4465 // if (this.source[i + t] != tag[t])
4466 // continue nextTag;
4468 // if (this.foundTaskTags == null) {
4469 // this.foundTaskTags = new char[5][];
4470 // this.foundTaskMessages = new char[5][];
4471 // this.foundTaskPriorities = new char[5][];
4472 // this.foundTaskPositions = new int[5][];
4473 // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4474 // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new
4475 // char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4476 // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new
4477 // char[this.foundTaskCount * 2][], 0,
4478 // this.foundTaskCount);
4479 // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities =
4480 // new char[this.foundTaskCount * 2][], 0,
4481 // this.foundTaskCount);
4482 // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new
4483 // int[this.foundTaskCount * 2][], 0,
4484 // this.foundTaskCount);
4486 // this.foundTaskTags[this.foundTaskCount] = tag;
4487 // this.foundTaskPriorities[this.foundTaskCount] = priority;
4488 // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength
4490 // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4491 // this.foundTaskCount++;
4492 // i += tagLength - 1; // will be incremented when looping
4495 // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4496 // // retrieve message start and end positions
4497 // int msgStart = this.foundTaskPositions[i][0] +
4498 // this.foundTaskTags[i].length;
4499 // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i +
4500 // 1][0] - 1 : commentEnd - 1;
4501 // // at most beginning of next task
4502 // if (max_value < msgStart)
4503 // max_value = msgStart; // would only occur if tag is before EOF.
4506 // for (int j = msgStart; j < max_value; j++) {
4507 // if ((c = this.source[j]) == '\n' || c == '\r') {
4513 // for (int j = max_value; j > msgStart; j--) {
4514 // if ((c = this.source[j]) == '*') {
4522 // if (msgStart == end)
4523 // continue; // empty
4524 // // trim the message
4525 // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4527 // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4529 // // update the end position of the task
4530 // this.foundTaskPositions[i][1] = end;
4531 // // get the message source
4532 // final int messageLength = end - msgStart + 1;
4533 // char[] message = new char[messageLength];
4534 // System.arraycopy(source, msgStart, message, 0, messageLength);
4535 // this.foundTaskMessages[i] = message;