1 /***********************************************************************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3 * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4 * and is available at http://www.eclipse.org/legal/cpl-v05.html
6 * Contributors: IBM Corporation - initial API and implementation
7 **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
13 import java.util.Stack;
15 import net.sourceforge.phpdt.core.compiler.CharOperation;
16 import net.sourceforge.phpdt.core.compiler.IScanner;
17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
20 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
22 public class Scanner implements IScanner, ITerminalSymbols {
24 * APIs are - getNextToken(), which returns the current type of the token (this value is not memorized by the scanner) -
25 * getCurrentTokenSource(), which provides the token "REAL" source (i.e. all unicode escapes have been transformed into the correct char) -
26 * sourceStart, which gives the position in the stream - currentPosition-1, which gives the sourceEnd position in the stream
29 private boolean assertMode;
31 public boolean useAssertAsAnIndentifier = false;
33 //flag indicating if processed source contains occurrences of keyword assert
34 public boolean containsAssertKeyword = false;
36 public boolean recordLineSeparator;
38 public boolean ignorePHPOneLiner = false;
40 public boolean phpMode = false;
42 public Stack encapsedStringStack = null;
44 public char currentCharacter;
46 public int startPosition;
48 public int currentPosition;
50 public int initialPosition, eofPosition;
52 // after this position EOF is generated instead of real tokens from the source
54 public boolean tokenizeComments;
56 public boolean tokenizeWhiteSpace;
58 public boolean tokenizeStrings;
60 //source should be viewed as a window (aka a part)
61 //of a entire very large stream
65 public char[] withoutUnicodeBuffer;
67 public int withoutUnicodePtr;
69 //when == 0 ==> no unicode in the current token
70 public boolean unicodeAsBackSlash = false;
72 public boolean scanningFloatLiteral = false;
74 //support for /** comments
75 public int[] commentStops = new int[10];
77 public int[] commentStarts = new int[10];
79 public int commentPtr = -1; // no comment test with commentPtr value -1
81 protected int lastCommentLinePosition = -1;
83 //diet parsing support - jump over some method body when requested
84 public boolean diet = false;
86 //support for the poor-line-debuggers ....
87 //remember the position of the cr/lf
88 public int[] lineEnds = new int[250];
90 public int linePtr = -1;
92 public boolean wasAcr = false;
94 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
96 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
98 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
100 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
102 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
104 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
106 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
108 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
110 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
112 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
114 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
116 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
118 //----------------optimized identifier management------------------
119 static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
120 charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
121 charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
122 charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
123 charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
124 charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
125 charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
126 charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
127 charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
129 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
131 static final int TableSize = 30, InternalTableSize = 6;
134 public static final int OptimizedLength = 6;
137 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
139 // support for detecting non-externalized string literals
140 int currentLineNr = -1;
142 int previousLineNr = -1;
144 NLSLine currentLine = null;
146 List lines = new ArrayList();
148 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
150 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
152 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
154 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
156 public StringLiteral[] nonNLSStrings = null;
158 public boolean checkNonExternalizedStringLiterals = true;
160 public boolean wasNonExternalizedStringLiteral = false;
// Instance initializer: every slot of the identifier cache starts out pointing at the
// shared placeholder array. The outer bound uses OptimizedLength, the same constant
// that sizes the first dimension of charArray_length.
{
  for (int length = 0; length < OptimizedLength; length++) {
    for (int bucket = 0; bucket < TableSize; bucket++) {
      for (int slot = 0; slot < InternalTableSize; slot++) {
        charArray_length[length][bucket][slot] = initCharArray;
      }
    }
  }
}
171 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
173 public static final int RoundBracket = 0;
175 public static final int SquareBracket = 1;
177 public static final int CurlyBracket = 2;
179 public static final int BracketKinds = 3;
182 public char[][] foundTaskTags = null;
184 public char[][] foundTaskMessages;
186 public char[][] foundTaskPriorities = null;
188 public int[][] foundTaskPositions;
190 public int foundTaskCount = 0;
192 public char[][] taskTags = null;
194 public char[][] taskPriorities = null;
195 public boolean isTaskCaseSensitive = true;
196 public static final boolean DEBUG = false;
198 public static final boolean TRACE = false;
200 public ICompilationUnit compilationUnit = null;
202 * Determines if the specified character is permissible
203 * as the first character in a PHP identifier.
205 * The '$' character for PHP variables isn't regarded as the first character !
207 public static boolean isPHPIdentifierStart(char ch) {
208 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
212 * Determines if the specified character may be part of a PHP
213 * identifier as other than the first character
215 public static boolean isPHPIdentifierPart(char ch) {
216 return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
219 public final boolean atEnd() {
220 // This code is not relevant if source is
221 // Only a part of the real stream input
222 return source.length == currentPosition;
225 public char[] getCurrentIdentifierSource() {
226 //return the token REAL source (aka unicodes are precomputed)
228 // if (withoutUnicodePtr != 0)
229 // //0 is used as a fast test flag so the real first char is in position 1
231 // withoutUnicodeBuffer,
233 // result = new char[withoutUnicodePtr],
235 // withoutUnicodePtr);
237 int length = currentPosition - startPosition;
238 switch (length) { // see OptimizedLength
240 return optimizedCurrentTokenSource1();
242 return optimizedCurrentTokenSource2();
244 return optimizedCurrentTokenSource3();
246 return optimizedCurrentTokenSource4();
248 return optimizedCurrentTokenSource5();
250 return optimizedCurrentTokenSource6();
253 System.arraycopy(source, startPosition, result = new char[length], 0, length);
258 public int getCurrentTokenEndPosition() {
259 return this.currentPosition - 1;
262 public final char[] getCurrentTokenSource() {
263 // Return the token REAL source (aka unicodes are precomputed)
265 // if (withoutUnicodePtr != 0)
266 // // 0 is used as a fast test flag so the real first char is in position 1
268 // withoutUnicodeBuffer,
270 // result = new char[withoutUnicodePtr],
272 // withoutUnicodePtr);
275 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
280 public final char[] getCurrentTokenSource(int startPos) {
281 // Return the token REAL source (aka unicodes are precomputed)
283 // if (withoutUnicodePtr != 0)
284 // // 0 is used as a fast test flag so the real first char is in position 1
286 // withoutUnicodeBuffer,
288 // result = new char[withoutUnicodePtr],
290 // withoutUnicodePtr);
293 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
298 public final char[] getCurrentTokenSourceString() {
299 //return the token REAL source (aka unicodes are precomputed).
300 //REMOVE the two " that are at the beginning and the end.
302 if (withoutUnicodePtr != 0)
303 //0 is used as a fast test flag so the real first char is in position 1
304 System.arraycopy(withoutUnicodeBuffer, 2,
305 //2 is 1 (real start) + 1 (to jump over the ")
306 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
309 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
314 public int getCurrentTokenStartPosition() {
315 return this.startPosition;
318 public final char[] getCurrentStringLiteralSource() {
319 // Return the token REAL source (aka unicodes are precomputed)
320 if (startPosition + 1 >= currentPosition) {
325 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
330 public final char[] getCurrentStringLiteralSource(int startPos) {
331 // Return the token REAL source (aka unicodes are precomputed)
334 System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
339 * Search the source position corresponding to the end of a given line number
341 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
343 * In case the given line number is inconsistent, answers -1.
// Maps a 1-based line number to the end position recorded for that line; in the
// general case the answer is lineEnds[lineNumber - 1].
// NOTE(review): the statements guarded by the tests below are not visible in this
// excerpt; the contract stated for this method is that inconsistent line numbers
// answer -1.
// NOTE(review): the range tests compare against lineEnds.length (the array capacity),
// not against linePtr. If linePtr marks the last recorded entry, slots beyond it can
// be read here -- confirm against the code that fills lineEnds.
345 public final int getLineEnd(int lineNumber) {
346 if (lineEnds == null)
348 if (lineNumber >= lineEnds.length)
352 if (lineNumber == lineEnds.length - 1)
354 return lineEnds[lineNumber - 1];
355 // lineEnds is indexed from 0 while line numbers start at 1
359 * Search the source position corresponding to the beginning of a given line number
361 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
363 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
365 * In case the given line number is inconsistent, answers -1.
// Maps a 1-based line number to the position of its first character: one branch
// answers initialPosition, and the general case answers the position right after the
// recorded end of the previous line, lineEnds[lineNumber - 2] + 1.
// NOTE(review): the conditions selecting the initialPosition branch and the
// statements guarded by the tests below are not visible in this excerpt; the contract
// stated for this method is that inconsistent line numbers answer -1.
// NOTE(review): the range test compares against lineEnds.length (the array capacity),
// not against linePtr -- confirm that unrecorded slots cannot be read here.
367 public final int getLineStart(int lineNumber) {
368 if (lineEnds == null)
370 if (lineNumber >= lineEnds.length)
375 return initialPosition;
376 return lineEnds[lineNumber - 2] + 1;
377 // a line starts one character after the lineEnd of the previous line
380 public final boolean getNextChar(char testedChar) {
382 //handle the case of unicode.
383 //when a unicode appears then we must use a buffer that holds char
385 //At the end of this method currentCharacter holds the new visited char
386 //and currentPosition points right next after it
387 //Both previous lines are true if the currentCharacter is == to the
389 //On false, no side effect has occured.
390 //ALL getNextChar.... ARE OPTIMIZED COPIES
391 int temp = currentPosition;
393 currentCharacter = source[currentPosition++];
394 // if (((currentCharacter = source[currentPosition++]) == '\\')
395 // && (source[currentPosition] == 'u')) {
396 // //-------------unicode traitement ------------
397 // int c1, c2, c3, c4;
398 // int unicodeSize = 6;
399 // currentPosition++;
400 // while (source[currentPosition] == 'u') {
401 // currentPosition++;
405 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
407 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
409 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
411 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
413 // currentPosition = temp;
417 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
418 // if (currentCharacter != testedChar) {
419 // currentPosition = temp;
422 // unicodeAsBackSlash = currentCharacter == '\\';
424 // //need the unicode buffer
425 // if (withoutUnicodePtr == 0) {
426 // //buffer all the entries that have been left aside....
427 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
431 // withoutUnicodeBuffer,
433 // withoutUnicodePtr);
435 // //fill the buffer with the char
436 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
439 // } //-------------end unicode traitement--------------
441 if (currentCharacter != testedChar) {
442 currentPosition = temp;
445 unicodeAsBackSlash = false;
446 // if (withoutUnicodePtr != 0)
447 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
450 } catch (IndexOutOfBoundsException e) {
451 unicodeAsBackSlash = false;
452 currentPosition = temp;
457 public final int getNextChar(char testedChar1, char testedChar2) {
458 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
459 //test can be done with (x==0) for the first and (x>0) for the second
460 //handle the case of unicode.
461 //when a unicode appears then we must use a buffer that holds char
463 //At the end of this method currentCharacter holds the new visited char
464 //and currentPosition points right next after it
465 //Both previous lines are true if the currentCharacter is == to the
467 //On false, no side effect has occured.
468 //ALL getNextChar.... ARE OPTIMIZED COPIES
469 int temp = currentPosition;
472 currentCharacter = source[currentPosition++];
473 // if (((currentCharacter = source[currentPosition++]) == '\\')
474 // && (source[currentPosition] == 'u')) {
475 // //-------------unicode traitement ------------
476 // int c1, c2, c3, c4;
477 // int unicodeSize = 6;
478 // currentPosition++;
479 // while (source[currentPosition] == 'u') {
480 // currentPosition++;
484 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
486 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
488 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
490 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
492 // currentPosition = temp;
496 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
497 // if (currentCharacter == testedChar1)
499 // else if (currentCharacter == testedChar2)
502 // currentPosition = temp;
506 // //need the unicode buffer
507 // if (withoutUnicodePtr == 0) {
508 // //buffer all the entries that have been left aside....
509 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
513 // withoutUnicodeBuffer,
515 // withoutUnicodePtr);
517 // //fill the buffer with the char
518 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
520 // } //-------------end unicode traitement--------------
522 if (currentCharacter == testedChar1)
524 else if (currentCharacter == testedChar2)
527 currentPosition = temp;
530 // if (withoutUnicodePtr != 0)
531 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
534 } catch (IndexOutOfBoundsException e) {
535 currentPosition = temp;
540 public final boolean getNextCharAsDigit() {
542 //handle the case of unicode.
543 //when a unicode appears then we must use a buffer that holds char
545 //At the end of this method currentCharacter holds the new visited char
546 //and currentPosition points right next after it
547 //Both previous lines are true if the currentCharacter is a digit
548 //On false, no side effect has occured.
549 //ALL getNextChar.... ARE OPTIMIZED COPIES
550 int temp = currentPosition;
552 currentCharacter = source[currentPosition++];
553 // if (((currentCharacter = source[currentPosition++]) == '\\')
554 // && (source[currentPosition] == 'u')) {
555 // //-------------unicode traitement ------------
556 // int c1, c2, c3, c4;
557 // int unicodeSize = 6;
558 // currentPosition++;
559 // while (source[currentPosition] == 'u') {
560 // currentPosition++;
564 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
566 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
568 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
570 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
572 // currentPosition = temp;
576 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
577 // if (!Character.isDigit(currentCharacter)) {
578 // currentPosition = temp;
582 // //need the unicode buffer
583 // if (withoutUnicodePtr == 0) {
584 // //buffer all the entries that have been left aside....
585 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
589 // withoutUnicodeBuffer,
591 // withoutUnicodePtr);
593 // //fill the buffer with the char
594 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
596 // } //-------------end unicode traitement--------------
598 if (!Character.isDigit(currentCharacter)) {
599 currentPosition = temp;
602 // if (withoutUnicodePtr != 0)
603 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
606 } catch (IndexOutOfBoundsException e) {
607 currentPosition = temp;
612 public final boolean getNextCharAsDigit(int radix) {
614 //handle the case of unicode.
615 //when a unicode appears then we must use a buffer that holds char
617 //At the end of this method currentCharacter holds the new visited char
618 //and currentPosition points right next after it
619 //Both previous lines are true if the currentCharacter is a digit base on
621 //On false, no side effect has occured.
622 //ALL getNextChar.... ARE OPTIMIZED COPIES
623 int temp = currentPosition;
625 currentCharacter = source[currentPosition++];
626 // if (((currentCharacter = source[currentPosition++]) == '\\')
627 // && (source[currentPosition] == 'u')) {
628 // //-------------unicode traitement ------------
629 // int c1, c2, c3, c4;
630 // int unicodeSize = 6;
631 // currentPosition++;
632 // while (source[currentPosition] == 'u') {
633 // currentPosition++;
637 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
639 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
641 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
643 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
645 // currentPosition = temp;
649 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
650 // if (Character.digit(currentCharacter, radix) == -1) {
651 // currentPosition = temp;
655 // //need the unicode buffer
656 // if (withoutUnicodePtr == 0) {
657 // //buffer all the entries that have been left aside....
658 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
662 // withoutUnicodeBuffer,
664 // withoutUnicodePtr);
666 // //fill the buffer with the char
667 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
669 // } //-------------end unicode traitement--------------
671 if (Character.digit(currentCharacter, radix) == -1) {
672 currentPosition = temp;
675 // if (withoutUnicodePtr != 0)
676 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
679 } catch (IndexOutOfBoundsException e) {
680 currentPosition = temp;
685 public boolean getNextCharAsJavaIdentifierPart() {
687 //handle the case of unicode.
688 //when a unicode appears then we must use a buffer that holds char
690 //At the end of this method currentCharacter holds the new visited char
691 //and currentPosition points right next after it
692 //Both previous lines are true if the currentCharacter is a
693 // JavaIdentifierPart
694 //On false, no side effect has occured.
695 //ALL getNextChar.... ARE OPTIMIZED COPIES
696 int temp = currentPosition;
698 currentCharacter = source[currentPosition++];
699 // if (((currentCharacter = source[currentPosition++]) == '\\')
700 // && (source[currentPosition] == 'u')) {
701 // //-------------unicode traitement ------------
702 // int c1, c2, c3, c4;
703 // int unicodeSize = 6;
704 // currentPosition++;
705 // while (source[currentPosition] == 'u') {
706 // currentPosition++;
710 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
712 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
714 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
716 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
718 // currentPosition = temp;
722 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
723 // if (!isPHPIdentifierPart(currentCharacter)) {
724 // currentPosition = temp;
728 // //need the unicode buffer
729 // if (withoutUnicodePtr == 0) {
730 // //buffer all the entries that have been left aside....
731 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
735 // withoutUnicodeBuffer,
737 // withoutUnicodePtr);
739 // //fill the buffer with the char
740 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
742 // } //-------------end unicode traitement--------------
744 if (!isPHPIdentifierPart(currentCharacter)) {
745 currentPosition = temp;
748 // if (withoutUnicodePtr != 0)
749 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
752 } catch (IndexOutOfBoundsException e) {
753 currentPosition = temp;
758 public int getCastOrParen() {
759 int tempPosition = currentPosition;
760 char tempCharacter = currentCharacter;
761 int tempToken = TokenNameLPAREN;
762 boolean found = false;
763 StringBuffer buf = new StringBuffer();
766 currentCharacter = source[currentPosition++];
767 } while (currentCharacter == ' ' || currentCharacter == '\t');
768 while ((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
769 buf.append(currentCharacter);
770 currentCharacter = source[currentPosition++];
772 if (buf.length() >= 3 && buf.length() <= 7) {
773 char[] data = buf.toString().toCharArray();
775 switch (data.length) {
778 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
780 tempToken = TokenNameintCAST;
785 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
787 tempToken = TokenNameboolCAST;
790 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
792 tempToken = TokenNamedoubleCAST;
798 if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
799 && (data[++index] == 'y')) {
801 tempToken = TokenNamearrayCAST;
804 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
805 && (data[++index] == 't')) {
807 tempToken = TokenNameunsetCAST;
810 if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
811 && (data[++index] == 't')) {
813 tempToken = TokenNamedoubleCAST;
819 // object string double
820 if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
821 && (data[++index] == 'c') && (data[++index] == 't')) {
823 tempToken = TokenNameobjectCAST;
826 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
827 && (data[++index] == 'n') && (data[++index] == 'g')) {
829 tempToken = TokenNamestringCAST;
832 if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
833 && (data[++index] == 'l') && (data[++index] == 'e')) {
835 tempToken = TokenNamedoubleCAST;
842 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
843 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
845 tempToken = TokenNameboolCAST;
848 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
849 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
851 tempToken = TokenNameintCAST;
857 while (currentCharacter == ' ' || currentCharacter == '\t') {
858 currentCharacter = source[currentPosition++];
860 if (currentCharacter == ')') {
865 } catch (IndexOutOfBoundsException e) {
867 currentCharacter = tempCharacter;
868 currentPosition = tempPosition;
869 return TokenNameLPAREN;
// Scans the rest of a backtick-delimited string: characters are read from source until
// a '`' is reached. A backslash hands over to scanDoubleQuotedEscapeCharacter(), after
// which the token text is mirrored into withoutUnicodeBuffer (content starts at index 1;
// withoutUnicodePtr == 0 means the buffer is not in use yet). Running off the end of
// source is reported as InvalidInputException(UNTERMINATED_STRING). When
// checkNonExternalizedStringLiterals is set, the literal is recorded on currentLine.
872 public void consumeStringInterpolated() throws InvalidInputException {
874 // consume next character
875 unicodeAsBackSlash = false;
876 currentCharacter = source[currentPosition++];
877 // if (((currentCharacter = source[currentPosition++]) == '\\')
878 // && (source[currentPosition] == 'u')) {
879 // getNextUnicodeChar();
881 // if (withoutUnicodePtr != 0) {
882 // withoutUnicodeBuffer[++withoutUnicodePtr] =
886 while (currentCharacter != '`') {
887 /** ** in PHP \r and \n are valid in string literals *** */
888 // if ((currentCharacter == '\n')
889 // || (currentCharacter == '\r')) {
890 // // relocate if finding another quote fairly close: thus unicode
891 '/u000D' will be fully consumed
892 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
893 // if (currentPosition + lookAhead == source.length)
895 // if (source[currentPosition + lookAhead] == '\n')
897 // if (source[currentPosition + lookAhead] == '\"') {
898 // currentPosition += lookAhead + 1;
902 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
904 if (currentCharacter == '\\') {
// escapeSize first remembers the position before the escape, then becomes the number
// of source characters the escape scanner consumed
905 int escapeSize = currentPosition;
906 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
907 //the escape scanner may change unicodeAsBackSlash as a side effect and we need
908 // the previous value a few lines further down
909 scanDoubleQuotedEscapeCharacter();
910 escapeSize = currentPosition - escapeSize;
911 if (withoutUnicodePtr == 0) {
912 //buffer all the entries that have been left aside....
913 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
914 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
915 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
916 } else { //overwrite the \ in the buffer
917 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
// NOTE(review): consumeStringConstant() decrements withoutUnicodePtr inside this
// branch; no such statement is visible here -- confirm whether that is intended.
// unicodeAsBackSlash is set to false before each read in this method, so the branch
// looks unreachable in practice.
918 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
919 // where only one is correct
924 // consume next character
925 unicodeAsBackSlash = false;
926 currentCharacter = source[currentPosition++];
927 // if (((currentCharacter = source[currentPosition++]) == '\\')
928 // && (source[currentPosition] == 'u')) {
929 // getNextUnicodeChar();
931 if (withoutUnicodePtr != 0) {
932 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
936 } catch (IndexOutOfBoundsException e) {
937 // reset end position for error reporting
938 currentPosition -= 2;
939 throw new InvalidInputException(UNTERMINATED_STRING);
940 } catch (InvalidInputException e) {
941 if (e.getMessage().equals(INVALID_ESCAPE)) {
942 // relocate if finding another quote fairly close: thus unicode
943 // '/u000D' will be fully consumed
// looks at most 50 characters ahead for a closing backtick and moves past it when found
// NOTE(review): the statements taken at end of source / at '\n', and what happens to
// the exception afterwards, are not visible in this excerpt.
944 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
945 if (currentPosition + lookAhead == source.length)
947 if (source[currentPosition + lookAhead] == '\n')
949 if (source[currentPosition + lookAhead] == '`') {
950 currentPosition += lookAhead + 1;
957 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
958 // //$NON-NLS-?$ where ? is an
960 if (currentLine == null) {
961 currentLine = new NLSLine();
962 lines.add(currentLine);
// the recorded literal spans startPosition .. currentPosition - 1, content without delimiters
964 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
// Scans the rest of a single-quoted string: characters are read from source until a
// '\'' is reached. A backslash hands over to scanSingleQuotedEscapeCharacter(), after
// which the token text is mirrored into withoutUnicodeBuffer (content starts at index 1;
// withoutUnicodePtr == 0 means the buffer is not in use yet). Running off the end of
// source is reported as InvalidInputException(UNTERMINATED_STRING). When
// checkNonExternalizedStringLiterals is set, the literal is recorded on currentLine.
968 public void consumeStringConstant() throws InvalidInputException {
970 // consume next character
971 unicodeAsBackSlash = false;
972 currentCharacter = source[currentPosition++];
973 // if (((currentCharacter = source[currentPosition++]) == '\\')
974 // && (source[currentPosition] == 'u')) {
975 // getNextUnicodeChar();
977 // if (withoutUnicodePtr != 0) {
978 // withoutUnicodeBuffer[++withoutUnicodePtr] =
982 while (currentCharacter != '\'') {
983 /** ** in PHP \r and \n are valid in string literals *** */
984 // if ((currentCharacter == '\n')
985 // || (currentCharacter == '\r')) {
986 // // relocate if finding another quote fairly close: thus unicode
987 '/u000D' will be fully consumed
988 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
989 // if (currentPosition + lookAhead == source.length)
991 // if (source[currentPosition + lookAhead] == '\n')
993 // if (source[currentPosition + lookAhead] == '\"') {
994 // currentPosition += lookAhead + 1;
998 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1000 if (currentCharacter == '\\') {
// escapeSize first remembers the position before the escape, then becomes the number
// of source characters the escape scanner consumed
1001 int escapeSize = currentPosition;
1002 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1003 //the escape scanner may change unicodeAsBackSlash as a side effect and we need
1004 // the previous value a few lines further down
1005 scanSingleQuotedEscapeCharacter();
1006 escapeSize = currentPosition - escapeSize;
1007 if (withoutUnicodePtr == 0) {
1008 //buffer all the entries that have been left aside....
1009 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1010 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1011 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1012 } else { //overwrite the \ in the buffer
1013 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
// unicodeAsBackSlash is set to false before each read in this method, so this branch
// looks unreachable in practice -- NOTE(review): confirm before relying on it
1014 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1015 // where only one is correct
1016 withoutUnicodePtr--;
1020 // consume next character
1021 unicodeAsBackSlash = false;
1022 currentCharacter = source[currentPosition++];
1023 // if (((currentCharacter = source[currentPosition++]) == '\\')
1024 // && (source[currentPosition] == 'u')) {
1025 // getNextUnicodeChar();
1027 if (withoutUnicodePtr != 0) {
1028 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1032 } catch (IndexOutOfBoundsException e) {
1033 // reset end position for error reporting
1034 currentPosition -= 2;
1035 throw new InvalidInputException(UNTERMINATED_STRING);
1036 } catch (InvalidInputException e) {
1037 if (e.getMessage().equals(INVALID_ESCAPE)) {
1038 // relocate if finding another quote fairly close: thus unicode
1039 // '/u000D' will be fully consumed
// looks at most 50 characters ahead for a closing single quote and moves past it when found
// NOTE(review): the statements taken at end of source / at '\n', and what happens to
// the exception afterwards, are not visible in this excerpt.
1040 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1041 if (currentPosition + lookAhead == source.length)
1043 if (source[currentPosition + lookAhead] == '\n')
1045 if (source[currentPosition + lookAhead] == '\'') {
1046 currentPosition += lookAhead + 1;
1053 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1054 // //$NON-NLS-?$ where ? is an
1056 if (currentLine == null) {
1057 currentLine = new NLSLine();
1058 lines.add(currentLine);
// the recorded literal spans startPosition .. currentPosition - 1, content without delimiters
1060 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Consumes a double-quoted string literal: reads characters from source until the
 * closing '"' has been read, decoding backslash escapes through
 * scanDoubleQuotedEscapeCharacter() and collecting the decoded text in
 * withoutUnicodeBuffer. When checkNonExternalizedStringLiterals is set, the literal
 * is also added to the current NLSLine.
 *
 * NOTE(review): this copy of the method skips some original lines (brace-only lines
 * and the break/throw statements of the recovery code), so nesting is partly inferred.
 *
 * @throws InvalidInputException with UNTERMINATED_STRING when the end of source is
 *         hit before the closing quote
 */
1064 public void consumeStringLiteral() throws InvalidInputException {
1066 // consume next character
1067 unicodeAsBackSlash = false;
1068 currentCharacter = source[currentPosition++];
1069 // if (((currentCharacter = source[currentPosition++]) == '\\')
1070 // && (source[currentPosition] == 'u')) {
1071 // getNextUnicodeChar();
1073 // if (withoutUnicodePtr != 0) {
1074 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1075 // currentCharacter;
// scan until the closing double quote
1078 while (currentCharacter != '"') {
1079 /** ** in PHP \r and \n are valid in string literals *** */
1080 // if ((currentCharacter == '\n')
1081 // || (currentCharacter == '\r')) {
1082 // // relocate if finding another quote fairly close: thus unicode
1083 // '/u000D' will be fully consumed
1084 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1085 // if (currentPosition + lookAhead == source.length)
1087 // if (source[currentPosition + lookAhead] == '\n')
1089 // if (source[currentPosition + lookAhead] == '\"') {
1090 // currentPosition += lookAhead + 1;
1094 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1096 if (currentCharacter == '\\') {
// escapeSize first holds the position just after the backslash; after the scan
// below it is turned into the number of characters the escape consumed
1097 int escapeSize = currentPosition;
1098 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1099 //scanEscapeCharacter make a side effect on this value and we need
1100 // the previous value few lines down this one
1101 scanDoubleQuotedEscapeCharacter();
1102 escapeSize = currentPosition - escapeSize;
// withoutUnicodePtr == 0 means the buffer is not in use yet for this token
1103 if (withoutUnicodePtr == 0) {
1104 //buffer all the entries that have been left aside....
// copy everything from startPosition up to (not including) the backslash into the
// buffer, starting at index 1, then append the decoded character
1105 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1106 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1107 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1108 } else { //overwrite the / in the buffer
1109 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1110 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1111 // where only one is correct
1112 withoutUnicodePtr--;
1116 // consume next character
1117 unicodeAsBackSlash = false;
1118 currentCharacter = source[currentPosition++];
1119 // if (((currentCharacter = source[currentPosition++]) == '\\')
1120 // && (source[currentPosition] == 'u')) {
1121 // getNextUnicodeChar();
// once the buffer is in use every consumed character must be appended to it
1123 if (withoutUnicodePtr != 0) {
1124 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source means the closing quote was never found
1128 } catch (IndexOutOfBoundsException e) {
1129 // reset end position for error reporting
1130 currentPosition -= 2;
1131 throw new InvalidInputException(UNTERMINATED_STRING);
1132 } catch (InvalidInputException e) {
// error recovery for a bad escape: look at most 50 characters ahead for a closing
// quote and, if one is found, move currentPosition just past it
// NOTE(review): the statements following the end-of-source and '\n' tests, and what
// happens to e afterwards, are not visible in this copy (presumably break / rethrow)
1133 if (e.getMessage().equals(INVALID_ESCAPE)) {
1134 // relocate if finding another quote fairly close: thus unicode
1135 // '/u000D' will be fully consumed
1136 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1137 if (currentPosition + lookAhead == source.length)
1139 if (source[currentPosition + lookAhead] == '\n')
1141 if (source[currentPosition + lookAhead] == '\"') {
1142 currentPosition += lookAhead + 1;
1149 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1150 // //$NON-NLS-?$ where ? is an
// lazily create the NLSLine for the current source line and register it in lines
1152 if (currentLine == null) {
1153 currentLine = new NLSLine();
1154 lines.add(currentLine);
// record the literal together with its start/end positions (end is inclusive)
1156 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Scans the next token starting at currentPosition and returns its TokenName* id.
 * The visible code goes through these stages:
 * - early exits that delegate to getInlinedHTML() or jumpOverMethodBody();
 * - scanning inside an encapsed string, driven by the character on top of
 *   encapsedStringStack;
 * - skipping white space (optionally returned as TokenNameWHITESPACE);
 * - a switch on currentCharacter recognising punctuation, operators, heredocs,
 *   strings, comments, variables, identifiers and numbers.
 * Reading past the end of source is caught at the bottom and reported as
 * TokenNameEOF.
 *
 * NOTE(review): this copy of the method skips many original lines (case labels,
 * brace-only lines, try/do keywords, some guards), so the comments below describe
 * only what the visible statements do and hedge where a label is inferred.
 *
 * @return the id of the scanned token
 * @throws InvalidInputException e.g. with UNTERMINATED_COMMENT when a block comment
 *         is not closed, or "Ctrl-Z" for an end marker before the end of source
 */
1160 public int getNextToken() throws InvalidInputException {
// NOTE(review): the guard of this early return is not visible here; presumably it is
// taken while the scanner is outside PHP code (see phpMode) - confirm
1162 return getInlinedHTML(currentPosition);
1165 this.wasAcr = false;
// NOTE(review): guard not visible here; after skipping a method body the scanner
// reports either EOF (position beyond the source) or the closing brace
1167 jumpOverMethodBody();
1169 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
// a new token starts with the decoded-character buffer unused
1173 withoutUnicodePtr = 0;
1174 //start with a new token
// encapsedChar stays ' ' when encapsedStringStack is empty; otherwise it is the
// character currently on top of that stack
1175 char encapsedChar = ' ';
1176 if (!encapsedStringStack.isEmpty()) {
1177 encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
// any value other than '$' or ' ' is treated as the delimiter of the string being scanned
1179 if (encapsedChar != '$' && encapsedChar != ' ') {
1180 currentCharacter = source[currentPosition++];
// the delimiter itself ends the encapsed string; the returned token depends on which
// delimiter it is (case labels not visible in this copy)
1181 if (currentCharacter == encapsedChar) {
1182 switch (currentCharacter) {
1184 return TokenNameEncapsedString0;
1186 return TokenNameEncapsedString1;
1188 return TokenNameEncapsedString2;
// otherwise consume plain string content up to the delimiter
1191 while (currentCharacter != encapsedChar) {
1192 /** ** in PHP \r and \n are valid in string literals *** */
1193 switch (currentCharacter) {
// escape handling (presumably the '\\' case): decode the escape and keep
// withoutUnicodeBuffer in sync, same bookkeeping as in consumeStringLiteral()
1195 int escapeSize = currentPosition;
1196 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1197 //scanEscapeCharacter make a side effect on this value and
1198 // we need the previous value few lines down this one
1199 scanDoubleQuotedEscapeCharacter();
1200 escapeSize = currentPosition - escapeSize;
1201 if (withoutUnicodePtr == 0) {
1202 //buffer all the entries that have been left aside....
1203 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1204 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1205 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1206 } else { //overwrite the / in the buffer
1207 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1208 if (backSlashAsUnicodeInString) { //there are TWO \ in
1209 withoutUnicodePtr--;
// start of an embedded expression (presumably the '$' case): the text scanned so far
// is returned as TokenNameSTRING and '$' is pushed, which selects the
// encapsedChar == '$' branch further down on the next call
1214 if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
1216 encapsedStringStack.push(new Character('$'));
1217 return TokenNameSTRING;
1221 if (source[currentPosition] == '$') { // CURLY_OPEN
1223 encapsedStringStack.push(new Character('$'));
1224 return TokenNameSTRING;
1227 // consume next character
1228 unicodeAsBackSlash = false;
1229 currentCharacter = source[currentPosition++];
1230 if (withoutUnicodePtr != 0) {
1231 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1236 return TokenNameSTRING;
1238 // ---------Consume white space and handles startPosition---------
1239 int whiteStart = currentPosition;
1240 startPosition = currentPosition;
1241 currentCharacter = source[currentPosition++];
// '$' on top of the stack: scanning an expression embedded in a string; only a small
// set of tokens is recognised here (case labels not visible in this copy)
1242 if (encapsedChar == '$') {
1243 switch (currentCharacter) {
1245 currentCharacter = source[currentPosition++];
1246 return TokenNameSTRING;
1248 if (encapsedChar == '$') {
1249 if (getNextChar('$'))
1250 return TokenNameLBRACE_DOLLAR;
1252 return TokenNameLBRACE;
1254 return TokenNameRBRACE;
1256 return TokenNameLBRACKET;
1258 return TokenNameRBRACKET;
// with tokenizeStrings the whole quoted string is consumed and returned as one token
1260 if (tokenizeStrings) {
1261 consumeStringConstant();
1262 return TokenNameStringSingleQuote;
1264 return TokenNameEncapsedString1;
1266 return TokenNameEncapsedString2;
1268 if (tokenizeStrings) {
1269 consumeStringInterpolated();
1270 return TokenNameStringInterpolated;
1272 return TokenNameEncapsedString0;
1274 if (getNextChar('>'))
1275 return TokenNameMINUS_GREATER;
1276 return TokenNameSTRING;
// fallback inside an embedded expression: "${", a variable, or a plain '$' which is
// handed back as string content after restoring the position
1278 if (currentCharacter == '$') {
1279 int oldPosition = currentPosition;
1281 currentCharacter = source[currentPosition++];
1282 if (currentCharacter == '{') {
1283 return TokenNameDOLLAR_LBRACE;
1285 if (isPHPIdentifierStart(currentCharacter)) {
1286 return scanIdentifierOrKeyword(true);
1288 currentPosition = oldPosition;
1289 return TokenNameSTRING;
1291 } catch (IndexOutOfBoundsException e) {
1292 currentPosition = oldPosition;
1293 return TokenNameSTRING;
1296 if (isPHPIdentifierStart(currentCharacter))
1297 return scanIdentifierOrKeyword(false);
1298 if (Character.isDigit(currentCharacter))
1299 return scanNumber(false);
1300 return TokenNameERROR;
1303 // boolean isWhiteSpace;
// skip white space; startPosition follows along so it ends up on the first
// non-blank character
1305 while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1306 startPosition = currentPosition;
1307 currentCharacter = source[currentPosition++];
1308 // if (((currentCharacter = source[currentPosition++]) == '\\')
1309 // && (source[currentPosition] == 'u')) {
1310 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1312 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1313 checkNonExternalizeString();
1314 if (recordLineSeparator) {
1315 pushLineSeparator();
1320 // isWhiteSpace = (currentCharacter == ' ')
1321 // || Character.isWhitespace(currentCharacter);
// white space was skipped (position moved past whiteStart) and the client asked for it
1324 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1325 // reposition scanner in case we are interested by spaces as tokens
1327 startPosition = whiteStart;
1328 return TokenNameWHITESPACE;
1330 //little trick to get out in the middle of a source computation
1331 if (currentPosition > eofPosition)
1332 return TokenNameEOF;
1333 // ---------Identify the next token-------------
// NOTE(review): the case labels of this switch are not visible in this copy; the
// character handled by each group is inferred from the tokens it returns
1334 switch (currentCharacter) {
1336 return getCastOrParen();
1338 return TokenNameRPAREN;
1340 return TokenNameLBRACE;
1342 return TokenNameRBRACE;
1344 return TokenNameLBRACKET;
1346 return TokenNameRBRACKET;
1348 return TokenNameSEMICOLON;
1350 return TokenNameCOMMA;
// dot: ".=", a number starting with a dot, or the plain dot token
1352 if (getNextChar('='))
1353 return TokenNameDOT_EQUAL;
1354 if (getNextCharAsDigit())
1355 return scanNumber(true);
1356 return TokenNameDOT;
1359 if ((test = getNextChar('+', '=')) == 0)
1360 return TokenNamePLUS_PLUS;
1362 return TokenNamePLUS_EQUAL;
1363 return TokenNamePLUS;
1367 if ((test = getNextChar('-', '=')) == 0)
1368 return TokenNameMINUS_MINUS;
1370 return TokenNameMINUS_EQUAL;
1371 if (getNextChar('>'))
1372 return TokenNameMINUS_GREATER;
1373 return TokenNameMINUS;
1376 if (getNextChar('='))
1377 return TokenNameTWIDDLE_EQUAL;
1378 return TokenNameTWIDDLE;
1380 if (getNextChar('=')) {
1381 if (getNextChar('=')) {
1382 return TokenNameNOT_EQUAL_EQUAL;
1384 return TokenNameNOT_EQUAL;
1386 return TokenNameNOT;
1388 if (getNextChar('='))
1389 return TokenNameMULTIPLY_EQUAL;
1390 return TokenNameMULTIPLY;
1392 if (getNextChar('='))
1393 return TokenNameREMAINDER_EQUAL;
1394 return TokenNameREMAINDER;
// less-than family: look one character ahead; at end of source it is a plain '<'
1396 int oldPosition = currentPosition;
1398 currentCharacter = source[currentPosition++];
1399 } catch (IndexOutOfBoundsException e) {
1400 currentPosition = oldPosition;
1401 return TokenNameLESS;
1403 switch (currentCharacter) {
1405 return TokenNameLESS_EQUAL;
1407 return TokenNameNOT_EQUAL;
1409 if (getNextChar('='))
1410 return TokenNameLEFT_SHIFT_EQUAL;
// a third '<' starts a heredoc: skip white space, then read the heredoc identifier
1411 if (getNextChar('<')) {
1412 currentCharacter = source[currentPosition++];
1413 while (Character.isWhitespace(currentCharacter)) {
1414 currentCharacter = source[currentPosition++];
// heredocStart is the index of the identifier's first character
1416 int heredocStart = currentPosition - 1;
1417 int heredocLength = 0;
1418 if (isPHPIdentifierStart(currentCharacter)) {
1419 currentCharacter = source[currentPosition++];
1421 return TokenNameERROR;
1423 while (isPHPIdentifierPart(currentCharacter)) {
1424 currentCharacter = source[currentPosition++];
1426 heredocLength = currentPosition - heredocStart - 1;
1427 // heredoc end-tag determination
1428 boolean endTag = true;
// after each line break, compare the following characters with the heredoc
// identifier; on a full match the scanner is moved past the end tag
1431 ch = source[currentPosition++];
1432 if (ch == '\r' || ch == '\n') {
1433 if (recordLineSeparator) {
1434 pushLineSeparator();
1438 for (int i = 0; i < heredocLength; i++) {
1439 if (source[currentPosition + i] != source[heredocStart + i]) {
1445 currentPosition += heredocLength - 1;
1446 currentCharacter = source[currentPosition++];
1447 break; // do...while loop
1453 return TokenNameHEREDOC;
1455 return TokenNameLEFT_SHIFT;
// nothing special followed '<': un-read the look-ahead character
1457 currentPosition = oldPosition;
1458 return TokenNameLESS;
1462 if ((test = getNextChar('=', '>')) == 0)
1463 return TokenNameGREATER_EQUAL;
1465 if ((test = getNextChar('=', '>')) == 0)
1466 return TokenNameRIGHT_SHIFT_EQUAL;
1467 return TokenNameRIGHT_SHIFT;
1469 return TokenNameGREATER;
1472 if (getNextChar('=')) {
1473 if (getNextChar('=')) {
1474 return TokenNameEQUAL_EQUAL_EQUAL;
1476 return TokenNameEQUAL_EQUAL;
1478 if (getNextChar('>'))
1479 return TokenNameEQUAL_GREATER;
1480 return TokenNameEQUAL;
1483 if ((test = getNextChar('&', '=')) == 0)
1484 return TokenNameAND_AND;
1486 return TokenNameAND_EQUAL;
1487 return TokenNameAND;
1491 if ((test = getNextChar('|', '=')) == 0)
1492 return TokenNameOR_OR;
1494 return TokenNameOR_EQUAL;
1498 if (getNextChar('='))
1499 return TokenNameXOR_EQUAL;
1500 return TokenNameXOR;
// '>' directly after this character (presumably '?', i.e. the closing PHP tag):
// what follows is scanned as inline HTML, starting at the tag itself
1502 if (getNextChar('>')) {
1504 if (currentPosition == source.length) {
1506 return TokenNameINLINE_HTML;
1508 return getInlinedHTML(currentPosition - 2);
1510 return TokenNameQUESTION;
1512 if (getNextChar(':'))
1513 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1514 return TokenNameCOLON;
// quoted strings: consumed as a whole when tokenizeStrings is set, otherwise only the
// opening delimiter is reported through an EncapsedString token
1518 consumeStringConstant();
1519 return TokenNameStringSingleQuote;
1521 if (tokenizeStrings) {
1522 consumeStringLiteral();
1523 return TokenNameStringDoubleQuote;
1525 return TokenNameEncapsedString2;
1527 if (tokenizeStrings) {
1528 consumeStringInterpolated();
1529 return TokenNameStringInterpolated;
1531 return TokenNameEncapsedString0;
// '/' and '#' share one branch: "/=", line comments ("#" or "//"), block comments,
// or the plain divide operator
1534 char startChar = currentCharacter;
1535 if (getNextChar('=') && startChar=='/') {
1536 return TokenNameDIVIDE_EQUAL;
1539 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1541 this.lastCommentLinePosition = this.currentPosition;
1542 int endPositionForLineComment = 0;
1543 try { //get the next char
1544 currentCharacter = source[currentPosition++];
1545 // if (((currentCharacter = source[currentPosition++])
1547 // && (source[currentPosition] == 'u')) {
1548 // //-------------unicode processing ------------
1549 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1550 // currentPosition++;
1551 // while (source[currentPosition] == 'u') {
1552 // currentPosition++;
1555 // Character.getNumericValue(source[currentPosition++]))
1559 // Character.getNumericValue(source[currentPosition++]))
1563 // Character.getNumericValue(source[currentPosition++]))
1567 // Character.getNumericValue(source[currentPosition++]))
1571 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1573 // currentCharacter =
1574 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1577 //handle the \\u case manually into comment
1578 // if (currentCharacter == '\\') {
1579 // if (source[currentPosition] == '\\')
1580 // currentPosition++;
1581 // } //jump over the \\
// isUnicode is never set to true by the visible (non-disabled) code
1582 boolean isUnicode = false;
// a line comment runs to the end of the line...
1583 while (currentCharacter != '\r' && currentCharacter != '\n') {
1584 this.lastCommentLinePosition = this.currentPosition;
// ...but "?>" inside it still ends the PHP section
1585 if (currentCharacter == '?') {
1586 if (getNextChar('>')) {
1587 startPosition = currentPosition - 2;
1589 return TokenNameINLINE_HTML;
1594 currentCharacter = source[currentPosition++];
1595 // if (((currentCharacter = source[currentPosition++])
1597 // && (source[currentPosition] == 'u')) {
1598 // isUnicode = true;
1599 // //-------------unicode processing ------------
1600 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1601 // currentPosition++;
1602 // while (source[currentPosition] == 'u') {
1603 // currentPosition++;
1606 // Character.getNumericValue(source[currentPosition++]))
1610 // Character.getNumericValue(
1611 // source[currentPosition++]))
1615 // Character.getNumericValue(
1616 // source[currentPosition++]))
1620 // Character.getNumericValue(
1621 // source[currentPosition++]))
1625 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1627 // currentCharacter =
1628 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1631 //handle the \\u case manually into comment
1632 // if (currentCharacter == '\\') {
1633 // if (source[currentPosition] == '\\')
1634 // currentPosition++;
1635 // } //jump over the \\
// end of the comment text: 6 characters back when the line end came from a unicode
// escape, otherwise 1 (the conditions themselves are not visible in this copy)
1638 endPositionForLineComment = currentPosition - 6;
1640 endPositionForLineComment = currentPosition - 1;
1642 // recordComment(false);
1643 recordComment(TokenNameCOMMENT_LINE);
1644 if (this.taskTags != null)
1645 checkTaskTag(this.startPosition, this.currentPosition);
1646 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1647 checkNonExternalizeString();
1648 if (recordLineSeparator) {
1650 pushUnicodeLineSeparator();
1652 pushLineSeparator();
1658 if (tokenizeComments) {
1660 currentPosition = endPositionForLineComment;
1661 // reset one character behind
1663 return TokenNameCOMMENT_LINE;
1665 } catch (IndexOutOfBoundsException e) { //an eof will then
1667 if (tokenizeComments) {
1669 // reset one character behind
1670 return TokenNameCOMMENT_LINE;
1676 //traditional and annotation comment
// star remembers whether the previous character was '*', so that "*/" can be detected
1677 boolean isJavadoc = false, star = false;
1678 // consume next character
1679 unicodeAsBackSlash = false;
1680 currentCharacter = source[currentPosition++];
1681 // if (((currentCharacter = source[currentPosition++]) ==
1683 // && (source[currentPosition] == 'u')) {
1684 // getNextUnicodeChar();
1686 // if (withoutUnicodePtr != 0) {
1687 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1688 // currentCharacter;
1691 if (currentCharacter == '*') {
1695 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1696 checkNonExternalizeString();
1697 if (recordLineSeparator) {
1698 pushLineSeparator();
1703 try { //get the next char
1704 currentCharacter = source[currentPosition++];
1705 // if (((currentCharacter = source[currentPosition++])
1707 // && (source[currentPosition] == 'u')) {
1708 // //-------------unicode processing ------------
1709 // getNextUnicodeChar();
1711 //handle the \\u case manually into comment
1712 // if (currentCharacter == '\\') {
1713 // if (source[currentPosition] == '\\')
1714 // currentPosition++;
1715 // //jump over the \\
1717 // empty comment is not a javadoc /**/
1718 if (currentCharacter == '/') {
1721 //loop until end of comment */
1722 while ((currentCharacter != '/') || (!star)) {
1723 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1724 checkNonExternalizeString();
1725 if (recordLineSeparator) {
1726 pushLineSeparator();
1731 star = currentCharacter == '*';
1733 currentCharacter = source[currentPosition++];
1734 // if (((currentCharacter = source[currentPosition++])
1736 // && (source[currentPosition] == 'u')) {
1737 // //-------------unicode processing ------------
1738 // getNextUnicodeChar();
1740 //handle the \\u case manually into comment
1741 // if (currentCharacter == '\\') {
1742 // if (source[currentPosition] == '\\')
1743 // currentPosition++;
1744 // } //jump over the \\
1746 //recordComment(isJavadoc);
// the comment is recorded as PHPDOC or BLOCK (selecting condition not visible in
// this copy; presumably isJavadoc)
1748 recordComment(TokenNameCOMMENT_PHPDOC);
1750 recordComment(TokenNameCOMMENT_BLOCK);
1753 if (tokenizeComments) {
1755 return TokenNameCOMMENT_PHPDOC;
1756 return TokenNameCOMMENT_BLOCK;
1759 if (this.taskTags != null) {
1760 checkTaskTag(this.startPosition, this.currentPosition);
// end of source inside a block comment
1762 } catch (IndexOutOfBoundsException e) {
1763 // reset end position for error reporting
1764 currentPosition -= 2;
1765 throw new InvalidInputException(UNTERMINATED_COMMENT);
1769 return TokenNameDIVIDE;
1773 return TokenNameEOF;
1774 //the atEnd may not be <currentPosition == source.length> if
1775 // source is only some part of a real (external) stream
1776 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// remaining characters: '$' followed by an identifier start is scanned as a variable,
// a lone '$' is its own token; otherwise identifiers/keywords and numbers
1778 if (currentCharacter == '$') {
1779 int oldPosition = currentPosition;
1781 currentCharacter = source[currentPosition++];
1782 if (isPHPIdentifierStart(currentCharacter)) {
1783 return scanIdentifierOrKeyword(true);
1785 currentPosition = oldPosition;
1786 return TokenNameDOLLAR;
1788 } catch (IndexOutOfBoundsException e) {
1789 currentPosition = oldPosition;
1790 return TokenNameDOLLAR;
1793 if (isPHPIdentifierStart(currentCharacter))
1794 return scanIdentifierOrKeyword(false);
1795 if (Character.isDigit(currentCharacter))
1796 return scanNumber(false);
1797 return TokenNameERROR;
1800 } //-----------------end switch while try--------------------
// running off the end of source anywhere above ends the token stream
1801 catch (IndexOutOfBoundsException e) {
1804 return TokenNameEOF;
/**
 * Scans inline HTML by delegating to getInlinedHTMLToken(start).
 * The only code visible inside the TokenNameINLINE_HTML branch is a disabled
 * (commented-out) experiment that walked the HTML tags of the scanned region.
 *
 * NOTE(review): the end of this method (including its return statement) is not
 * visible in this copy; presumably it returns token - confirm.
 *
 * @param start position passed on to getInlinedHTMLToken
 * @throws InvalidInputException declared; propagated from getInlinedHTMLToken
 */
1807 private int getInlinedHTML(int start) throws InvalidInputException {
1808 int token = getInlinedHTMLToken(start);
1809 if (token == TokenNameINLINE_HTML) {
1810 // Stack stack = new Stack();
1811 // // scan html for errors
1812 // Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
1813 // int lastPHPEndPos=0;
1814 // for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
1815 // Tag tag=(Tag)i.next();
1817 // if (tag instanceof StartTag) {
1818 // StartTag startTag=(StartTag)tag;
1819 // // System.out.println("startTag: "+tag);
1820 // if (startTag.isServerTag()) {
1821 // // TODO : what to do with a server tag ?
1823 // // do whatever with HTML start tag
1824 // // use startTag.getElement() to find the element corresponding
1825 // // to this start tag which may be useful if you implement code
1827 // stack.push(startTag);
1830 // EndTag endTag=(EndTag)tag;
1831 // StartTag stag = (StartTag) stack.peek();
1832 //// System.out.println("endTag: "+tag);
1833 // // do whatever with HTML end tag.
1842 * @throws InvalidInputException
1844 private int getInlinedHTMLToken(int start) throws InvalidInputException {
// Reads source from currentPosition looking for a PHP open tag: "<?" followed by
// white space, or "<?" followed by p/P, h/H, p/P. Line separators met on the way are
// recorded when recordLineSeparator is set. startPosition is set to the given start.
// Visible results: TokenNameEOF when currentPosition is already beyond the source,
// TokenNameINLINE_HTML otherwise (also when the end of source is reached).
// NOTE(review): several lines (loop/try headers, the statements between the visible
// returns) are missing from this copy, so the exact nesting is inferred.
1845 // int htmlPosition = start;
// already past the end: clamp the position and report EOF
1846 if (currentPosition > source.length) {
1847 currentPosition = source.length;
1848 return TokenNameEOF;
1850 startPosition = start;
1853 currentCharacter = source[currentPosition++];
1854 if (currentCharacter == '<') {
1855 if (getNextChar('?')) {
1856 currentCharacter = source[currentPosition++];
// short open tag: "<?" followed by white space
1857 if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
// with ignorePHPOneLiner, lookAheadLinePHPTag() decides how this tag is treated;
// see that method for the meaning of its result
1859 if (ignorePHPOneLiner) {
1860 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1862 return TokenNameINLINE_HTML;
1866 return TokenNameINLINE_HTML;
// long open tag: match the letters p, h, p in either case
1869 boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
1871 int test = getNextChar('H', 'h');
1873 test = getNextChar('P', 'p');
1876 if (ignorePHPOneLiner) {
1877 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1879 return TokenNameINLINE_HTML;
1883 return TokenNameINLINE_HTML;
// keep line-end bookkeeping up to date while skipping HTML
1891 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1892 if (recordLineSeparator) {
1893 pushLineSeparator();
1898 } //-----------------while--------------------
1900 return TokenNameINLINE_HTML;
1901 } //-----------------try--------------------
// end of source reached while scanning: the remainder is inline HTML
1902 catch (IndexOutOfBoundsException e) {
1903 startPosition = start;
1907 return TokenNameINLINE_HTML;
/**
 * Looks ahead from currentPosition, character by character, while tracking whether
 * the scan is inside a single- or double-quoted string.
 * When the character pair "?" + (presumably) ">" is found, currentPosition is moved
 * just past it and TokenNameEOF is returned as a dummy result. The other visible
 * exits return TokenNameINLINE_HTML; they are taken for certain characters outside
 * of strings and when the end of source is reached.
 *
 * NOTE(review): the case labels of the switch are not visible in this copy, so the
 * characters named in the comments below are inferred from the surrounding tests.
 *
 * @return TokenNameEOF or TokenNameINLINE_HTML, as described above
 */
1913 private int lookAheadLinePHPTag() {
1914 // check if the PHP is only in this line (for CodeFormatter)
1915 int currentPositionInLine = currentPosition;
1916 char previousCharInLine = ' ';
1917 char currentCharInLine = ' ';
1918 boolean singleQuotedStringActive = false;
1919 boolean doubleQuotedStringActive = false;
1922 // look ahead in this line
1924 previousCharInLine = currentCharInLine;
1925 currentCharInLine = source[currentPositionInLine++];
1926 switch (currentCharInLine) {
// presumably case '>': preceded by '?' it closes the PHP section
1928 if (previousCharInLine == '?') {
1929 // update the scanner's current Position in the source
1930 currentPosition = currentPositionInLine;
1931 // use as "dummy" token
1932 return TokenNameEOF;
// presumably case '"': an unescaped quote closes an open double-quoted string;
// otherwise one is opened unless a single-quoted string is active
1936 if (doubleQuotedStringActive) {
1937 if (previousCharInLine != '\\') {
1938 doubleQuotedStringActive = false;
1941 if (!singleQuotedStringActive) {
1942 doubleQuotedStringActive = true;
// presumably case '\'': same logic for single-quoted strings
1947 if (singleQuotedStringActive) {
1948 if (previousCharInLine != '\\') {
1949 singleQuotedStringActive = false;
1952 if (!doubleQuotedStringActive) {
1953 singleQuotedStringActive = true;
// presumably the end of the line
1959 return TokenNameINLINE_HTML;
// a character that only counts outside of strings (presumably '#')
1961 if (!singleQuotedStringActive && !doubleQuotedStringActive) {
1963 return TokenNameINLINE_HTML;
// two further characters that only count when preceded by '/' and outside of strings
// (presumably the second character of "//" and of "/*")
1967 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1969 return TokenNameINLINE_HTML;
1973 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1975 return TokenNameINLINE_HTML;
// end of source: keep the position reached by the look-ahead
1980 } catch (IndexOutOfBoundsException e) {
1982 currentPosition = currentPositionInLine;
1983 return TokenNameINLINE_HTML;
1987 // public final void getNextUnicodeChar()
1988 // throws IndexOutOfBoundsException, InvalidInputException {
1990 // //handle the case of unicode.
1991 // //when a unicode appears then we must use a buffer that holds char
1993 // //At the end of this method currentCharacter holds the new visited char
1994 // //and currentPosition points right next after it
1996 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1998 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1999 // currentPosition++;
2000 // while (source[currentPosition] == 'u') {
2001 // currentPosition++;
2005 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2007 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2009 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2011 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2013 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2015 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2016 // //need the unicode buffer
2017 // if (withoutUnicodePtr == 0) {
2018 // //buffer all the entries that have been left aside....
2019 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2020 // System.arraycopy(
2023 // withoutUnicodeBuffer,
2025 // withoutUnicodePtr);
2027 // //fill the buffer with the char
2028 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2030 // unicodeAsBackSlash = currentCharacter == '\\';
2033 * Tokenize a method body, assuming that curly brackets are properly balanced.
2035 public final void jumpOverMethodBody() {
// Skips over source text without producing tokens: white space, string and
// character literals, line and block comments, identifiers and numbers are consumed
// in a loop, recording line separators when recordLineSeparator is set.
// IndexOutOfBoundsException and InvalidInputException are caught at the bottom of
// the method; no handling code for them is visible in this copy.
// NOTE(review): the case labels of the switch, the brace counting and the exit
// statements are not visible in this copy, so comments below name characters only
// where the visible tests make them evident.
2036 this.wasAcr = false;
2039 while (true) { //loop for jumping over comments
2040 // ---------Consume white space and handles startPosition---------
2041 boolean isWhiteSpace;
2043 startPosition = currentPosition;
2044 currentCharacter = source[currentPosition++];
2045 // if (((currentCharacter = source[currentPosition++]) == '\\')
2046 // && (source[currentPosition] == 'u')) {
2047 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2049 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2050 pushLineSeparator();
2051 isWhiteSpace = Character.isWhitespace(currentCharacter);
2053 } while (isWhiteSpace);
2054 // -------consume token until } is found---------
2055 switch (currentCharacter) {
// a literal whose content may start with a backslash escape; escape errors are caught
// right here (handler body not visible in this copy)
2066 test = getNextChar('\\');
2069 scanDoubleQuotedEscapeCharacter();
2070 } catch (InvalidInputException ex) {
2074 // try { // consume next character
2075 unicodeAsBackSlash = false;
2076 currentCharacter = source[currentPosition++];
2077 // if (((currentCharacter = source[currentPosition++]) == '\\')
2078 // && (source[currentPosition] == 'u')) {
2079 // getNextUnicodeChar();
2081 if (withoutUnicodePtr != 0) {
2082 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2085 // } catch (InvalidInputException ex) {
// double-quoted string: read its first character, then loop up to the closing quote
2093 // try { // consume next character
2094 unicodeAsBackSlash = false;
2095 currentCharacter = source[currentPosition++];
2096 // if (((currentCharacter = source[currentPosition++]) == '\\')
2097 // && (source[currentPosition] == 'u')) {
2098 // getNextUnicodeChar();
2100 if (withoutUnicodePtr != 0) {
2101 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2104 // } catch (InvalidInputException ex) {
2106 while (currentCharacter != '"') {
// line ends get special treatment here (the statements executed are not visible in
// this copy)
2107 if (currentCharacter == '\r') {
2108 if (source[currentPosition] == '\n')
2111 // the string cannot go further that the line
2113 if (currentCharacter == '\n') {
2115 // the string cannot go further that the line
2117 if (currentCharacter == '\\') {
2119 scanDoubleQuotedEscapeCharacter();
2120 } catch (InvalidInputException ex) {
2124 // try { // consume next character
2125 unicodeAsBackSlash = false;
2126 currentCharacter = source[currentPosition++];
2127 // if (((currentCharacter = source[currentPosition++]) == '\\')
2128 // && (source[currentPosition] == 'u')) {
2129 // getNextUnicodeChar();
2131 if (withoutUnicodePtr != 0) {
2132 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2135 // } catch (InvalidInputException ex) {
2138 } catch (IndexOutOfBoundsException e) {
// comments: getNextChar('/', '*') == 0 means a second '/' followed, i.e. a line comment
2144 if ((test = getNextChar('/', '*')) == 0) {
2148 currentCharacter = source[currentPosition++];
2149 // if (((currentCharacter = source[currentPosition++]) ==
2151 // && (source[currentPosition] == 'u')) {
2152 // //-------------unicode processing ------------
2153 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2154 // currentPosition++;
2155 // while (source[currentPosition] == 'u') {
2156 // currentPosition++;
2159 // Character.getNumericValue(source[currentPosition++]))
2163 // Character.getNumericValue(source[currentPosition++]))
2167 // Character.getNumericValue(source[currentPosition++]))
2171 // Character.getNumericValue(source[currentPosition++]))
2174 // //error don't care of the value
2175 // currentCharacter = 'A';
2176 // } //something different from \n and \r
2178 // currentCharacter =
2179 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// skip to the end of the line
2182 while (currentCharacter != '\r' && currentCharacter != '\n') {
2184 currentCharacter = source[currentPosition++];
2185 // if (((currentCharacter = source[currentPosition++])
2187 // && (source[currentPosition] == 'u')) {
2188 // //-------------unicode processing ------------
2189 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2190 // currentPosition++;
2191 // while (source[currentPosition] == 'u') {
2192 // currentPosition++;
2195 // Character.getNumericValue(source[currentPosition++]))
2199 // Character.getNumericValue(source[currentPosition++]))
2203 // Character.getNumericValue(source[currentPosition++]))
2207 // Character.getNumericValue(source[currentPosition++]))
2210 // //error don't care of the value
2211 // currentCharacter = 'A';
2212 // } //something different from \n and \r
2214 // currentCharacter =
2215 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2219 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2220 pushLineSeparator();
2221 } catch (IndexOutOfBoundsException e) {
2222 } //an eof will then be generated
2226 //traditional and annotation comment
// star remembers whether the previous character was '*', so that "*/" can be detected
2227 boolean star = false;
2228 // try { // consume next character
2229 unicodeAsBackSlash = false;
2230 currentCharacter = source[currentPosition++];
2231 // if (((currentCharacter = source[currentPosition++]) == '\\')
2232 // && (source[currentPosition] == 'u')) {
2233 // getNextUnicodeChar();
2235 if (withoutUnicodePtr != 0) {
2236 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2239 // } catch (InvalidInputException ex) {
2241 if (currentCharacter == '*') {
2244 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2245 pushLineSeparator();
2246 try { //get the next char
2247 currentCharacter = source[currentPosition++];
2248 // if (((currentCharacter = source[currentPosition++]) ==
2250 // && (source[currentPosition] == 'u')) {
2251 // //-------------unicode processing ------------
2252 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2253 // currentPosition++;
2254 // while (source[currentPosition] == 'u') {
2255 // currentPosition++;
2258 // Character.getNumericValue(source[currentPosition++]))
2262 // Character.getNumericValue(source[currentPosition++]))
2266 // Character.getNumericValue(source[currentPosition++]))
2270 // Character.getNumericValue(source[currentPosition++]))
2273 // //error don't care of the value
2274 // currentCharacter = 'A';
2275 // } //something different from * and /
2277 // currentCharacter =
2278 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2281 //loop until end of comment */
2282 while ((currentCharacter != '/') || (!star)) {
2283 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2284 pushLineSeparator();
2285 star = currentCharacter == '*';
2287 currentCharacter = source[currentPosition++];
2288 // if (((currentCharacter = source[currentPosition++])
2290 // && (source[currentPosition] == 'u')) {
2291 // //-------------unicode processing ------------
2292 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2293 // currentPosition++;
2294 // while (source[currentPosition] == 'u') {
2295 // currentPosition++;
2298 // Character.getNumericValue(source[currentPosition++]))
2302 // Character.getNumericValue(source[currentPosition++]))
2306 // Character.getNumericValue(source[currentPosition++]))
2310 // Character.getNumericValue(source[currentPosition++]))
2313 // //error don't care of the value
2314 // currentCharacter = 'A';
2315 // } //something different from * and /
2317 // currentCharacter =
2318 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2322 } catch (IndexOutOfBoundsException e) {
// anything else: identifiers/keywords (a leading '$' marks a variable) and numbers are
// scanned only to be skipped; scan errors are caught locally
2330 if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
2332 scanIdentifierOrKeyword((currentCharacter == '$'));
2333 } catch (InvalidInputException ex) {
2338 if (Character.isDigit(currentCharacter)) {
2341 } catch (InvalidInputException ex) {
2348 //-----------------end switch while try--------------------
2349 } catch (IndexOutOfBoundsException e) {
2350 } catch (InvalidInputException e) {
2355 // public final boolean jumpOverUnicodeWhiteSpace()
2356 // throws InvalidInputException {
2358 // //handle the case of unicode. Jump over the next whiteSpace
2359 // //making startPosition pointing on the next available char
2360 // //On false, the currentCharacter is filled up with a potential
2364 // this.wasAcr = false;
2365 // int c1, c2, c3, c4;
2366 // int unicodeSize = 6;
2367 // currentPosition++;
2368 // while (source[currentPosition] == 'u') {
2369 // currentPosition++;
2373 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2375 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2377 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2379 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2381 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2384 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2385 // if (recordLineSeparator
2386 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2387 // pushLineSeparator();
2388 // if (Character.isWhitespace(currentCharacter))
2391 // //buffer the new char which is not a white space
2392 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2393 // //withoutUnicodePtr == 1 is true here
2395 // } catch (IndexOutOfBoundsException e) {
2396 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// Builds a copy of the recorded line-end positions: the first linePtr + 1
// entries of lineEnds are copied into a freshly allocated array, so the
// scanner's own buffer is not handed out.
2399 public final int[] getLineEnds() {
2400 //build a copy of this.lineEnds bounded to its first linePtr + 1 entries
2402 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
// Accessor for the character buffer being scanned.
// NOTE(review): presumably returns this.source itself (no copy) - confirm.
2406 public char[] getSource() {
// Tells whether the given token kind is an identifier or a keyword.
// A token qualifies when it equals TokenNameIdentifier or is numerically
// greater than TokenNameKEYWORD.
// NOTE(review): this relies on every keyword constant being declared above
// TokenNameKEYWORD - presumably guaranteed by ITerminalSymbols; confirm.
2410 public static boolean isIdentifierOrKeyword(int token) {
2411 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
// Returns the source of the one-character token located at startPosition.
2414 final char[] optimizedCurrentTokenSource1() {
2415 //always try to return the same char[], which is built only once
2416 //optimization at no speed cost for 99.5 % of the single-char identifiers
2417 char charOne = source[startPosition];
// fallback: a fresh one-element array holding charOne is allocated
2472 return new char[] { charOne };
// Returns the char[] for the two-character token starting at startPosition.
// The charArray_length[0] cache is searched first so that equal tokens can
// share one array; a new array is only allocated when no entry matches.
2476 final char[] optimizedCurrentTokenSource2() {
2477 //try to return the same char[], built only once
// bucket selection: both characters are folded into one int (6-bit shift) and reduced modulo TableSize
2479 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2480 char[][] table = charArray_length[0][hash];
// first probe: walk the bucket slots below InternalTableSize, comparing both characters
2482 while (++i < InternalTableSize) {
2483 char[] charArray = table[i];
2484 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2487 //---------other side---------
// second probe: walk the slots up to and including max (= newEntry2)
2489 int max = newEntry2;
2490 while (++i <= max) {
2491 char[] charArray = table[i];
2492 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2495 //--------add the entry-------
// no cached match: a new array is stored at slot max
// NOTE(review): when ++max reaches InternalTableSize the index presumably wraps back to 0 - confirm
2496 if (++max >= InternalTableSize)
2499 table[max] = (r = new char[] { c0, c1 });
// Returns the char[] for the three-character token starting at startPosition.
// The charArray_length[1] cache is searched first so that equal tokens can
// share one array; a new array is only allocated when no entry matches.
2504 final char[] optimizedCurrentTokenSource3() {
2505 //try to return the same char[], built only once
// bucket selection: the three characters are folded into one int using 6-bit shifts
2507 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2509 char[][] table = charArray_length[1][hash];
// first probe: walk the bucket slots below InternalTableSize, comparing all three characters
2511 while (++i < InternalTableSize) {
2512 char[] charArray = table[i];
2513 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2516 //---------other side---------
// second probe: walk the slots up to and including max (= newEntry3)
2518 int max = newEntry3;
2519 while (++i <= max) {
2520 char[] charArray = table[i];
2521 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2524 //--------add the entry-------
// no cached match: a new array is stored at slot max
// NOTE(review): when ++max reaches InternalTableSize the index presumably wraps back to 0 - confirm
2525 if (++max >= InternalTableSize)
2528 table[max] = (r = new char[] { c0, c1, c2 });
// Returns the char[] for the four-character token starting at startPosition.
// The charArray_length[2] cache is searched first so that equal tokens can
// share one array; a new array is only allocated when no entry matches.
2533 final char[] optimizedCurrentTokenSource4() {
2534 //try to return the same char[], built only once
2535 char c0, c1, c2, c3;
// bucket selection: the four characters are folded with 6-bit shifts; the
// leading character is widened to long before its 18-bit shift
2536 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2537 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2539 char[][] table = charArray_length[2][(int) hash];
// first probe: walk the bucket slots below InternalTableSize, comparing all four characters
2541 while (++i < InternalTableSize) {
2542 char[] charArray = table[i];
2543 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2546 //---------other side---------
// second probe: walk the slots up to and including max (= newEntry4)
2548 int max = newEntry4;
2549 while (++i <= max) {
2550 char[] charArray = table[i];
2551 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2554 //--------add the entry-------
// no cached match: a new array is stored at slot max
// NOTE(review): when ++max reaches InternalTableSize the index presumably wraps back to 0 - confirm
2555 if (++max >= InternalTableSize)
2558 table[max] = (r = new char[] { c0, c1, c2, c3 });
// Returns the char[] for the five-character token starting at startPosition.
// The charArray_length[3] cache is searched first so that equal tokens can
// share one array; a new array is only allocated when no entry matches.
2563 final char[] optimizedCurrentTokenSource5() {
2564 //try to return the same char[], built only once
2565 char c0, c1, c2, c3, c4;
// bucket selection: the five characters are folded with 6-bit shifts; the two
// leading characters are widened to long before being shifted
2566 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2567 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2569 char[][] table = charArray_length[3][(int) hash];
// first probe: walk the bucket slots below InternalTableSize, comparing all five characters
2571 while (++i < InternalTableSize) {
2572 char[] charArray = table[i];
2573 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2576 //---------other side---------
// second probe: walk the slots up to and including max (= newEntry5)
2578 int max = newEntry5;
2579 while (++i <= max) {
2580 char[] charArray = table[i];
2581 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2584 //--------add the entry-------
// no cached match: a new array is stored at slot max
// NOTE(review): when ++max reaches InternalTableSize the index presumably wraps back to 0 - confirm
2585 if (++max >= InternalTableSize)
2588 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
// Returns the char[] for the six-character token starting at startPosition.
// The charArray_length[4] cache is searched first so that equal tokens can
// share one array; a new array is only allocated when no entry matches.
2593 final char[] optimizedCurrentTokenSource6() {
2594 //try to return the same char[], built only once
2595 char c0, c1, c2, c3, c4, c5;
// bucket selection: the six characters are folded into one long.
// NOTE(review): the shifts are 32, 24, 18, 12, 6, 0 - the first two steps do
// not follow the 6-bit spacing used by the shorter variants (30 would be
// expected for c0). This only influences hash distribution, not correctness
// of the lookup, since candidates are compared character by character.
2596 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2597 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2598 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2600 char[][] table = charArray_length[4][(int) hash];
// first probe: walk the bucket slots below InternalTableSize, comparing all six characters
2602 while (++i < InternalTableSize) {
2603 char[] charArray = table[i];
2604 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2605 && (c5 == charArray[5]))
2608 //---------other side---------
// second probe: walk the slots up to and including max (= newEntry6)
2610 int max = newEntry6;
2611 while (++i <= max) {
2612 char[] charArray = table[i];
2613 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2614 && (c5 == charArray[5]))
2617 //--------add the entry-------
// no cached match: a new array is stored at slot max
// NOTE(review): when ++max reaches InternalTableSize the index presumably wraps back to 0 - confirm
2618 if (++max >= InternalTableSize)
2621 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
// Records the position of the line separator held in currentCharacter
// ('\r' or '\n', located at currentPosition - 1) in lineEnds.
// lineEnds is grown by INCREMENT entries whenever the store at ++linePtr
// overflows it. A '\r' directly followed by '\n' ends up as a single entry
// located at the '\n'.
2626 public final void pushLineSeparator() throws InvalidInputException {
2627 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2628 final int INCREMENT = 250;
2629 if (this.checkNonExternalizedStringLiterals) {
2630 // reinitialize the current line for non externalized strings purpose
2633 //currentCharacter is at position currentPosition-1
2635 if (currentCharacter == '\r') {
2636 int separatorPos = currentPosition - 1;
// guard: the last recorded line end is already at or beyond this separator
2637 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2639 //System.out.println("CR-" + separatorPos);
2641 lineEnds[++linePtr] = separatorPos;
2642 } catch (IndexOutOfBoundsException e) {
2643 //linePtr value is correct (it was incremented before the store failed)
// lineEnds was full: allocate INCREMENT more entries, keep the old ones, redo the store
2644 int oldLength = lineEnds.length;
2645 int[] old = lineEnds;
2646 lineEnds = new int[oldLength + INCREMENT];
2647 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2648 lineEnds[linePtr] = separatorPos;
2650 // look-ahead for merged cr+lf
2652 if (source[currentPosition] == '\n') {
2653 //System.out.println("look-ahead LF-" + currentPosition);
// the entry just written for the '\r' is moved onto the following '\n'
2654 lineEnds[linePtr] = currentPosition;
2660 } catch (IndexOutOfBoundsException e) {
2665 if (currentCharacter == '\n') {
2666 //must merge eventual cr followed by lf
// the previous entry sits on the character right before this '\n' and a CR was seen: reuse that entry
2667 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2668 //System.out.println("merge LF-" + (currentPosition - 1));
2669 lineEnds[linePtr] = currentPosition - 1;
2671 int separatorPos = currentPosition - 1;
// guard: the last recorded line end is already at or beyond this separator
2672 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2674 // System.out.println("LF-" + separatorPos);
2676 lineEnds[++linePtr] = separatorPos;
2677 } catch (IndexOutOfBoundsException e) {
2678 //linePtr value is correct (it was incremented before the store failed)
// lineEnds was full: allocate INCREMENT more entries, keep the old ones, redo the store
2679 int oldLength = lineEnds.length;
2680 int[] old = lineEnds;
2681 lineEnds = new int[oldLength + INCREMENT];
2682 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2683 lineEnds[linePtr] = separatorPos;
// Variant of pushLineSeparator() for a '\r' or '\n' that was read as a unicode
// escape. The separator position is taken as currentPosition - 6 instead of
// currentPosition - 1.
// NOTE(review): the offset of 6 presumably corresponds to the length of a
// \uXXXX escape - confirm.
2691 public final void pushUnicodeLineSeparator() {
2692 // isUnicode means that the \r or \n has been read as a unicode character
2693 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2694 final int INCREMENT = 250;
2695 //currentCharacter is at position currentPosition-1
2696 if (this.checkNonExternalizedStringLiterals) {
2697 // reinitialize the current line for non externalized strings purpose
2701 if (currentCharacter == '\r') {
2702 int separatorPos = currentPosition - 6;
// guard: the last recorded line end is already at or beyond this separator
2703 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2705 //System.out.println("CR-" + separatorPos);
2707 lineEnds[++linePtr] = separatorPos;
2708 } catch (IndexOutOfBoundsException e) {
2709 //linePtr value is correct (it was incremented before the store failed)
// lineEnds was full: allocate INCREMENT more entries, keep the old ones, redo the store
2710 int oldLength = lineEnds.length;
2711 int[] old = lineEnds;
2712 lineEnds = new int[oldLength + INCREMENT];
2713 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2714 lineEnds[linePtr] = separatorPos;
2716 // look-ahead for merged cr+lf
2717 if (source[currentPosition] == '\n') {
2718 //System.out.println("look-ahead LF-" + currentPosition);
// the entry just written for the '\r' is moved onto the following '\n'
2719 lineEnds[linePtr] = currentPosition;
2727 if (currentCharacter == '\n') {
2728 //must merge eventual cr followed by lf
// a CR was seen and the previous entry sits at currentPosition - 7: reuse that entry
2729 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2730 //System.out.println("merge LF-" + (currentPosition - 1));
2731 lineEnds[linePtr] = currentPosition - 6;
2733 int separatorPos = currentPosition - 6;
// guard: the last recorded line end is already at or beyond this separator
2734 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2736 // System.out.println("LF-" + separatorPos);
2738 lineEnds[++linePtr] = separatorPos;
2739 } catch (IndexOutOfBoundsException e) {
2740 //linePtr value is correct (it was incremented before the store failed)
// lineEnds was full: allocate INCREMENT more entries, keep the old ones, redo the store
2741 int oldLength = lineEnds.length;
2742 int[] old = lineEnds;
2743 lineEnds = new int[oldLength + INCREMENT];
2744 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2745 lineEnds[linePtr] = separatorPos;
// Records the comment that was just scanned: its start (startPosition) goes
// into commentStarts and its stop position into commentStops, both at index
// ++commentPtr. The two arrays grow together by 30 entries when full.
// The stop position is stored negated for line and block comments; for any
// other token kind it stays the positive currentPosition.
// NOTE(review): the positive case presumably marks doc comments - confirm.
2753 public void recordComment(int token) {
// default stop position, kept as is unless one of the cases below overrides it
2755 int stopPosition = this.currentPosition;
2757 case TokenNameCOMMENT_LINE:
// line comments end at lastCommentLinePosition rather than at currentPosition
2758 stopPosition = -this.lastCommentLinePosition;
2760 case TokenNameCOMMENT_BLOCK:
2761 stopPosition = -this.currentPosition;
2765 // a new comment is recorded
2766 int length = this.commentStops.length;
2767 if (++this.commentPtr >= length) {
// the right-hand assignment replaces the field with a larger array that receives the old content
2768 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2769 //grows the positions buffers too
2770 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2772 this.commentStops[this.commentPtr] = stopPosition;
2773 this.commentStarts[this.commentPtr] = this.startPosition;
2776 // public final void recordComment(boolean isJavadoc) {
2777 // // a new annotation comment is recorded
2779 // commentStops[++commentPtr] = isJavadoc
2780 // ? currentPosition
2781 // : -currentPosition;
2782 // } catch (IndexOutOfBoundsException e) {
2783 // int oldStackLength = commentStops.length;
2784 // int[] oldStack = commentStops;
2785 // commentStops = new int[oldStackLength + 30];
2786 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2787 // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2788 // //grows the positions buffers too
2789 // int[] old = commentStarts;
2790 // commentStarts = new int[oldStackLength + 30];
2791 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2793 // //the buffer is of a correct size here
2794 // commentStarts[commentPtr] = startPosition;
// Repositions the scanner so that scanning can restart at 'begin'.
// begin: new value for initialPosition, startPosition and currentPosition.
// end:   last position to scan; eofPosition is set one past it.
2796 public void resetTo(int begin, int end) {
2797 //reset the scanner to a given position from which it may scan again
2799 initialPosition = startPosition = currentPosition = begin;
// end + 1 is skipped for Integer.MAX_VALUE, where the addition would overflow
2800 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2801 commentPtr = -1; // reset comment stack
// Handles the character following a backslash inside a single-quoted string:
// the next source character is read into currentCharacter, which the switch
// then replaces with either a quote or a backslash.
// NOTE(review): presumably only \' and \\ are real escapes here and any other
// character leaves a literal backslash - confirm against the case labels.
2804 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2805 // the string with "\\u" is a legal string of two chars \ and u
2806 //thus we use a direct access to the source (for regular cases).
2807 // if (unicodeAsBackSlash) {
2808 // // consume next character
2809 // unicodeAsBackSlash = false;
2810 // if (((currentCharacter = source[currentPosition++]) == '\\')
2811 // && (source[currentPosition] == 'u')) {
2812 // getNextUnicodeChar();
2814 // if (withoutUnicodePtr != 0) {
2815 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// read the escaped character directly from the source and advance
2819 currentCharacter = source[currentPosition++];
2820 switch (currentCharacter) {
2822 currentCharacter = '\'';
2825 currentCharacter = '\\';
2828 currentCharacter = '\\';
// Decodes the escape sequence following a backslash inside a double-quoted
// string: the next source character is read and currentCharacter is replaced
// by the character it denotes (tab, newline, carriage return, double quote,
// single quote, backslash or '$'). Anything else is tried as an octal escape
// of up to three digits.
// Throws InvalidInputException(INVALID_ESCAPE) from the octal branch.
2833 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2834 // the string with "\\u" is a legal string of two chars \ and u
2835 //thus we use a direct access to the source (for regular cases).
2836 // if (unicodeAsBackSlash) {
2837 // // consume next character
2838 // unicodeAsBackSlash = false;
2839 // if (((currentCharacter = source[currentPosition++]) == '\\')
2840 // && (source[currentPosition] == 'u')) {
2841 // getNextUnicodeChar();
2843 // if (withoutUnicodePtr != 0) {
2844 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// read the escaped character directly from the source and advance
2848 currentCharacter = source[currentPosition++];
2849 switch (currentCharacter) {
// the '\b' and '\f' mappings are disabled (commented out)
2851 // currentCharacter = '\b';
2854 currentCharacter = '\t';
2857 currentCharacter = '\n';
2860 // currentCharacter = '\f';
2863 currentCharacter = '\r';
2866 currentCharacter = '\"';
2869 currentCharacter = '\'';
2872 currentCharacter = '\\';
2875 currentCharacter = '$';
2878 // -----------octal escape--------------
2880 // OctalDigit OctalDigit
2881 // ZeroToThree OctalDigit OctalDigit
2882 int number = Character.getNumericValue(currentCharacter);
2883 if (number >= 0 && number <= 7) {
// a first digit above 3 rules out a third octal digit
2884 boolean zeroToThreeNot = number > 3;
2885 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2886 int digit = Character.getNumericValue(currentCharacter);
2887 if (digit >= 0 && digit <= 7) {
2888 number = (number * 8) + digit;
2889 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2890 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2891 // Digit --> ignore last character
2894 digit = Character.getNumericValue(currentCharacter);
2895 if (digit >= 0 && digit <= 7) {
2896 // has read \ZeroToThree OctalDigit OctalDigit
2897 number = (number * 8) + digit;
2898 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2899 // --> ignore last character
2903 } else { // has read \OctalDigit NonDigit--> ignore last
2907 } else { // has read \OctalDigit NonOctalDigit--> ignore last
2911 } else { // has read \OctalDigit --> ignore last character
// NOTE(review): presumably raised when the decoded value is out of range - confirm the guarding condition
2915 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the decoded character
2916 currentCharacter = (char) number;
// NOTE(review): with this throw disabled, a non-octal character apparently no longer raises INVALID_ESCAPE - confirm
2919 // throw new InvalidInputException(INVALID_ESCAPE);
2923 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2924 // return scanIdentifierOrKeyword( false );
// Scans the remainder of an identifier and classifies it.
// The identifier characters are consumed through getNextCharAsJavaIdentifierPart();
// the token then spans startPosition..currentPosition. The result is
// TokenNameVariable, a keyword token, or TokenNameIdentifier.
// Keyword matching works on 'data', a lower-cased copy of the token, so it is
// case-insensitive. Each candidate keyword is compared character by character.
// isVariable: the caller passes true when the token started with '$'.
2926 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2928 //first dispatch on the first char.
2929 //then the length. If there are several
2930 //keywords with the same length AND the same first char, then do another
2931 //dispatch on the second char :-)...cool....but fast !
2932 useAssertAsAnIndentifier = false;
// consume every following identifier character
2933 while (getNextCharAsJavaIdentifierPart()) {
2937 // if (new String(getCurrentTokenSource()).equals("$this")) {
2938 // return TokenNamethis;
// NOTE(review): presumably guarded by isVariable - confirm
2940 return TokenNameVariable;
2945 // if (withoutUnicodePtr == 0)
2946 //quick test on length == 1 but not on length > 12 while most identifier
2947 //have a length which is <= 12...but there are lots of identifier with
// a one-character token is always reported as a plain identifier
2950 if ((length = currentPosition - startPosition) == 1)
2951 return TokenNameIdentifier;
// lower-cased copy of the token, so that the comparisons below ignore case
2953 data = new char[length];
2954 index = startPosition;
2955 for (int i = 0; i < length; i++) {
2956 data[i] = Character.toLowerCase(source[index + i]);
2960 // if ((length = withoutUnicodePtr) == 1)
2961 // return TokenNameIdentifier;
2962 // // data = withoutUnicodeBuffer;
2963 // data = new char[withoutUnicodeBuffer.length];
2964 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2965 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2969 firstLetter = data[index];
2970 switch (firstLetter) {
// magic constants: __file__ __line__ __class__ __method__ __function__
2975 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
2976 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
2977 return TokenNameFILE;
2978 index = 0; //__LINE__
2979 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
2980 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
2981 return TokenNameLINE;
2985 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
2986 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
2987 return TokenNameCLASS_C;
2991 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
2992 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
2993 && (data[++index] == '_'))
2994 return TokenNameMETHOD_C;
2998 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
2999 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3000 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3001 return TokenNameFUNC_C;
3004 return TokenNameIdentifier;
3006 // as and array abstract
3010 if ((data[++index] == 's')) {
3013 return TokenNameIdentifier;
3017 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3018 return TokenNameand;
3020 return TokenNameIdentifier;
3024 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3025 return TokenNamearray;
3027 return TokenNameIdentifier;
3029 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3030 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3031 return TokenNameabstract;
3033 return TokenNameIdentifier;
3035 return TokenNameIdentifier;
3041 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3042 return TokenNamebreak;
3044 return TokenNameIdentifier;
3046 return TokenNameIdentifier;
3049 //case catch class clone const continue
3052 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3053 return TokenNamecase;
3055 return TokenNameIdentifier;
3057 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3058 return TokenNamecatch;
3060 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3061 return TokenNameclass;
3063 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3064 return TokenNameclone;
3066 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3067 return TokenNameconst;
3069 return TokenNameIdentifier;
3071 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3072 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3073 return TokenNamecontinue;
3075 return TokenNameIdentifier;
3077 return TokenNameIdentifier;
3080 // declare default do die
3081 // TODO delete define ==> no keyword !
3084 if ((data[++index] == 'o'))
3087 return TokenNameIdentifier;
3089 // if ((data[++index] == 'e')
3090 // && (data[++index] == 'f')
3091 // && (data[++index] == 'i')
3092 // && (data[++index] == 'n')
3093 // && (data[++index] == 'e'))
3094 // return TokenNamedefine;
3096 // return TokenNameIdentifier;
3098 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3099 && (data[++index] == 'r') && (data[++index] == 'e'))
3100 return TokenNamedeclare;
3102 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3103 && (data[++index] == 'l') && (data[++index] == 't'))
3104 return TokenNamedefault;
3106 return TokenNameIdentifier;
3108 return TokenNameIdentifier;
3111 //echo else exit elseif extends eval
// in the else-if chains below, data[index] (no increment) re-tests the second
// character that the first failed comparison already advanced to
3114 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3115 return TokenNameecho;
3116 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3117 return TokenNameelse;
3118 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3119 return TokenNameexit;
3120 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3121 return TokenNameeval;
3123 return TokenNameIdentifier;
3126 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3127 return TokenNameendif;
3128 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3129 return TokenNameempty;
3131 return TokenNameIdentifier;
3134 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3135 && (data[++index] == 'r'))
3136 return TokenNameendfor;
3137 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3138 && (data[++index] == 'f'))
3139 return TokenNameelseif;
3141 return TokenNameIdentifier;
3143 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3144 && (data[++index] == 'd') && (data[++index] == 's'))
3145 return TokenNameextends;
3147 return TokenNameIdentifier;
3150 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3151 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3152 return TokenNameendwhile;
3154 return TokenNameIdentifier;
3157 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3158 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3159 return TokenNameendswitch;
3161 return TokenNameIdentifier;
3164 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3165 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3166 && (data[++index] == 'e'))
3167 return TokenNameenddeclare;
3169 if ((data[++index] == 'n') // endforeach
3170 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3171 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3172 return TokenNameendforeach;
3174 return TokenNameIdentifier;
3176 return TokenNameIdentifier;
3179 //for false final function
3182 if ((data[++index] == 'o') && (data[++index] == 'r'))
3183 return TokenNamefor;
3185 return TokenNameIdentifier;
3187 // if ((data[++index] == 'a') && (data[++index] == 'l')
3188 // && (data[++index] == 's') && (data[++index] == 'e'))
3189 // return TokenNamefalse;
3190 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3191 return TokenNamefinal;
3193 return TokenNameIdentifier;
3196 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3197 && (data[++index] == 'c') && (data[++index] == 'h'))
3198 return TokenNameforeach;
3200 return TokenNameIdentifier;
3203 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3204 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3205 return TokenNamefunction;
3207 return TokenNameIdentifier;
3209 return TokenNameIdentifier;
3214 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3215 && (data[++index] == 'l')) {
3216 return TokenNameglobal;
3219 return TokenNameIdentifier;
3221 //if int isset include include_once instanceof interface implements
3224 if (data[++index] == 'f')
3227 return TokenNameIdentifier;
3229 // if ((data[++index] == 'n') && (data[++index] == 't'))
3230 // return TokenNameint;
3232 // return TokenNameIdentifier;
3234 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3235 return TokenNameisset;
3237 return TokenNameIdentifier;
3239 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3240 && (data[++index] == 'd') && (data[++index] == 'e'))
3241 return TokenNameinclude;
3243 return TokenNameIdentifier;
3246 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3247 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3248 return TokenNameinterface;
3250 return TokenNameIdentifier;
3253 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3254 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3255 && (data[++index] == 'f'))
3256 return TokenNameinstanceof;
3257 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3258 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3259 && (data[++index] == 's'))
3260 return TokenNameimplements;
3262 return TokenNameIdentifier;
3264 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3265 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3266 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3267 return TokenNameinclude_once;
3269 return TokenNameIdentifier;
3271 return TokenNameIdentifier;
3276 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3277 return TokenNamelist;
3280 return TokenNameIdentifier;
3285 if ((data[++index] == 'e') && (data[++index] == 'w'))
3286 return TokenNamenew;
3288 return TokenNameIdentifier;
3290 // if ((data[++index] == 'u') && (data[++index] == 'l')
3291 // && (data[++index] == 'l'))
3292 // return TokenNamenull;
3294 // return TokenNameIdentifier;
3296 return TokenNameIdentifier;
3301 if (data[++index] == 'r') {
3305 // if (length == 12) {
3306 // if ((data[++index] == 'l')
3307 // && (data[++index] == 'd')
3308 // && (data[++index] == '_')
3309 // && (data[++index] == 'f')
3310 // && (data[++index] == 'u')
3311 // && (data[++index] == 'n')
3312 // && (data[++index] == 'c')
3313 // && (data[++index] == 't')
3314 // && (data[++index] == 'i')
3315 // && (data[++index] == 'o')
3316 // && (data[++index] == 'n')) {
3317 // return TokenNameold_function;
3320 return TokenNameIdentifier;
3322 // print public private protected
3325 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3326 return TokenNameprint;
3328 return TokenNameIdentifier;
3330 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3331 && (data[++index] == 'c')) {
3332 return TokenNamepublic;
3334 return TokenNameIdentifier;
3336 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3337 && (data[++index] == 't') && (data[++index] == 'e')) {
3338 return TokenNameprivate;
3340 return TokenNameIdentifier;
3342 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3343 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3344 return TokenNameprotected;
3346 return TokenNameIdentifier;
3348 return TokenNameIdentifier;
3350 //return require require_once
3352 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3353 && (data[++index] == 'n')) {
3354 return TokenNamereturn;
3356 } else if (length == 7) {
3357 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3358 && (data[++index] == 'r') && (data[++index] == 'e')) {
3359 return TokenNamerequire;
3361 } else if (length == 12) {
3362 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3363 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3364 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3365 return TokenNamerequire_once;
3368 return TokenNameIdentifier;
// static switch
3373 if (data[++index] == 't')
3374 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3375 return TokenNamestatic;
3377 return TokenNameIdentifier;
3378 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3379 && (data[++index] == 'h'))
3380 return TokenNameswitch;
3382 return TokenNameIdentifier;
3384 return TokenNameIdentifier;
3390 if ((data[++index] == 'r') && (data[++index] == 'y'))
3391 return TokenNametry;
3393 return TokenNameIdentifier;
3395 // if ((data[++index] == 'r') && (data[++index] == 'u')
3396 // && (data[++index] == 'e'))
3397 // return TokenNametrue;
3399 // return TokenNameIdentifier;
3401 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3402 return TokenNamethrow;
3404 return TokenNameIdentifier;
3406 return TokenNameIdentifier;
3412 if ((data[++index] == 's') && (data[++index] == 'e'))
3413 return TokenNameuse;
3415 return TokenNameIdentifier;
3417 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3418 return TokenNameunset;
3420 return TokenNameIdentifier;
3422 return TokenNameIdentifier;
3428 if ((data[++index] == 'a') && (data[++index] == 'r'))
3429 return TokenNamevar;
3431 return TokenNameIdentifier;
3433 return TokenNameIdentifier;
3439 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3440 return TokenNamewhile;
3442 return TokenNameIdentifier;
3443 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3444 // (data[++index]=='e') && (data[++index]=='f')&&
3445 // (data[++index]=='p'))
3446 //return TokenNamewidefp ;
3448 //return TokenNameIdentifier;
3450 return TokenNameIdentifier;
3456 if ((data[++index] == 'o') && (data[++index] == 'r'))
3457 return TokenNamexor;
3459 return TokenNameIdentifier;
3461 return TokenNameIdentifier;
// no keyword starts with this first letter
3464 return TokenNameIdentifier;
/**
 * Scans the remainder of a numeric literal whose first character is already in currentCharacter and
 * returns the kind of token that was read.
 * <p>
 * Forms handled by the visible code: a hexadecimal literal introduced by 0x/0X, a literal starting with 0
 * followed by more digits (integer, or double once a '.', an exponent or a d/D suffix is consumed), and a
 * plain decimal literal with optional fraction, exponent and d/D suffix.
 * <p>
 * NOTE(review): this copy of the method has lost lines (closing braces, the empty bodies of the digit
 * loops, short statements such as assignments to "floating") and every line starts with a stray number,
 * so it does not compile as shown. Restore it from a pristine copy before changing any logic.
 *
 * @param dotPrefix when true the literal starts out as floating and no further '.' is accepted;
 *                  presumably set by the caller when a leading '.' was already consumed
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException with INVALID_HEXA when 0x/0X is not followed by a hexadecimal digit, or
 *                               with INVALID_FLOAT when an exponent marker (and optional sign) is not
 *                               followed by a digit
 */
3468 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3469 //when entering this method the currentCharacter is the first
3470 //digit of the number, i.e. it may be preceded by a '.' when
// (the end of the sentence above is missing from this copy; presumably "dotPrefix is true")
3472 boolean floating = dotPrefix;
3473 if ((!dotPrefix) && (currentCharacter == '0')) {
3474 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3475 //force the first char of the hexa number to exist...
3476 // consume next character
3477 unicodeAsBackSlash = false;
// NOTE(review): direct read with no end-of-source check; a literal ending right after "0x" relies on
// the caller coping with the resulting index error - confirm
3478 currentCharacter = source[currentPosition++];
3479 // if (((currentCharacter = source[currentPosition++]) == '\\')
3480 // && (source[currentPosition] == 'u')) {
3481 // getNextUnicodeChar();
3483 // if (withoutUnicodePtr != 0) {
3484 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3487 if (Character.digit(currentCharacter, 16) == -1)
3488 throw new InvalidInputException(INVALID_HEXA);
// consume the remaining hexadecimal digits
3490 while (getNextCharAsDigit(16)) {
3493 // if (getNextChar('l', 'L') >= 0)
3494 // return TokenNameLongLiteral;
3496 return TokenNameIntegerLiteral;
3498 //there is no x or X in the number (the hexadecimal case returned above)
3499 //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3500 // 00078.0 is true !!!!! crazy language
3501 if (getNextCharAsDigit()) {
3502 //-------------potential octal-----------------
3503 while (getNextCharAsDigit()) {
3506 // if (getNextChar('l', 'L') >= 0) {
3507 // return TokenNameLongLiteral;
3510 // if (getNextChar('f', 'F') >= 0) {
3511 // return TokenNameFloatingPointLiteral;
3513 if (getNextChar('d', 'D') >= 0) {
3514 return TokenNameDoubleLiteral;
3515 } else { //make the distinction between octal and float ....
3516 if (getNextChar('.')) { //bingo ! ....
// fraction digits
3517 while (getNextCharAsDigit()) {
3520 if (getNextChar('e', 'E') >= 0) {
3521 // consume next character
3522 unicodeAsBackSlash = false;
3523 currentCharacter = source[currentPosition++];
3524 // if (((currentCharacter = source[currentPosition++]) == '\\')
3525 // && (source[currentPosition] == 'u')) {
3526 // getNextUnicodeChar();
3528 // if (withoutUnicodePtr != 0) {
3529 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// optional exponent sign
3532 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3533 // consume next character
3534 unicodeAsBackSlash = false;
3535 currentCharacter = source[currentPosition++];
3536 // if (((currentCharacter = source[currentPosition++]) == '\\')
3537 // && (source[currentPosition] == 'u')) {
3538 // getNextUnicodeChar();
3540 // if (withoutUnicodePtr != 0) {
3541 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3542 // currentCharacter;
// the exponent needs at least one digit
3546 if (!Character.isDigit(currentCharacter))
3547 throw new InvalidInputException(INVALID_FLOAT);
3548 while (getNextCharAsDigit()) {
3552 // if (getNextChar('f', 'F') >= 0)
3553 // return TokenNameFloatingPointLiteral;
3554 getNextChar('d', 'D'); //jump over potential d or D
3555 return TokenNameDoubleLiteral;
3557 return TokenNameIntegerLiteral;
// general case: remaining digits of the integer part (or of the fraction when dotPrefix is true)
3564 while (getNextCharAsDigit()) {
3567 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3568 // return TokenNameLongLiteral;
3569 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3570 while (getNextCharAsDigit()) {
3575 //if floating is true both exponent and suffix may be optional
3576 if (getNextChar('e', 'E') >= 0) {
3578 // consume next character
3579 unicodeAsBackSlash = false;
3580 currentCharacter = source[currentPosition++];
3581 // if (((currentCharacter = source[currentPosition++]) == '\\')
3582 // && (source[currentPosition] == 'u')) {
3583 // getNextUnicodeChar();
3585 // if (withoutUnicodePtr != 0) {
3586 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3589 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3592 unicodeAsBackSlash = false;
3593 currentCharacter = source[currentPosition++];
3594 // if (((currentCharacter = source[currentPosition++]) == '\\')
3595 // && (source[currentPosition] == 'u')) {
3596 // getNextUnicodeChar();
3598 // if (withoutUnicodePtr != 0) {
3599 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3603 if (!Character.isDigit(currentCharacter))
3604 throw new InvalidInputException(INVALID_FLOAT);
3605 while (getNextCharAsDigit()) {
// an explicit d/D suffix always makes the literal a double
3609 if (getNextChar('d', 'D') >= 0)
3610 return TokenNameDoubleLiteral;
3611 // if (getNextChar('f', 'F') >= 0)
3612 // return TokenNameFloatingPointLiteral;
3613 //the long flag has been tested before
3614 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3618 * Search the line number corresponding to a specific position
3621 public final int getLineNumber(int position) {
3622 if (lineEnds == null)
3624 int length = linePtr + 1;
3627 int g = 0, d = length - 1;
3631 if (position < lineEnds[m]) {
3633 } else if (position > lineEnds[m]) {
3639 if (position < lineEnds[m]) {
3645 public void setPHPMode(boolean mode) {
3649 public final void setSource(char[] source) {
3650 setSource(null, source);
3653 public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3654 //the source-buffer is set to sourceString
3655 this.compilationUnit = compilationUnit;
3656 if (source == null) {
3657 this.source = new char[0];
3659 this.source = source;
3662 initialPosition = currentPosition = 0;
3663 containsAssertKeyword = false;
3664 withoutUnicodeBuffer = new char[this.source.length];
3665 encapsedStringStack = new Stack();
3668 public String toString() {
3669 if (startPosition == source.length)
3670 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3671 if (currentPosition > source.length)
3672 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3673 char front[] = new char[startPosition];
3674 System.arraycopy(source, 0, front, 0, startPosition);
3675 int middleLength = (currentPosition - 1) - startPosition + 1;
3677 if (middleLength > -1) {
3678 middle = new char[middleLength];
3679 System.arraycopy(source, startPosition, middle, 0, middleLength);
3681 middle = new char[0];
3683 char end[] = new char[source.length - (currentPosition - 1)];
3684 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3685 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3686 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable description of the token kind <code>act</code>, for debugging and tracing.
 * <p>
 * Tokens with a fixed spelling are rendered as that spelling (keywords such as "while", operators such as
 * "=="; the logical keywords are rendered in upper case as "AND", "OR", "XOR"). Tokens with variable text
 * (identifiers, variables, literals, strings, heredocs, inline HTML, whitespace, comments) are rendered as
 * <code>Kind(text)</code>, where the text comes from getCurrentTokenSource(); for those kinds the result
 * therefore describes the scanner's current token, whatever value was passed in.
 * The last return statement ("not-a-token(...)") is presumably the default branch for unknown values.
 * <p>
 * NOTE(review): this copy has lost many short lines (the switch header, several case labels and return
 * statements, the default label, closing braces) and every line starts with a stray number, so some
 * returns below appear without their case label. Restore the method from a pristine copy before editing.
 *
 * @param act a TokenName* constant
 * @return the description of the token kind
 */
3690 public final String toStringAction(int act) {
3692 case TokenNameERROR:
3693 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3695 case TokenNameINLINE_HTML:
3696 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3697 case TokenNameIdentifier:
3698 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3699 case TokenNameVariable:
3700 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// ---- keywords ----
3701 case TokenNameabstract:
3702 return "abstract"; //$NON-NLS-1$
3704 return "AND"; //$NON-NLS-1$
3705 case TokenNamearray:
3706 return "array"; //$NON-NLS-1$
3708 return "as"; //$NON-NLS-1$
3709 case TokenNamebreak:
3710 return "break"; //$NON-NLS-1$
3712 return "case"; //$NON-NLS-1$
3713 case TokenNameclass:
3714 return "class"; //$NON-NLS-1$
3715 case TokenNamecatch:
3716 return "catch"; //$NON-NLS-1$
3717 case TokenNameclone:
3720 case TokenNameconst:
3723 case TokenNamecontinue:
3724 return "continue"; //$NON-NLS-1$
3725 case TokenNamedefault:
3726 return "default"; //$NON-NLS-1$
3727 // case TokenNamedefine :
3728 // return "define"; //$NON-NLS-1$
3730 return "do"; //$NON-NLS-1$
3732 return "echo"; //$NON-NLS-1$
3734 return "else"; //$NON-NLS-1$
3735 case TokenNameelseif:
3736 return "elseif"; //$NON-NLS-1$
3737 case TokenNameendfor:
3738 return "endfor"; //$NON-NLS-1$
3739 case TokenNameendforeach:
3740 return "endforeach"; //$NON-NLS-1$
3741 case TokenNameendif:
3742 return "endif"; //$NON-NLS-1$
3743 case TokenNameendswitch:
3744 return "endswitch"; //$NON-NLS-1$
3745 case TokenNameendwhile:
3746 return "endwhile"; //$NON-NLS-1$
3749 case TokenNameextends:
3750 return "extends"; //$NON-NLS-1$
3751 // case TokenNamefalse :
3752 // return "false"; //$NON-NLS-1$
3753 case TokenNamefinal:
3754 return "final"; //$NON-NLS-1$
3756 return "for"; //$NON-NLS-1$
3757 case TokenNameforeach:
3758 return "foreach"; //$NON-NLS-1$
3759 case TokenNamefunction:
3760 return "function"; //$NON-NLS-1$
3761 case TokenNameglobal:
3762 return "global"; //$NON-NLS-1$
3764 return "if"; //$NON-NLS-1$
3765 case TokenNameimplements:
3766 return "implements"; //$NON-NLS-1$
3767 case TokenNameinclude:
3768 return "include"; //$NON-NLS-1$
3769 case TokenNameinclude_once:
3770 return "include_once"; //$NON-NLS-1$
3771 case TokenNameinstanceof:
3772 return "instanceof"; //$NON-NLS-1$
3773 case TokenNameinterface:
3774 return "interface"; //$NON-NLS-1$
3775 case TokenNameisset:
3776 return "isset"; //$NON-NLS-1$
3778 return "list"; //$NON-NLS-1$
3780 return "new"; //$NON-NLS-1$
3781 // case TokenNamenull :
3782 // return "null"; //$NON-NLS-1$
3784 return "OR"; //$NON-NLS-1$
3785 case TokenNameprint:
3786 return "print"; //$NON-NLS-1$
3787 case TokenNameprivate:
3788 return "private"; //$NON-NLS-1$
3789 case TokenNameprotected:
3790 return "protected"; //$NON-NLS-1$
3791 case TokenNamepublic:
3792 return "public"; //$NON-NLS-1$
3793 case TokenNamerequire:
3794 return "require"; //$NON-NLS-1$
3795 case TokenNamerequire_once:
3796 return "require_once"; //$NON-NLS-1$
3797 case TokenNamereturn:
3798 return "return"; //$NON-NLS-1$
3799 case TokenNamestatic:
3800 return "static"; //$NON-NLS-1$
3801 case TokenNameswitch:
3802 return "switch"; //$NON-NLS-1$
3803 // case TokenNametrue :
3804 // return "true"; //$NON-NLS-1$
3805 case TokenNameunset:
3806 return "unset"; //$NON-NLS-1$
3808 return "var"; //$NON-NLS-1$
3809 case TokenNamewhile:
3810 return "while"; //$NON-NLS-1$
3812 return "XOR"; //$NON-NLS-1$
3813 // case TokenNamethis :
3814 // return "$this"; //$NON-NLS-1$
// ---- literals and strings: rendered with the current token source ----
3815 case TokenNameIntegerLiteral:
3816 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3817 case TokenNameDoubleLiteral:
3818 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3819 case TokenNameStringDoubleQuote:
3820 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3821 case TokenNameStringSingleQuote:
3822 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3823 case TokenNameStringInterpolated:
3824 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3825 case TokenNameEncapsedString0:
3826 return "`"; //$NON-NLS-1$
3827 case TokenNameEncapsedString1:
3828 return "\'"; //$NON-NLS-1$
3829 case TokenNameEncapsedString2:
3830 return "\""; //$NON-NLS-1$
3831 case TokenNameSTRING:
3832 return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3833 case TokenNameHEREDOC:
3834 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// ---- operators and punctuation ----
3835 case TokenNamePLUS_PLUS:
3836 return "++"; //$NON-NLS-1$
3837 case TokenNameMINUS_MINUS:
3838 return "--"; //$NON-NLS-1$
3839 case TokenNameEQUAL_EQUAL:
3840 return "=="; //$NON-NLS-1$
3841 case TokenNameEQUAL_EQUAL_EQUAL:
3842 return "==="; //$NON-NLS-1$
3843 case TokenNameEQUAL_GREATER:
3844 return "=>"; //$NON-NLS-1$
3845 case TokenNameLESS_EQUAL:
3846 return "<="; //$NON-NLS-1$
3847 case TokenNameGREATER_EQUAL:
3848 return ">="; //$NON-NLS-1$
3849 case TokenNameNOT_EQUAL:
3850 return "!="; //$NON-NLS-1$
3851 case TokenNameNOT_EQUAL_EQUAL:
3852 return "!=="; //$NON-NLS-1$
3853 case TokenNameLEFT_SHIFT:
3854 return "<<"; //$NON-NLS-1$
3855 case TokenNameRIGHT_SHIFT:
3856 return ">>"; //$NON-NLS-1$
3857 case TokenNamePLUS_EQUAL:
3858 return "+="; //$NON-NLS-1$
3859 case TokenNameMINUS_EQUAL:
3860 return "-="; //$NON-NLS-1$
3861 case TokenNameMULTIPLY_EQUAL:
3862 return "*="; //$NON-NLS-1$
3863 case TokenNameDIVIDE_EQUAL:
3864 return "/="; //$NON-NLS-1$
3865 case TokenNameAND_EQUAL:
3866 return "&="; //$NON-NLS-1$
3867 case TokenNameOR_EQUAL:
3868 return "|="; //$NON-NLS-1$
3869 case TokenNameXOR_EQUAL:
3870 return "^="; //$NON-NLS-1$
3871 case TokenNameREMAINDER_EQUAL:
3872 return "%="; //$NON-NLS-1$
3873 case TokenNameDOT_EQUAL:
3874 return ".="; //$NON-NLS-1$
3875 case TokenNameLEFT_SHIFT_EQUAL:
3876 return "<<="; //$NON-NLS-1$
3877 case TokenNameRIGHT_SHIFT_EQUAL:
3878 return ">>="; //$NON-NLS-1$
3879 case TokenNameOR_OR:
3880 return "||"; //$NON-NLS-1$
3881 case TokenNameAND_AND:
3882 return "&&"; //$NON-NLS-1$
3884 return "+"; //$NON-NLS-1$
3885 case TokenNameMINUS:
3886 return "-"; //$NON-NLS-1$
3887 case TokenNameMINUS_GREATER:
3890 return "!"; //$NON-NLS-1$
3891 case TokenNameREMAINDER:
3892 return "%"; //$NON-NLS-1$
3894 return "^"; //$NON-NLS-1$
3896 return "&"; //$NON-NLS-1$
3897 case TokenNameMULTIPLY:
3898 return "*"; //$NON-NLS-1$
3900 return "|"; //$NON-NLS-1$
3901 case TokenNameTWIDDLE:
3902 return "~"; //$NON-NLS-1$
3903 case TokenNameTWIDDLE_EQUAL:
3904 return "~="; //$NON-NLS-1$
3905 case TokenNameDIVIDE:
3906 return "/"; //$NON-NLS-1$
3907 case TokenNameGREATER:
3908 return ">"; //$NON-NLS-1$
3910 return "<"; //$NON-NLS-1$
3911 case TokenNameLPAREN:
3912 return "("; //$NON-NLS-1$
3913 case TokenNameRPAREN:
3914 return ")"; //$NON-NLS-1$
3915 case TokenNameLBRACE:
3916 return "{"; //$NON-NLS-1$
3917 case TokenNameRBRACE:
3918 return "}"; //$NON-NLS-1$
3919 case TokenNameLBRACKET:
3920 return "["; //$NON-NLS-1$
3921 case TokenNameRBRACKET:
3922 return "]"; //$NON-NLS-1$
3923 case TokenNameSEMICOLON:
3924 return ";"; //$NON-NLS-1$
3925 case TokenNameQUESTION:
3926 return "?"; //$NON-NLS-1$
3927 case TokenNameCOLON:
3928 return ":"; //$NON-NLS-1$
3929 case TokenNameCOMMA:
3930 return ","; //$NON-NLS-1$
3932 return "."; //$NON-NLS-1$
3933 case TokenNameEQUAL:
3934 return "="; //$NON-NLS-1$
3937 case TokenNameDOLLAR:
3939 case TokenNameDOLLAR_LBRACE:
3941 case TokenNameLBRACE_DOLLAR:
3944 return "EOF"; //$NON-NLS-1$
// ---- whitespace and comments: rendered with the current token source ----
3945 case TokenNameWHITESPACE:
3946 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3947 case TokenNameCOMMENT_LINE:
3948 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3949 case TokenNameCOMMENT_BLOCK:
3950 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3951 case TokenNameCOMMENT_PHPDOC:
3952 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3953 // case TokenNameHTML :
3954 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
// ---- magic constants ----
3957 return "__FILE__"; //$NON-NLS-1$
3959 return "__LINE__"; //$NON-NLS-1$
3960 case TokenNameCLASS_C:
3961 return "__CLASS__"; //$NON-NLS-1$
3962 case TokenNameMETHOD_C:
3963 return "__METHOD__"; //$NON-NLS-1$
3964 case TokenNameFUNC_C:
3965 return "__FUNCTION__"; //$NON-NLS-1$
// ---- cast tokens ----
3966 case TokenNameboolCAST:
3967 return "( bool )"; //$NON-NLS-1$
3968 case TokenNameintCAST:
3969 return "( int )"; //$NON-NLS-1$
3970 case TokenNamedoubleCAST:
3971 return "( double )"; //$NON-NLS-1$
3972 case TokenNameobjectCAST:
3973 return "( object )"; //$NON-NLS-1$
3974 case TokenNamestringCAST:
3975 return "( string )"; //$NON-NLS-1$
// fallback for values that match no case above (the "default" label is missing from this copy)
3977 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$ //$NON-NLS-2$
/**
 * Creates a scanner that does not check for non-externalized string literals.
 * Delegates to the three-argument constructor with
 * <code>checkNonExternalizedStringLiterals == false</code>.
 *
 * @param tokenizeComments stored in the field of the same name by the full constructor
 * @param tokenizeWhiteSpace stored in the field of the same name by the full constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(tokenizeComments, tokenizeWhiteSpace, false);
}
/**
 * Creates a scanner with assert mode switched off.
 * Delegates to the four-argument constructor with <code>assertMode == false</code>.
 *
 * @param tokenizeComments stored in the field of the same name by the full constructor
 * @param tokenizeWhiteSpace stored in the field of the same name by the full constructor
 * @param checkNonExternalizedStringLiterals stored in the field of the same name by the full constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
  this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
}
/**
 * Creates a scanner without string tokenizing and without task tags.
 * Delegates to the full constructor with <code>tokenizeStrings == false</code>, no task tags, no task
 * priorities and case-sensitive task-tag matching.
 *
 * @param tokenizeComments stored in the field of the same name by the full constructor
 * @param tokenizeWhiteSpace stored in the field of the same name by the full constructor
 * @param checkNonExternalizedStringLiterals stored in the field of the same name by the full constructor
 * @param assertMode stored in the field of the same name by the full constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode) {
  this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null, true);
}
/**
 * Creates a scanner with every option given explicitly. Each argument is stored in the field of the
 * same name; the end-of-file position starts out unbounded (<code>Integer.MAX_VALUE</code>) and the
 * encapsed-string stack is left unset until a source is set.
 *
 * @param tokenizeComments scanner option, stored as given
 * @param tokenizeWhiteSpace scanner option, stored as given
 * @param checkNonExternalizedStringLiterals scanner option, stored as given
 * @param assertMode scanner option, stored as given
 * @param tokenizeStrings scanner option, stored as given
 * @param taskTags the tags that checkTaskTag() looks for inside comments
 * @param taskPriorities priorities matching <code>taskTags</code> by index; may be <code>null</code>
 * @param isTaskCaseSensitive whether checkTaskTag() compares tags case-sensitively
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
    boolean checkNonExternalizedStringLiterals,
    boolean assertMode, boolean tokenizeStrings,
    char[][] taskTags,
    char[][] taskPriorities,
    boolean isTaskCaseSensitive) {
  this.eofPosition = Integer.MAX_VALUE;
  this.tokenizeComments = tokenizeComments;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeStrings = tokenizeStrings;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.assertMode = assertMode;
  this.encapsedStringStack = null;
  this.taskTags = taskTags;
  this.taskPriorities = taskPriorities;
  // the flag used to be accepted but never stored, so checkTaskTag() ignored the caller's choice
  this.isTaskCaseSensitive = isTaskCaseSensitive;
}
4015 private void checkNonExternalizeString() throws InvalidInputException {
4016 if (currentLine == null)
4018 parseTags(currentLine);
4021 private void parseTags(NLSLine line) throws InvalidInputException {
4022 String s = new String(getCurrentTokenSource());
4023 int pos = s.indexOf(TAG_PREFIX);
4024 int lineLength = line.size();
4026 int start = pos + TAG_PREFIX_LENGTH;
4027 int end = s.indexOf(TAG_POSTFIX, start);
4028 String index = s.substring(start, end);
4031 i = Integer.parseInt(index) - 1;
4032 // Tags are one based not zero based.
4033 } catch (NumberFormatException e) {
4034 i = -1; // we don't want to consider this as a valid NLS tag
4036 if (line.exists(i)) {
4039 pos = s.indexOf(TAG_PREFIX, start);
4041 this.nonNLSStrings = new StringLiteral[lineLength];
4042 int nonNLSCounter = 0;
4043 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4044 StringLiteral literal = (StringLiteral) iterator.next();
4045 if (literal != null) {
4046 this.nonNLSStrings[nonNLSCounter++] = literal;
4049 if (nonNLSCounter == 0) {
4050 this.nonNLSStrings = null;
4054 this.wasNonExternalizedStringLiteral = true;
4055 if (nonNLSCounter != lineLength) {
4056 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
/**
 * Reads the character(s) of an escape sequence from the source and replaces currentCharacter with the
 * character the sequence denotes.
 * <p>
 * The visible assignments cover backspace, tab, line feed, form feed, carriage return, double quote,
 * single quote and backslash, plus an octal escape of up to three digits (three digits only when the
 * first one is 0-3). Anything else raises INVALID_ESCAPE.
 * <p>
 * NOTE(review): this copy has lost lines (the case labels, break statements, "else" keywords, the
 * "currentPosition--" push-backs that the "ignore last character" comments refer to, closing braces) and
 * every line starts with a stray number. Which input letter maps to which assignment is therefore not
 * visible here. Restore the method from a pristine copy before editing.
 *
 * @throws InvalidInputException with INVALID_ESCAPE when the sequence is not recognized; the first throw
 *                               below presumably sits behind a range check on the octal value that is
 *                               missing from this copy
 */
4061 public final void scanEscapeCharacter() throws InvalidInputException {
4062 // the string with "\\u" is a legal string of two chars \ and u
4063 //thus we use a direct access to the source (for regular cases).
4064 if (unicodeAsBackSlash) {
4065 // consume next character
4066 unicodeAsBackSlash = false;
4067 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4068 // (source[currentPosition] == 'u')) {
4069 // getNextUnicodeChar();
// keep the unicode-free copy of the token in step when it is in use
4071 if (withoutUnicodePtr != 0) {
4072 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// regular case: read the character that follows the backslash straight from the source
4076 currentCharacter = source[currentPosition++];
4077 switch (currentCharacter) {
4079 currentCharacter = '\b';
4082 currentCharacter = '\t';
4085 currentCharacter = '\n';
4088 currentCharacter = '\f';
4091 currentCharacter = '\r';
4094 currentCharacter = '\"';
4097 currentCharacter = '\'';
4100 currentCharacter = '\\';
4103 // -----------octal escape--------------
// accepted shapes (the single "OctalDigit" line of this list is missing from this copy):
4105 // OctalDigit OctalDigit
4106 // ZeroToThree OctalDigit OctalDigit
4107 int number = Character.getNumericValue(currentCharacter);
4108 if (number >= 0 && number <= 7) {
// a first digit above 3 allows at most one more octal digit
4109 boolean zeroToThreeNot = number > 3;
4110 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4111 int digit = Character.getNumericValue(currentCharacter);
4112 if (digit >= 0 && digit <= 7) {
4113 number = (number * 8) + digit;
4114 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4115 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4116 // Digit --> ignore last character
4119 digit = Character.getNumericValue(currentCharacter);
4120 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4121 // OctalDigit OctalDigit
4122 number = (number * 8) + digit;
4123 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4124 // --> ignore last character
4128 } else { // has read \OctalDigit NonDigit--> ignore last
4132 } else { // has read \OctalDigit NonOctalDigit--> ignore last
4136 } else { // has read \OctalDigit --> ignore last character
4140 throw new InvalidInputException(INVALID_ESCAPE);
4141 currentCharacter = (char) number;
// not an octal digit either: unknown escape
4143 throw new InvalidInputException(INVALID_ESCAPE);
4147 //check presence of task: tags
4148 //TODO (frederic) see if we need to take unicode characters into account...
/**
 * Looks for the configured task tags inside the comment located between commentStart and commentEnd and
 * records every occurrence.
 * <p>
 * First pass: walks the comment text (starting two characters after commentStart, stopping at commentEnd
 * or eofPosition) and, for each position, tries every entry of taskTags. A match is rejected when the tag
 * would merely be part of a longer identifier, and no tag is searched right after a '@'. Each hit is
 * appended to foundTaskTags / foundTaskPriorities / foundTaskPositions / foundTaskMessages (grown by
 * doubling) and foundTaskCount is incremented.
 * Second pass: for every task found by this call, extracts its message - the text after the tag, limited
 * by the next task, a line break or a '*' - trims it, stores it in foundTaskMessages and updates the end
 * position of the task.
 * <p>
 * Nothing is done when the last recorded task already starts at or after commentStart (presumably the
 * guard below returns in that case; its body is missing from this copy).
 * <p>
 * NOTE(review): this.taskTags is dereferenced without a null check although the shorter constructors pass
 * null for it - callers apparently must not invoke this method without task tags; confirm.
 * NOTE(review): this copy has lost lines (declarations of tag, sc, tc, x, c and end, continue/break
 * statements, closing braces) and every line starts with a stray number.
 *
 * @param commentStart offset of the first character of the comment
 * @param commentEnd offset at which the comment text stops being examined
 */
4149 public void checkTaskTag(int commentStart, int commentEnd) {
4150 char[] src = this.source;
4152 // only look for newer task: tags
4153 if (this.foundTaskCount > 0
4154 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// tasks recorded from this index on belong to the current comment
4157 int foundTaskIndex = this.foundTaskCount;
4158 char previous = src[commentStart+1]; // should be '*' or '/'
4160 int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4162 char[] priority = null;
4163 // check for tag occurrence only if not ambiguous with javadoc tag
4164 if (previous != '@') {
4165 nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
4166 tag = this.taskTags[itag];
4167 int tagLength = tag.length;
4168 if (tagLength == 0) continue nextTag;
4170 // ensure tag is not preceded by a letter if tag starts with a letter
4171 if (Character.isJavaIdentifierStart(tag[0])) {
4172 if (Character.isJavaIdentifierPart(previous)) {
// compare the tag character by character with the source at position i
4177 for (int t = 0; t < tagLength; t++) {
4180 if (x >= this.eofPosition || x >= commentEnd) continue nextTag;
4181 if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4182 if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case insensitive check
4187 // ensure tag is not followed with letter if tag finishes with a letter
4188 if (i+tagLength < commentEnd && Character.isJavaIdentifierPart(src[i+tagLength-1])) {
4189 if (Character.isJavaIdentifierPart(src[i + tagLength]))
// make room for the new task: allocate the four parallel arrays lazily, double them when full
4192 if (this.foundTaskTags == null) {
4193 this.foundTaskTags = new char[5][];
4194 this.foundTaskMessages = new char[5][];
4195 this.foundTaskPriorities = new char[5][];
4196 this.foundTaskPositions = new int[5][];
4197 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4198 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4199 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4200 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4201 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0, this.foundTaskCount);
// priority of the matched tag, when one is configured at the same index
4204 priority = this.taskPriorities != null && itag < this.taskPriorities.length
4205 ? this.taskPriorities[itag]
4208 this.foundTaskTags[this.foundTaskCount] = tag;
4209 this.foundTaskPriorities[this.foundTaskCount] = priority;
4210 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
// the message is filled in by the second pass below
4211 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4212 this.foundTaskCount++;
4213 i += tagLength - 1; // will be incremented when looping
// second pass: extract the message of every task found in this comment
4219 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4220 // retrieve message start and end positions
4221 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4222 int max_value = i + 1 < this.foundTaskCount
4223 ? this.foundTaskPositions[i + 1][0] - 1
4225 // at most beginning of next task
4226 if (max_value < msgStart) {
4227 max_value = msgStart; // would only occur if tag is before EOF.
// the message stops at the first line break ...
4231 for (int j = msgStart; j < max_value; j++) {
4232 if ((c = src[j]) == '\n' || c == '\r') {
// ... or, scanning backwards, at a '*' (presumably the closing of a block comment; the rest of this
// loop is missing from this copy)
4238 for (int j = max_value; j > msgStart; j--) {
4239 if ((c = src[j]) == '*') {
4247 if (msgStart == end)
// trim the message
// NOTE(review): both loops read src[...] before testing msgStart <= end, so the bound check comes too
// late to protect the array access
4250 while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4252 while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4254 // update the end position of the task
4255 this.foundTaskPositions[i][1] = end;
4256 // get the message source
4257 final int messageLength = end - msgStart + 1;
4258 char[] message = new char[messageLength];
4259 System.arraycopy(src, msgStart, message, 0, messageLength);
4260 this.foundTaskMessages[i] = message;
4264 // check presence of task: tags
4265 // public void checkTaskTag(int commentStart, int commentEnd) {
4266 // // only look for newer task: tags
4267 // if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4270 // int foundTaskIndex = this.foundTaskCount;
4271 // nextChar: for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
4272 // char[] tag = null;
4273 // char[] priority = null;
4274 // // check for tag occurrence
4275 // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4276 // tag = this.taskTags[itag];
4277 // priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4278 // int tagLength = tag.length;
4279 // for (int t = 0; t < tagLength; t++) {
4280 // if (this.source[i + t] != tag[t])
4281 // continue nextTag;
4283 // if (this.foundTaskTags == null) {
4284 // this.foundTaskTags = new char[5][];
4285 // this.foundTaskMessages = new char[5][];
4286 // this.foundTaskPriorities = new char[5][];
4287 // this.foundTaskPositions = new int[5][];
4288 // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4289 // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4290 // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4291 // this.foundTaskCount);
4292 // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4293 // this.foundTaskCount);
4294 // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4295 // this.foundTaskCount);
4297 // this.foundTaskTags[this.foundTaskCount] = tag;
4298 // this.foundTaskPriorities[this.foundTaskCount] = priority;
4299 // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4300 // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4301 // this.foundTaskCount++;
4302 // i += tagLength - 1; // will be incremented when looping
4305 // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4306 // // retrieve message start and end positions
4307 // int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4308 // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4309 // // at most beginning of next task
4310 // if (max_value < msgStart)
4311 // max_value = msgStart; // would only occur if tag is before EOF.
4314 // for (int j = msgStart; j < max_value; j++) {
4315 // if ((c = this.source[j]) == '\n' || c == '\r') {
4321 // for (int j = max_value; j > msgStart; j--) {
4322 // if ((c = this.source[j]) == '*') {
4330 // if (msgStart == end)
4331 // continue; // empty
4332 // // trim the message
4333 // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4335 // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4337 // // update the end position of the task
4338 // this.foundTaskPositions[i][1] = end;
4339 // // get the message source
4340 // final int messageLength = end - msgStart + 1;
4341 // char[] message = new char[messageLength];
4342 // System.arraycopy(source, msgStart, message, 0, messageLength);
4343 // this.foundTaskMessages[i] = message;