1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
17 import net.sourceforge.phpdt.core.compiler.CharOperation;
18 import net.sourceforge.phpdt.core.compiler.IScanner;
19 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
20 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
21 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
23 public class Scanner implements IScanner, ITerminalSymbols {
26 * APIs are - getNextToken(), which returns the type of the current token (this value is not memorized by the scanner) -
27 * getCurrentTokenSource(), which provides the token's "REAL" source (i.e. all unicode escapes have been transformed into the
28 * correct char) - sourceStart gives the position into the stream - currentPosition-1 gives the sourceEnd position into the stream
32 private boolean assertMode;
33 public boolean useAssertAsAnIndentifier = false;
34 //flag indicating if processed source contains occurrences of keyword assert
35 public boolean containsAssertKeyword = false;
37 public boolean recordLineSeparator;
38 public boolean phpMode = false;
40 public char currentCharacter;
41 public int startPosition;
42 public int currentPosition;
43 public int initialPosition, eofPosition;
44 // after this position eof are generated instead of real token from the source
46 public boolean tokenizeComments;
47 public boolean tokenizeWhiteSpace;
49 //source should be viewed as a window (aka a part)
50 //of an entire very large stream
54 public char[] withoutUnicodeBuffer;
55 public int withoutUnicodePtr;
56 //when == 0 ==> no unicode in the current token
57 public boolean unicodeAsBackSlash = false;
59 public boolean scanningFloatLiteral = false;
61 //support for /** comments
62 //public char[][] comments = new char[10][];
63 public int[] commentStops = new int[10];
64 public int[] commentStarts = new int[10];
65 public int commentPtr = -1; // no comment test with commentPtr value -1
67 //diet parsing support - jump over some method body when requested
68 public boolean diet = false;
70 //support for the poor-line-debuggers ....
71 //remember the position of the cr/lf
72 public int[] lineEnds = new int[250];
73 public int linePtr = -1;
74 public boolean wasAcr = false;
76 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
78 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
79 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
80 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
81 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
82 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
83 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
84 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
86 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
87 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
88 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
89 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
91 //----------------optimized identifier management------------------
92 static final char[] charArray_a = new char[] { 'a' },
93 charArray_b = new char[] { 'b' },
94 charArray_c = new char[] { 'c' },
95 charArray_d = new char[] { 'd' },
96 charArray_e = new char[] { 'e' },
97 charArray_f = new char[] { 'f' },
98 charArray_g = new char[] { 'g' },
99 charArray_h = new char[] { 'h' },
100 charArray_i = new char[] { 'i' },
101 charArray_j = new char[] { 'j' },
102 charArray_k = new char[] { 'k' },
103 charArray_l = new char[] { 'l' },
104 charArray_m = new char[] { 'm' },
105 charArray_n = new char[] { 'n' },
106 charArray_o = new char[] { 'o' },
107 charArray_p = new char[] { 'p' },
108 charArray_q = new char[] { 'q' },
109 charArray_r = new char[] { 'r' },
110 charArray_s = new char[] { 's' },
111 charArray_t = new char[] { 't' },
112 charArray_u = new char[] { 'u' },
113 charArray_v = new char[] { 'v' },
114 charArray_w = new char[] { 'w' },
115 charArray_x = new char[] { 'x' },
116 charArray_y = new char[] { 'y' },
117 charArray_z = new char[] { 'z' };
119 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
120 static final int TableSize = 30, InternalTableSize = 6;
122 public static final int OptimizedLength = 6;
124 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
125 // support for detecting non-externalized string literals
126 int currentLineNr = -1;
127 int previousLineNr = -1;
128 NLSLine currentLine = null;
129 List lines = new ArrayList();
130 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
131 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
132 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
133 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
134 public StringLiteral[] nonNLSStrings = null;
135 public boolean checkNonExternalizedStringLiterals = true;
136 public boolean wasNonExternalizedStringLiteral = false;
139 for (int i = 0; i < 6; i++) {
140 for (int j = 0; j < TableSize; j++) {
141 for (int k = 0; k < InternalTableSize; k++) {
142 charArray_length[i][j][k] = initCharArray;
147 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
149 public static final int RoundBracket = 0;
150 public static final int SquareBracket = 1;
151 public static final int CurlyBracket = 2;
152 public static final int BracketKinds = 3;
155 public char[][] foundTaskTags = null;
156 public char[][] foundTaskMessages;
157 public char[][] foundTaskPriorities = null;
158 public int[][] foundTaskPositions;
159 public int foundTaskCount = 0;
160 public char[][] taskTags = null;
161 public char[][] taskPriorities = null;
163 public static final boolean DEBUG = true;
// Convenience constructor: forwards both flags to the three-argument constructor
// and passes false for its third argument (that constructor is declared outside this view).
168 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
169 this(tokenizeComments, tokenizeWhiteSpace, false);
173 * Determines if the specified character is permissible as the first character in a PHP identifier
175 public static boolean isPHPIdentifierStart(char ch) {
// Accepted: any Unicode letter (Character.isLetter), the underscore, or a char in the range 0x7F..0xFF.
// Digits are not accepted here; compare isPHPIdentifierPart, which uses Character.isLetterOrDigit.
176 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
180 * Determines if the specified character may be part of a PHP identifier as other than the first character
182 public static boolean isPHPIdentifierPart(char ch) {
// Accepted: any Unicode letter or digit (Character.isLetterOrDigit), the underscore,
// or a char in the range 0x7F..0xFF.
183 return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
// Reports whether currentPosition is exactly at the end of the source array.
186 public final boolean atEnd() {
187 // This check is not meaningful when source is
188 // only a part (window) of the real input stream
190 return source.length == currentPosition;
// Returns the characters of the current token, i.e. the source range that starts at
// startPosition and has length (currentPosition - startPosition).
// Some lengths are dispatched to the optimizedCurrentTokenSourceN() helpers (declared outside
// this view); any other length is copied into a freshly allocated array.
// NOTE(review): the case labels and the final return are not visible in this listing;
// presumably the cases are lengths 1..6 (see OptimizedLength) - confirm.
192 public char[] getCurrentIdentifierSource() {
193 //return the token REAL source (aka unicodes are precomputed)
196 // if (withoutUnicodePtr != 0)
197 // //0 is used as a fast test flag so the real first char is in position 1
199 // withoutUnicodeBuffer,
201 // result = new char[withoutUnicodePtr],
203 // withoutUnicodePtr);
// The withoutUnicodeBuffer path above is commented out: the token is always taken from source.
205 int length = currentPosition - startPosition;
206 switch (length) { // see OptimizedLength
208 return optimizedCurrentTokenSource1();
210 return optimizedCurrentTokenSource2();
212 return optimizedCurrentTokenSource3();
214 return optimizedCurrentTokenSource4();
216 return optimizedCurrentTokenSource5();
218 return optimizedCurrentTokenSource6();
// Fallback: allocate a new array of the token length and copy the token out of source.
221 System.arraycopy(source, startPosition, result = new char[length], 0, length);
// Returns the index of the last character of the current token: currentPosition points
// just past the token, so the end position is currentPosition - 1.
225 public int getCurrentTokenEndPosition() {
226 return this.currentPosition - 1;
// Copies the current token out of source into a new array: the copy starts at startPosition
// and has length (currentPosition - startPosition).
// NOTE(review): the declarations of result/length and the return are not visible in this listing.
229 public final char[] getCurrentTokenSource() {
230 // Return the token REAL source (aka unicodes are precomputed)
233 // if (withoutUnicodePtr != 0)
234 // // 0 is used as a fast test flag so the real first char is in position 1
236 // withoutUnicodeBuffer,
238 // result = new char[withoutUnicodePtr],
240 // withoutUnicodePtr);
// The withoutUnicodeBuffer path above is commented out: the token is always taken from source.
// length is assigned inside the allocation expression and reused as the copy count.
243 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
// Variant of getCurrentTokenSource() with an explicit start: copies the source range that
// begins at startPos and has length (currentPosition - startPos) into a new array.
// NOTE(review): the declarations of result/length and the return are not visible in this listing.
248 public final char[] getCurrentTokenSource(int startPos) {
249 // Return the token REAL source (aka unicodes are precomputed)
252 // if (withoutUnicodePtr != 0)
253 // // 0 is used as a fast test flag so the real first char is in position 1
255 // withoutUnicodeBuffer,
257 // result = new char[withoutUnicodePtr],
259 // withoutUnicodePtr);
// The withoutUnicodeBuffer path above is commented out: the token is always taken from source.
262 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
// Returns the content of the current string token without its opening and closing delimiter.
// When withoutUnicodePtr is non-zero the content is taken from withoutUnicodeBuffer,
// otherwise directly from source.
// NOTE(review): the "else" between the two copies and the return are not visible in this listing.
267 public final char[] getCurrentTokenSourceString() {
268 //return the token REAL source (aka unicodes are precomputed).
269 //REMOVE the two delimiter characters that are at the beginning and the end.
272 if (withoutUnicodePtr != 0)
273 //0 is used as a fast test flag so the real first char is in position 1
274 System.arraycopy(withoutUnicodeBuffer, 2,
275 //2 is 1 (real start) + 1 (to jump over the opening delimiter)
276 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
// Source path: skip the first character of the token and drop the last one (length - 2).
279 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
// Returns startPosition, the index in source at which the current token begins.
283 public int getCurrentTokenStartPosition() {
284 return this.startPosition;
// Copies the current token from source without its first and last character
// (start at startPosition + 1, length currentPosition - startPosition - 2).
// NOTE(review): the declarations of result/length and the return are not visible in this listing.
287 public final char[] getCurrentStringLiteralSource() {
288 // Return the token REAL source (aka unicodes are precomputed)
293 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
299 * Search the source position corresponding to the end of a given line number
301 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
303 * In case the given line number is inconsistent, answers -1.
// Looks up the recorded end position of a 1-based line: the normal result is
// lineEnds[lineNumber - 1], reached only after the guards below.
// NOTE(review): the statements executed by the guards are not visible in this listing;
// per the method documentation an inconsistent line number answers -1.
// NOTE(review): the range checks use lineEnds.length (the allocated capacity) rather than
// linePtr - confirm that slots which were never filled cannot be returned.
305 public final int getLineEnd(int lineNumber) {
307 if (lineEnds == null)
309 if (lineNumber >= lineEnds.length)
314 if (lineNumber == lineEnds.length - 1)
316 return lineEnds[lineNumber - 1];
317 // next line start one character behind the lineEnd of the previous line
320 * Search the source position corresponding to the beginning of a given line number
322 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
324 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
326 * In case the given line number is inconsistent, answers -1.
// Computes the start position of a 1-based line. Two results are visible: initialPosition,
// and lineEnds[lineNumber - 2] + 1 (one past the recorded end of the previous line).
// NOTE(review): the guard bodies and the condition selecting initialPosition are not visible
// in this listing; per the method documentation line 1 starts at the initial position and
// an inconsistent line number answers -1.
// NOTE(review): the range check uses lineEnds.length (the allocated capacity) rather than
// linePtr - confirm that slots which were never filled cannot be read.
328 public final int getLineStart(int lineNumber) {
330 if (lineEnds == null)
332 if (lineNumber >= lineEnds.length)
338 return initialPosition;
339 return lineEnds[lineNumber - 2] + 1;
340 // a line starts one character after the lineEnd of the previous line
// Conditionally consumes one character: reads source[currentPosition] into currentCharacter
// and advances currentPosition; if the character differs from testedChar, currentPosition is
// put back to its value on entry. Reading past the end of source raises
// IndexOutOfBoundsException, which is caught here and also restores currentPosition.
// unicodeAsBackSlash is cleared after the mismatch branch and in the catch handler.
// NOTE(review): the return statements are not visible in this listing; presumably the result
// is true only when testedChar was matched - confirm.
342 public final boolean getNextChar(char testedChar) {
344 //handle the case of unicode.
345 //when a unicode appears then we must use a buffer that holds char internal values
346 //At the end of this method currentCharacter holds the new visited char
347 //and currentPosition points right next after it
348 //Both previous lines are true if the currentCharacter is == to the testedChar
349 //On false, currentPosition is restored; note that currentCharacter has still been overwritten.
351 //ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored on mismatch or on running off the end.
353 int temp = currentPosition;
// The unicode-escape handling below is commented out, so the character is read raw.
355 currentCharacter = source[currentPosition++];
356 // if (((currentCharacter = source[currentPosition++]) == '\\')
357 // && (source[currentPosition] == 'u')) {
358 // //-------------unicode traitement ------------
359 // int c1, c2, c3, c4;
360 // int unicodeSize = 6;
361 // currentPosition++;
362 // while (source[currentPosition] == 'u') {
363 // currentPosition++;
367 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
369 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
371 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
373 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
375 // currentPosition = temp;
379 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
380 // if (currentCharacter != testedChar) {
381 // currentPosition = temp;
384 // unicodeAsBackSlash = currentCharacter == '\\';
386 // //need the unicode buffer
387 // if (withoutUnicodePtr == 0) {
388 // //buffer all the entries that have been left aside....
389 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
393 // withoutUnicodeBuffer,
395 // withoutUnicodePtr);
397 // //fill the buffer with the char
398 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
401 // } //-------------end unicode traitement--------------
403 if (currentCharacter != testedChar) {
404 currentPosition = temp;
407 unicodeAsBackSlash = false;
408 // if (withoutUnicodePtr != 0)
409 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
412 } catch (IndexOutOfBoundsException e) {
// End of source reached while reading: undo the advance.
413 unicodeAsBackSlash = false;
414 currentPosition = temp;
// Two-candidate variant of getNextChar(char): reads source[currentPosition] into
// currentCharacter and advances currentPosition, then compares the character with
// testedChar1 and testedChar2; when neither matches, currentPosition is put back to its
// value on entry. An IndexOutOfBoundsException from reading past the end of source is
// caught and also restores currentPosition.
// NOTE(review): the return statements are not visible in this listing; the result codes
// described in the first comment below (0 / 1 / -1) are taken from that comment.
418 public final int getNextChar(char testedChar1, char testedChar2) {
419 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
420 //test can be done with (x==0) for the first and (x>0) for the second
421 //handle the case of unicode.
422 //when a unicode appears then we must use a buffer that holds char internal values
423 //At the end of this method currentCharacter holds the new visited char
424 //and currentPosition points right next after it
425 //Both previous lines are true if the currentCharacter is == to the testedChar1/2
426 //On no match, currentPosition is restored; note that currentCharacter has still been overwritten.
428 //ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored when neither character matches.
430 int temp = currentPosition;
// The unicode-escape handling below is commented out, so the character is read raw.
433 currentCharacter = source[currentPosition++];
434 // if (((currentCharacter = source[currentPosition++]) == '\\')
435 // && (source[currentPosition] == 'u')) {
436 // //-------------unicode traitement ------------
437 // int c1, c2, c3, c4;
438 // int unicodeSize = 6;
439 // currentPosition++;
440 // while (source[currentPosition] == 'u') {
441 // currentPosition++;
445 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
447 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
449 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
451 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
453 // currentPosition = temp;
457 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
458 // if (currentCharacter == testedChar1)
460 // else if (currentCharacter == testedChar2)
463 // currentPosition = temp;
467 // //need the unicode buffer
468 // if (withoutUnicodePtr == 0) {
469 // //buffer all the entries that have been left aside....
470 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
474 // withoutUnicodeBuffer,
476 // withoutUnicodePtr);
478 // //fill the buffer with the char
479 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
481 // } //-------------end unicode traitement--------------
483 if (currentCharacter == testedChar1)
485 else if (currentCharacter == testedChar2)
488 currentPosition = temp;
492 // if (withoutUnicodePtr != 0)
493 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
496 } catch (IndexOutOfBoundsException e) {
// End of source reached while reading: undo the advance.
497 currentPosition = temp;
// Conditionally consumes one decimal digit: reads source[currentPosition] into
// currentCharacter and advances currentPosition; if Character.isDigit rejects the character,
// currentPosition is put back to its value on entry. An IndexOutOfBoundsException from
// reading past the end of source is caught and also restores currentPosition.
// NOTE(review): the return statements are not visible in this listing; presumably the result
// is true only when a digit was consumed - confirm.
501 public final boolean getNextCharAsDigit() {
503 //handle the case of unicode.
504 //when a unicode appears then we must use a buffer that holds char internal values
505 //At the end of this method currentCharacter holds the new visited char
506 //and currentPosition points right next after it
507 //Both previous lines are true if the currentCharacter is a digit
508 //On false, currentPosition is restored; note that currentCharacter has still been overwritten.
510 //ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored when the character is not a digit.
512 int temp = currentPosition;
// The unicode-escape handling below is commented out, so the character is read raw.
514 currentCharacter = source[currentPosition++];
515 // if (((currentCharacter = source[currentPosition++]) == '\\')
516 // && (source[currentPosition] == 'u')) {
517 // //-------------unicode traitement ------------
518 // int c1, c2, c3, c4;
519 // int unicodeSize = 6;
520 // currentPosition++;
521 // while (source[currentPosition] == 'u') {
522 // currentPosition++;
526 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
528 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
530 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
532 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
534 // currentPosition = temp;
538 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
539 // if (!Character.isDigit(currentCharacter)) {
540 // currentPosition = temp;
544 // //need the unicode buffer
545 // if (withoutUnicodePtr == 0) {
546 // //buffer all the entries that have been left aside....
547 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
551 // withoutUnicodeBuffer,
553 // withoutUnicodePtr);
555 // //fill the buffer with the char
556 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
558 // } //-------------end unicode traitement--------------
560 if (!Character.isDigit(currentCharacter)) {
561 currentPosition = temp;
564 // if (withoutUnicodePtr != 0)
565 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
568 } catch (IndexOutOfBoundsException e) {
// End of source reached while reading: undo the advance.
569 currentPosition = temp;
// Radix-aware variant of getNextCharAsDigit(): reads source[currentPosition] into
// currentCharacter and advances currentPosition; if Character.digit(currentCharacter, radix)
// returns -1 (not a digit in that radix), currentPosition is put back to its value on entry.
// An IndexOutOfBoundsException from reading past the end of source is caught and also
// restores currentPosition.
// NOTE(review): the return statements are not visible in this listing; presumably the result
// is true only when a digit was consumed - confirm.
573 public final boolean getNextCharAsDigit(int radix) {
575 //handle the case of unicode.
576 //when a unicode appears then we must use a buffer that holds char internal values
577 //At the end of this method currentCharacter holds the new visited char
578 //and currentPosition points right next after it
579 //Both previous lines are true if the currentCharacter is a digit base on radix
580 //On false, currentPosition is restored; note that currentCharacter has still been overwritten.
582 //ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored when the character is not a digit.
584 int temp = currentPosition;
// The unicode-escape handling below is commented out, so the character is read raw.
586 currentCharacter = source[currentPosition++];
587 // if (((currentCharacter = source[currentPosition++]) == '\\')
588 // && (source[currentPosition] == 'u')) {
589 // //-------------unicode traitement ------------
590 // int c1, c2, c3, c4;
591 // int unicodeSize = 6;
592 // currentPosition++;
593 // while (source[currentPosition] == 'u') {
594 // currentPosition++;
598 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
600 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
602 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
604 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
606 // currentPosition = temp;
610 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
611 // if (Character.digit(currentCharacter, radix) == -1) {
612 // currentPosition = temp;
616 // //need the unicode buffer
617 // if (withoutUnicodePtr == 0) {
618 // //buffer all the entries that have been left aside....
619 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
623 // withoutUnicodeBuffer,
625 // withoutUnicodePtr);
627 // //fill the buffer with the char
628 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
630 // } //-------------end unicode traitement--------------
632 if (Character.digit(currentCharacter, radix) == -1) {
633 currentPosition = temp;
636 // if (withoutUnicodePtr != 0)
637 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
640 } catch (IndexOutOfBoundsException e) {
// End of source reached while reading: undo the advance.
641 currentPosition = temp;
// Conditionally consumes one identifier character. Despite the method name, the test applied
// is isPHPIdentifierPart, not a Java identifier check. Reads source[currentPosition] into
// currentCharacter and advances currentPosition; if the character is rejected,
// currentPosition is put back to its value on entry. An IndexOutOfBoundsException from
// reading past the end of source is caught and also restores currentPosition.
// NOTE(review): the return statements are not visible in this listing; presumably the result
// is true only when an identifier character was consumed - confirm.
645 public boolean getNextCharAsJavaIdentifierPart() {
647 //handle the case of unicode.
648 //when a unicode appears then we must use a buffer that holds char internal values
649 //At the end of this method currentCharacter holds the new visited char
650 //and currentPosition points right next after it
651 //Both previous lines are true if the currentCharacter passes isPHPIdentifierPart
652 //On false, currentPosition is restored; note that currentCharacter has still been overwritten.
654 //ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored when the character is rejected.
656 int temp = currentPosition;
// The unicode-escape handling below is commented out, so the character is read raw.
658 currentCharacter = source[currentPosition++];
659 // if (((currentCharacter = source[currentPosition++]) == '\\')
660 // && (source[currentPosition] == 'u')) {
661 // //-------------unicode traitement ------------
662 // int c1, c2, c3, c4;
663 // int unicodeSize = 6;
664 // currentPosition++;
665 // while (source[currentPosition] == 'u') {
666 // currentPosition++;
670 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
672 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
674 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
676 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
678 // currentPosition = temp;
682 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
683 // if (!isPHPIdentifierPart(currentCharacter)) {
684 // currentPosition = temp;
688 // //need the unicode buffer
689 // if (withoutUnicodePtr == 0) {
690 // //buffer all the entries that have been left aside....
691 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
695 // withoutUnicodeBuffer,
697 // withoutUnicodePtr);
699 // //fill the buffer with the char
700 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
702 // } //-------------end unicode traitement--------------
704 if (!isPHPIdentifierPart(currentCharacter)) {
705 currentPosition = temp;
709 // if (withoutUnicodePtr != 0)
710 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
713 } catch (IndexOutOfBoundsException e) {
// End of source reached while reading: undo the advance.
714 currentPosition = temp;
719 public int getNextToken() throws InvalidInputException {
720 int htmlPosition = currentPosition;
723 currentCharacter = source[currentPosition++];
724 if (currentCharacter == '<') {
725 if (getNextChar('?')) {
726 currentCharacter = source[currentPosition++];
727 if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
729 startPosition = currentPosition;
731 if (tokenizeWhiteSpace) {
732 // && (whiteStart != currentPosition - 1)) {
733 // reposition scanner in case we are interested by spaces as tokens
734 startPosition = htmlPosition;
735 return TokenNameHTML;
738 boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
740 int test = getNextChar('H', 'h');
742 test = getNextChar('P', 'p');
745 startPosition = currentPosition;
748 if (tokenizeWhiteSpace) {
749 // && (whiteStart != currentPosition - 1)) {
750 // reposition scanner in case we are interested by spaces as tokens
751 startPosition = htmlPosition;
752 return TokenNameHTML;
761 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
762 if (recordLineSeparator) {
769 } //-----------------end switch while try--------------------
770 catch (IndexOutOfBoundsException e) {
771 if (tokenizeWhiteSpace) {
772 // && (whiteStart != currentPosition - 1)) {
773 // reposition scanner in case we are interested by spaces as tokens
774 startPosition = htmlPosition;
782 jumpOverMethodBody();
784 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
787 while (true) { //loop for jumping over comments
788 withoutUnicodePtr = 0;
789 //start with a new token (even comment written with unicode )
791 // ---------Consume white space and handles startPosition---------
792 int whiteStart = currentPosition;
793 boolean isWhiteSpace;
795 startPosition = currentPosition;
796 currentCharacter = source[currentPosition++];
797 // if (((currentCharacter = source[currentPosition++]) == '\\')
798 // && (source[currentPosition] == 'u')) {
799 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
801 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
802 checkNonExternalizeString();
803 if (recordLineSeparator) {
809 isWhiteSpace = (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
811 } while (isWhiteSpace);
812 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
813 // reposition scanner in case we are interested by spaces as tokens
815 startPosition = whiteStart;
816 return TokenNameWHITESPACE;
818 //little trick to get out in the middle of a source computation
819 if (currentPosition > eofPosition)
822 // ---------Identify the next token-------------
824 switch (currentCharacter) {
826 return TokenNameLPAREN;
828 return TokenNameRPAREN;
830 return TokenNameLBRACE;
832 return TokenNameRBRACE;
834 return TokenNameLBRACKET;
836 return TokenNameRBRACKET;
838 return TokenNameSEMICOLON;
840 return TokenNameCOMMA;
843 if (getNextCharAsDigit())
844 return scanNumber(true);
849 if ((test = getNextChar('+', '=')) == 0)
850 return TokenNamePLUS_PLUS;
852 return TokenNamePLUS_EQUAL;
853 return TokenNamePLUS;
858 if ((test = getNextChar('-', '=')) == 0)
859 return TokenNameMINUS_MINUS;
861 return TokenNameMINUS_EQUAL;
862 if (getNextChar('>'))
863 return TokenNameMINUS_GREATER;
865 return TokenNameMINUS;
868 if (getNextChar('='))
869 return TokenNameTWIDDLE_EQUAL;
870 return TokenNameTWIDDLE;
872 if (getNextChar('=')) {
873 if (getNextChar('=')) {
874 return TokenNameNOT_EQUAL_EQUAL;
876 return TokenNameNOT_EQUAL;
880 if (getNextChar('='))
881 return TokenNameMULTIPLY_EQUAL;
882 return TokenNameMULTIPLY;
884 if (getNextChar('='))
885 return TokenNameREMAINDER_EQUAL;
886 return TokenNameREMAINDER;
890 if ((test = getNextChar('=', '<')) == 0)
891 return TokenNameLESS_EQUAL;
893 if (getNextChar('='))
894 return TokenNameLEFT_SHIFT_EQUAL;
895 if (getNextChar('<')) {
896 int heredocStart = currentPosition;
897 int heredocLength = 0;
898 currentCharacter = source[currentPosition++];
899 if (isPHPIdentifierStart(currentCharacter)) {
900 currentCharacter = source[currentPosition++];
902 return TokenNameERROR;
904 while (isPHPIdentifierPart(currentCharacter)) {
905 currentCharacter = source[currentPosition++];
908 heredocLength = currentPosition - heredocStart - 1;
910 // heredoc end-tag determination
911 boolean endTag = true;
914 ch = source[currentPosition++];
915 if (ch == '\r' || ch == '\n') {
916 if (recordLineSeparator) {
921 for (int i = 0; i < heredocLength; i++) {
922 if (source[currentPosition + i] != source[heredocStart + i]) {
928 currentPosition += heredocLength - 1;
929 currentCharacter = source[currentPosition++];
930 break; // do...while loop
938 return TokenNameHEREDOC;
940 return TokenNameLEFT_SHIFT;
942 return TokenNameLESS;
947 if ((test = getNextChar('=', '>')) == 0)
948 return TokenNameGREATER_EQUAL;
950 if ((test = getNextChar('=', '>')) == 0)
951 return TokenNameRIGHT_SHIFT_EQUAL;
952 return TokenNameRIGHT_SHIFT;
954 return TokenNameGREATER;
957 if (getNextChar('=')) {
958 if (getNextChar('=')) {
959 return TokenNameEQUAL_EQUAL_EQUAL;
961 return TokenNameEQUAL_EQUAL;
963 if (getNextChar('>'))
964 return TokenNameEQUAL_GREATER;
965 return TokenNameEQUAL;
969 if ((test = getNextChar('&', '=')) == 0)
970 return TokenNameAND_AND;
972 return TokenNameAND_EQUAL;
978 if ((test = getNextChar('|', '=')) == 0)
979 return TokenNameOR_OR;
981 return TokenNameOR_EQUAL;
985 if (getNextChar('='))
986 return TokenNameXOR_EQUAL;
989 if (getNextChar('>')) {
991 return TokenNameStopPHP;
993 return TokenNameQUESTION;
995 if (getNextChar(':'))
996 return TokenNameCOLON_COLON;
997 return TokenNameCOLON;
1003 // if ((test = getNextChar('\n', '\r')) == 0) {
1004 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1007 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1008 // for (int lookAhead = 0;
1011 // if (currentPosition + lookAhead
1012 // == source.length)
1014 // if (source[currentPosition + lookAhead]
1017 // if (source[currentPosition + lookAhead]
1019 // currentPosition += lookAhead + 1;
1023 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1026 // if (getNextChar('\'')) {
1027 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1028 // for (int lookAhead = 0;
1031 // if (currentPosition + lookAhead
1032 // == source.length)
1034 // if (source[currentPosition + lookAhead]
1037 // if (source[currentPosition + lookAhead]
1039 // currentPosition += lookAhead + 1;
1043 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1045 // if (getNextChar('\\'))
1046 // scanEscapeCharacter();
1047 // else { // consume next character
1048 // unicodeAsBackSlash = false;
1049 // if (((currentCharacter = source[currentPosition++])
1051 // && (source[currentPosition] == 'u')) {
1052 // getNextUnicodeChar();
1054 // if (withoutUnicodePtr != 0) {
1055 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1056 // currentCharacter;
1060 // // if (getNextChar('\''))
1061 // // return TokenNameCharacterLiteral;
1062 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1063 // for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1064 // if (currentPosition + lookAhead == source.length)
1066 // if (source[currentPosition + lookAhead] == '\n')
1068 // if (source[currentPosition + lookAhead] == '\'') {
1069 // currentPosition += lookAhead + 1;
1073 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1076 // consume next character
1077 unicodeAsBackSlash = false;
1078 currentCharacter = source[currentPosition++];
1079 // if (((currentCharacter = source[currentPosition++]) == '\\')
1080 // && (source[currentPosition] == 'u')) {
1081 // getNextUnicodeChar();
1083 // if (withoutUnicodePtr != 0) {
1084 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1085 // currentCharacter;
1089 while (currentCharacter != '\'') {
1091 /** ** in PHP \r and \n are valid in string literals *** */
1092 // if ((currentCharacter == '\n')
1093 // || (currentCharacter == '\r')) {
1094 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1095 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1096 // if (currentPosition + lookAhead == source.length)
1098 // if (source[currentPosition + lookAhead] == '\n')
1100 // if (source[currentPosition + lookAhead] == '\"') {
1101 // currentPosition += lookAhead + 1;
1105 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1107 if (currentCharacter == '\\') {
1108 int escapeSize = currentPosition;
1109 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1110 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1111 scanSingleQuotedEscapeCharacter();
1112 escapeSize = currentPosition - escapeSize;
1113 if (withoutUnicodePtr == 0) {
1114 //buffer all the entries that have been left aside....
1115 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1116 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1117 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1118 } else { //overwrite the / in the buffer
1119 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1120 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1121 withoutUnicodePtr--;
1125 // consume next character
1126 unicodeAsBackSlash = false;
1127 currentCharacter = source[currentPosition++];
1128 // if (((currentCharacter = source[currentPosition++]) == '\\')
1129 // && (source[currentPosition] == 'u')) {
1130 // getNextUnicodeChar();
1132 if (withoutUnicodePtr != 0) {
1133 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1138 } catch (IndexOutOfBoundsException e) {
1139 throw new InvalidInputException(UNTERMINATED_STRING);
1140 } catch (InvalidInputException e) {
1141 if (e.getMessage().equals(INVALID_ESCAPE)) {
1142 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1143 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1144 if (currentPosition + lookAhead == source.length)
1146 if (source[currentPosition + lookAhead] == '\n')
1148 if (source[currentPosition + lookAhead] == '\'') {
1149 currentPosition += lookAhead + 1;
1157 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1158 if (currentLine == null) {
1159 currentLine = new NLSLine();
1160 lines.add(currentLine);
1162 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1164 return TokenNameStringConstant;
1167 // consume next character
1168 unicodeAsBackSlash = false;
1169 currentCharacter = source[currentPosition++];
1170 // if (((currentCharacter = source[currentPosition++]) == '\\')
1171 // && (source[currentPosition] == 'u')) {
1172 // getNextUnicodeChar();
1174 // if (withoutUnicodePtr != 0) {
1175 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1176 // currentCharacter;
1180 while (currentCharacter != '"') {
1182 /** ** in PHP \r and \n are valid in string literals *** */
1183 // if ((currentCharacter == '\n')
1184 // || (currentCharacter == '\r')) {
1185 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1186 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1187 // if (currentPosition + lookAhead == source.length)
1189 // if (source[currentPosition + lookAhead] == '\n')
1191 // if (source[currentPosition + lookAhead] == '\"') {
1192 // currentPosition += lookAhead + 1;
1196 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1198 if (currentCharacter == '\\') {
1199 int escapeSize = currentPosition;
1200 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1201 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1202 scanDoubleQuotedEscapeCharacter();
1203 escapeSize = currentPosition - escapeSize;
1204 if (withoutUnicodePtr == 0) {
1205 //buffer all the entries that have been left aside....
1206 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1207 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1208 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1209 } else { //overwrite the / in the buffer
1210 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1211 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1212 withoutUnicodePtr--;
1216 // consume next character
1217 unicodeAsBackSlash = false;
1218 currentCharacter = source[currentPosition++];
1219 // if (((currentCharacter = source[currentPosition++]) == '\\')
1220 // && (source[currentPosition] == 'u')) {
1221 // getNextUnicodeChar();
1223 if (withoutUnicodePtr != 0) {
1224 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1229 } catch (IndexOutOfBoundsException e) {
1230 throw new InvalidInputException(UNTERMINATED_STRING);
1231 } catch (InvalidInputException e) {
1232 if (e.getMessage().equals(INVALID_ESCAPE)) {
1233 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1234 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1235 if (currentPosition + lookAhead == source.length)
1237 if (source[currentPosition + lookAhead] == '\n')
1239 if (source[currentPosition + lookAhead] == '\"') {
1240 currentPosition += lookAhead + 1;
1248 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1249 if (currentLine == null) {
1250 currentLine = new NLSLine();
1251 lines.add(currentLine);
1253 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1255 return TokenNameStringLiteral;
1258 // consume next character
1259 unicodeAsBackSlash = false;
1260 currentCharacter = source[currentPosition++];
1261 // if (((currentCharacter = source[currentPosition++]) == '\\')
1262 // && (source[currentPosition] == 'u')) {
1263 // getNextUnicodeChar();
1265 // if (withoutUnicodePtr != 0) {
1266 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1267 // currentCharacter;
1271 while (currentCharacter != '`') {
1273 /** ** in PHP \r and \n are valid in string literals *** */
1274 // if ((currentCharacter == '\n')
1275 // || (currentCharacter == '\r')) {
1276 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1277 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1278 // if (currentPosition + lookAhead == source.length)
1280 // if (source[currentPosition + lookAhead] == '\n')
1282 // if (source[currentPosition + lookAhead] == '\"') {
1283 // currentPosition += lookAhead + 1;
1287 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1289 if (currentCharacter == '\\') {
1290 int escapeSize = currentPosition;
1291 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1292 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1293 scanDoubleQuotedEscapeCharacter();
1294 escapeSize = currentPosition - escapeSize;
1295 if (withoutUnicodePtr == 0) {
1296 //buffer all the entries that have been left aside....
1297 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1298 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1299 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1300 } else { //overwrite the / in the buffer
1301 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1302 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1303 withoutUnicodePtr--;
1307 // consume next character
1308 unicodeAsBackSlash = false;
1309 currentCharacter = source[currentPosition++];
1310 // if (((currentCharacter = source[currentPosition++]) == '\\')
1311 // && (source[currentPosition] == 'u')) {
1312 // getNextUnicodeChar();
1314 if (withoutUnicodePtr != 0) {
1315 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1320 } catch (IndexOutOfBoundsException e) {
1321 throw new InvalidInputException(UNTERMINATED_STRING);
1322 } catch (InvalidInputException e) {
1323 if (e.getMessage().equals(INVALID_ESCAPE)) {
1324 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1325 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1326 if (currentPosition + lookAhead == source.length)
1328 if (source[currentPosition + lookAhead] == '\n')
1330 if (source[currentPosition + lookAhead] == '`') {
1331 currentPosition += lookAhead + 1;
1339 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1340 if (currentLine == null) {
1341 currentLine = new NLSLine();
1342 lines.add(currentLine);
1344 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1346 return TokenNameStringInterpolated;
1351 if ((currentCharacter == '#') || (test = getNextChar('/', '*')) == 0) {
1353 int endPositionForLineComment = 0;
1354 try { //get the next char
1355 currentCharacter = source[currentPosition++];
1356 // if (((currentCharacter = source[currentPosition++])
1358 // && (source[currentPosition] == 'u')) {
1359 // //-------------unicode traitement ------------
1360 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1361 // currentPosition++;
1362 // while (source[currentPosition] == 'u') {
1363 // currentPosition++;
1366 // Character.getNumericValue(source[currentPosition++]))
1370 // Character.getNumericValue(source[currentPosition++]))
1374 // Character.getNumericValue(source[currentPosition++]))
1378 // Character.getNumericValue(source[currentPosition++]))
1381 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1383 // currentCharacter =
1384 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1388 //handle the \\u case manually into comment
1389 // if (currentCharacter == '\\') {
1390 // if (source[currentPosition] == '\\')
1391 // currentPosition++;
1392 // } //jump over the \\
1393 boolean isUnicode = false;
1394 while (currentCharacter != '\r' && currentCharacter != '\n') {
1395 if (currentCharacter == '?') {
1396 if (getNextChar('>')) {
1397 startPosition = currentPosition - 2;
1399 return TokenNameStopPHP;
1405 currentCharacter = source[currentPosition++];
1406 // if (((currentCharacter = source[currentPosition++])
1408 // && (source[currentPosition] == 'u')) {
1409 // isUnicode = true;
1410 // //-------------unicode traitement ------------
1411 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1412 // currentPosition++;
1413 // while (source[currentPosition] == 'u') {
1414 // currentPosition++;
1417 // Character.getNumericValue(source[currentPosition++]))
1421 // Character.getNumericValue(
1422 // source[currentPosition++]))
1426 // Character.getNumericValue(
1427 // source[currentPosition++]))
1431 // Character.getNumericValue(
1432 // source[currentPosition++]))
1435 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1437 // currentCharacter =
1438 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1441 //handle the \\u case manually into comment
1442 // if (currentCharacter == '\\') {
1443 // if (source[currentPosition] == '\\')
1444 // currentPosition++;
1445 // } //jump over the \\
1448 endPositionForLineComment = currentPosition - 6;
1450 endPositionForLineComment = currentPosition - 1;
1452 recordComment(false);
1453 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1454 checkNonExternalizeString();
1455 if (recordLineSeparator) {
1457 pushUnicodeLineSeparator();
1459 pushLineSeparator();
1465 if (tokenizeComments) {
1467 currentPosition = endPositionForLineComment;
1468 // reset one character behind
1470 return TokenNameCOMMENT_LINE;
1472 } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1473 if (tokenizeComments) {
1475 // reset one character behind
1476 return TokenNameCOMMENT_LINE;
1482 //traditional and annotation comment
1483 boolean isJavadoc = false, star = false;
1484 // consume next character
1485 unicodeAsBackSlash = false;
1486 currentCharacter = source[currentPosition++];
1487 // if (((currentCharacter = source[currentPosition++]) == '\\')
1488 // && (source[currentPosition] == 'u')) {
1489 // getNextUnicodeChar();
1491 // if (withoutUnicodePtr != 0) {
1492 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1493 // currentCharacter;
1497 if (currentCharacter == '*') {
1501 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1502 checkNonExternalizeString();
1503 if (recordLineSeparator) {
1504 pushLineSeparator();
1509 try { //get the next char
1510 currentCharacter = source[currentPosition++];
1511 // if (((currentCharacter = source[currentPosition++])
1513 // && (source[currentPosition] == 'u')) {
1514 // //-------------unicode traitement ------------
1515 // getNextUnicodeChar();
1517 //handle the \\u case manually into comment
1518 // if (currentCharacter == '\\') {
1519 // if (source[currentPosition] == '\\')
1520 // currentPosition++;
1521 // //jump over the \\
1523 // empty comment is not a javadoc /**/
1524 if (currentCharacter == '/') {
1527 //loop until end of comment */
1528 while ((currentCharacter != '/') || (!star)) {
1529 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1530 checkNonExternalizeString();
1531 if (recordLineSeparator) {
1532 pushLineSeparator();
1537 star = currentCharacter == '*';
1539 currentCharacter = source[currentPosition++];
1540 // if (((currentCharacter = source[currentPosition++])
1542 // && (source[currentPosition] == 'u')) {
1543 // //-------------unicode traitement ------------
1544 // getNextUnicodeChar();
1546 //handle the \\u case manually into comment
1547 // if (currentCharacter == '\\') {
1548 // if (source[currentPosition] == '\\')
1549 // currentPosition++;
1550 // } //jump over the \\
1552 recordComment(isJavadoc);
1553 if (tokenizeComments) {
1555 return TokenNameCOMMENT_PHPDOC;
1556 return TokenNameCOMMENT_BLOCK;
1558 } catch (IndexOutOfBoundsException e) {
1559 throw new InvalidInputException(UNTERMINATED_COMMENT);
1563 if (getNextChar('='))
1564 return TokenNameDIVIDE_EQUAL;
1565 return TokenNameDIVIDE;
1569 return TokenNameEOF;
1570 //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1571 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1574 if (currentCharacter == '$') {
1575 while ((currentCharacter = source[currentPosition++]) == '$') {
1577 if (currentCharacter == '{')
1578 return TokenNameDOLLAR_LBRACE;
1579 if (isPHPIdentifierStart(currentCharacter))
1580 return scanIdentifierOrKeyword(true);
1581 return TokenNameERROR;
1583 if (isPHPIdentifierStart(currentCharacter))
1584 return scanIdentifierOrKeyword(false);
1585 if (Character.isDigit(currentCharacter))
1586 return scanNumber(false);
1587 return TokenNameERROR;
1590 } //-----------------end switch while try--------------------
1591 catch (IndexOutOfBoundsException e) {
1594 return TokenNameEOF;
1597 // public final void getNextUnicodeChar()
1598 // throws IndexOutOfBoundsException, InvalidInputException {
1600 // //handle the case of unicode.
1601 // //when a unicode appears then we must use a buffer that holds char internal values
1602 // //At the end of this method currentCharacter holds the new visited char
1603 // //and currentPosition points right next after it
1605 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1607 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1608 // currentPosition++;
1609 // while (source[currentPosition] == 'u') {
1610 // currentPosition++;
1614 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1616 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1618 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1620 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1622 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1624 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1625 // //need the unicode buffer
1626 // if (withoutUnicodePtr == 0) {
1627 // //buffer all the entries that have been left aside....
1628 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1629 // System.arraycopy(
1632 // withoutUnicodeBuffer,
1634 // withoutUnicodePtr);
1636 // //fill the buffer with the char
1637 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1639 // unicodeAsBackSlash = currentCharacter == '\\';
1642 * Tokenize a method body, assuming that curly brackets are properly balanced.
1644 public final void jumpOverMethodBody() {
// Steps through the source one token at a time: every pass of the outer loop
// first consumes whitespace, then dispatches on the next character to move
// past literals, comments, identifiers and numbers. The method declares no
// checked exceptions; IndexOutOfBoundsException and InvalidInputException
// are caught by the clauses at the end of the method.
1646 this.wasAcr = false;
1649 while (true) { //loop for jumping over comments
1650 // ---------Consume white space and handles startPosition---------
1651 boolean isWhiteSpace;
// remember where the character about to be read starts
1653 startPosition = currentPosition;
1654 currentCharacter = source[currentPosition++];
1655 // if (((currentCharacter = source[currentPosition++]) == '\\')
1656 // && (source[currentPosition] == 'u')) {
1657 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
// line terminators are recorded only when recordLineSeparator is set
1659 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1660 pushLineSeparator();
1661 isWhiteSpace = Character.isWhitespace(currentCharacter);
1663 } while (isWhiteSpace);
1665 // -------consume token until } is found---------
1666 switch (currentCharacter) {
// NOTE(review): presumably checks whether the next character is a backslash (start of an escape) - confirm getNextChar(char) semantics
1678 test = getNextChar('\\');
1681 scanDoubleQuotedEscapeCharacter();
1682 } catch (InvalidInputException ex) {
1685 // try { // consume next character
1686 unicodeAsBackSlash = false;
1687 currentCharacter = source[currentPosition++];
1688 // if (((currentCharacter = source[currentPosition++]) == '\\')
1689 // && (source[currentPosition] == 'u')) {
1690 // getNextUnicodeChar();
// a non-zero withoutUnicodePtr means the side buffer is in use, so the character is appended to it
1692 if (withoutUnicodePtr != 0) {
1693 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1696 // } catch (InvalidInputException ex) {
1704 // try { // consume next character
1705 unicodeAsBackSlash = false;
1706 currentCharacter = source[currentPosition++];
1707 // if (((currentCharacter = source[currentPosition++]) == '\\')
1708 // && (source[currentPosition] == 'u')) {
1709 // getNextUnicodeChar();
1711 if (withoutUnicodePtr != 0) {
1712 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1715 // } catch (InvalidInputException ex) {
// read characters until the closing double quote is reached
1717 while (currentCharacter != '"') {
1718 if (currentCharacter == '\r') {
1719 if (source[currentPosition] == '\n')
1722 // the string cannot go further than the line
1724 if (currentCharacter == '\n') {
1726 // the string cannot go further than the line
// a backslash inside the string: let the escape scanner consume the escape sequence
1728 if (currentCharacter == '\\') {
1730 scanDoubleQuotedEscapeCharacter();
1731 } catch (InvalidInputException ex) {
1734 // try { // consume next character
1735 unicodeAsBackSlash = false;
1736 currentCharacter = source[currentPosition++];
1737 // if (((currentCharacter = source[currentPosition++]) == '\\')
1738 // && (source[currentPosition] == 'u')) {
1739 // getNextUnicodeChar();
1741 if (withoutUnicodePtr != 0) {
1742 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1745 // } catch (InvalidInputException ex) {
1748 } catch (IndexOutOfBoundsException e) {
// a result of 0 leads into the loop below, which reads up to the end of the current line
1755 if ((test = getNextChar('/', '*')) == 0) {
1759 currentCharacter = source[currentPosition++];
1760 // if (((currentCharacter = source[currentPosition++]) == '\\')
1761 // && (source[currentPosition] == 'u')) {
1762 // //-------------unicode treatment ------------
1763 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1764 // currentPosition++;
1765 // while (source[currentPosition] == 'u') {
1766 // currentPosition++;
1769 // Character.getNumericValue(source[currentPosition++]))
1773 // Character.getNumericValue(source[currentPosition++]))
1777 // Character.getNumericValue(source[currentPosition++]))
1781 // Character.getNumericValue(source[currentPosition++]))
1784 // //error don't care of the value
1785 // currentCharacter = 'A';
1786 // } //something different from \n and \r
1788 // currentCharacter =
1789 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1793 while (currentCharacter != '\r' && currentCharacter != '\n') {
1795 currentCharacter = source[currentPosition++];
1796 // if (((currentCharacter = source[currentPosition++])
1798 // && (source[currentPosition] == 'u')) {
1799 // //-------------unicode treatment ------------
1800 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1801 // currentPosition++;
1802 // while (source[currentPosition] == 'u') {
1803 // currentPosition++;
1806 // Character.getNumericValue(source[currentPosition++]))
1810 // Character.getNumericValue(source[currentPosition++]))
1814 // Character.getNumericValue(source[currentPosition++]))
1818 // Character.getNumericValue(source[currentPosition++]))
1821 // //error don't care of the value
1822 // currentCharacter = 'A';
1823 // } //something different from \n and \r
1825 // currentCharacter =
1826 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1830 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1831 pushLineSeparator();
1832 } catch (IndexOutOfBoundsException e) {
1833 } //an eof will then be generated
1837 //traditional and annotation comment
// star remembers whether the previously read character was '*'
1838 boolean star = false;
1839 // try { // consume next character
1840 unicodeAsBackSlash = false;
1841 currentCharacter = source[currentPosition++];
1842 // if (((currentCharacter = source[currentPosition++]) == '\\')
1843 // && (source[currentPosition] == 'u')) {
1844 // getNextUnicodeChar();
1846 if (withoutUnicodePtr != 0) {
1847 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1850 // } catch (InvalidInputException ex) {
1852 if (currentCharacter == '*') {
1855 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1856 pushLineSeparator();
1857 try { //get the next char
1858 currentCharacter = source[currentPosition++];
1859 // if (((currentCharacter = source[currentPosition++]) == '\\')
1860 // && (source[currentPosition] == 'u')) {
1861 // //-------------unicode treatment ------------
1862 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1863 // currentPosition++;
1864 // while (source[currentPosition] == 'u') {
1865 // currentPosition++;
1868 // Character.getNumericValue(source[currentPosition++]))
1872 // Character.getNumericValue(source[currentPosition++]))
1876 // Character.getNumericValue(source[currentPosition++]))
1880 // Character.getNumericValue(source[currentPosition++]))
1883 // //error don't care of the value
1884 // currentCharacter = 'A';
1885 // } //something different from * and /
1887 // currentCharacter =
1888 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1891 //loop until end of comment */
// the loop ends only on a '/' that directly follows a '*'
1892 while ((currentCharacter != '/') || (!star)) {
1893 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1894 pushLineSeparator();
1895 star = currentCharacter == '*';
1897 currentCharacter = source[currentPosition++];
1898 // if (((currentCharacter = source[currentPosition++])
1900 // && (source[currentPosition] == 'u')) {
1901 // //-------------unicode treatment ------------
1902 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1903 // currentPosition++;
1904 // while (source[currentPosition] == 'u') {
1905 // currentPosition++;
1908 // Character.getNumericValue(source[currentPosition++]))
1912 // Character.getNumericValue(source[currentPosition++]))
1916 // Character.getNumericValue(source[currentPosition++]))
1920 // Character.getNumericValue(source[currentPosition++]))
1923 // //error don't care of the value
1924 // currentCharacter = 'A';
1925 // } //something different from * and /
1927 // currentCharacter =
1928 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1932 } catch (IndexOutOfBoundsException e) {
// identifiers (including names introduced by '$') are scanned only to advance the position; the returned token is not used
1941 if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
1943 scanIdentifierOrKeyword((currentCharacter == '$'));
1944 } catch (InvalidInputException ex) {
1948 if (Character.isDigit(currentCharacter)) {
1951 } catch (InvalidInputException ex) {
1957 //-----------------end switch while try--------------------
1958 } catch (IndexOutOfBoundsException e) {
1959 } catch (InvalidInputException e) {
1963 // public final boolean jumpOverUnicodeWhiteSpace()
1964 // throws InvalidInputException {
1966 // //handle the case of unicode. Jump over the next whiteSpace
1967 // //making startPosition point to the next available char
1968 // //On false, the currentCharacter is filled up with a potential
1972 // this.wasAcr = false;
1973 // int c1, c2, c3, c4;
1974 // int unicodeSize = 6;
1975 // currentPosition++;
1976 // while (source[currentPosition] == 'u') {
1977 // currentPosition++;
1981 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1983 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1985 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1987 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1989 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1992 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1993 // if (recordLineSeparator
1994 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1995 // pushLineSeparator();
1996 // if (Character.isWhitespace(currentCharacter))
1999 // //buffer the new char which is not a white space
2000 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2001 // //withoutUnicodePtr == 1 is true here
2003 // } catch (IndexOutOfBoundsException e) {
2004 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
/**
 * Returns the recorded line-end positions as a new array that holds exactly
 * the first linePtr + 1 entries of lineEnds.
 */
2007 public final int[] getLineEnds() {
2008 //return a bounded copy of this.lineEnds
// the copy is allocated inside the call and filled with entries 0..linePtr
2011 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
// Accessor for the character data being scanned.
// NOTE(review): presumably returns the source char[] field itself, without copying - confirm against the body.
2015 public char[] getSource() {
/**
 * Returns the one-character token located at source[startPosition] as a char[].
 */
2018 final char[] optimizedCurrentTokenSource1() {
2019 //intended to always return the same char[], built only once
2021 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2022 char charOne = source[startPosition];
// NOTE(review): this return allocates a fresh one-element array; presumably it is the
// fallback for characters that have no pre-built shared array - confirm
2077 return new char[] { charOne };
/**
 * Looks up the two-character token starting at source[startPosition] in the
 * cache charArray_length[0] and stores a new char[] there when no cached
 * entry matches.
 */
2081 final char[] optimizedCurrentTokenSource2() {
2082 //try to return the same char[], built only once
// bucket index: first char shifted left by 6, plus second char, modulo TableSize
2085 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2086 char[][] table = charArray_length[0][hash];
// first pass: scan the slots above the current value of i, up to the end of the bucket
2088 while (++i < InternalTableSize) {
2089 char[] charArray = table[i];
// NOTE(review): a match on both characters presumably returns the cached array - confirm
2090 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2093 //---------other side---------
// second pass: scan the slots up to and including newEntry2
2095 int max = newEntry2;
2096 while (++i <= max) {
2097 char[] charArray = table[i];
2098 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2101 //--------add the entry-------
// NOTE(review): when the next slot index reaches InternalTableSize it presumably wraps around - confirm
2102 if (++max >= InternalTableSize)
// no cached entry matched: store a new two-character array in slot max
2105 table[max] = (r = new char[] { c0, c1 });
/**
 * Looks up the three-character token starting at source[startPosition] in the
 * cache charArray_length[1] and stores a new char[] there when no cached
 * entry matches.
 */
2110 final char[] optimizedCurrentTokenSource3() {
2111 //try to return the same char[], built only once
// hash input: the three characters combined with shifts of 12, 6 and 0
// NOTE(review): presumably reduced modulo TableSize like the two-character variant - confirm
2115 (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2117 char[][] table = charArray_length[1][hash];
// first pass: scan the slots above the current value of i, up to the end of the bucket
2119 while (++i < InternalTableSize) {
2120 char[] charArray = table[i];
// NOTE(review): a match on all three characters presumably returns the cached array - confirm
2121 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2124 //---------other side---------
// second pass: scan the slots up to and including newEntry3
2126 int max = newEntry3;
2127 while (++i <= max) {
2128 char[] charArray = table[i];
2129 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2132 //--------add the entry-------
// NOTE(review): when the next slot index reaches InternalTableSize it presumably wraps around - confirm
2133 if (++max >= InternalTableSize)
// no cached entry matched: store a new three-character array in slot max
2136 table[max] = (r = new char[] { c0, c1, c2 });
/**
 * Looks up the four-character token starting at source[startPosition] in the
 * cache charArray_length[2] and stores a new char[] there when no cached
 * entry matches.
 */
2141 final char[] optimizedCurrentTokenSource4() {
2142 //try to return the same char[], built only once
2144 char c0, c1, c2, c3;
// hash input: the four characters combined with shifts of 18, 12, 6 and 0;
// the first term is widened to long before shifting
2146 ((((long) (c0 = source[startPosition])) << 18)
2147 + ((c1 = source[startPosition + 1]) << 12)
2148 + ((c2 = source[startPosition + 2]) << 6)
2149 + (c3 = source[startPosition + 3]))
// the hash value is narrowed to int to index the bucket table
2151 char[][] table = charArray_length[2][(int) hash];
// first pass: scan the slots above the current value of i, up to the end of the bucket
2153 while (++i < InternalTableSize) {
2154 char[] charArray = table[i];
// NOTE(review): a match on all four characters presumably returns the cached array - confirm
2155 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2158 //---------other side---------
// second pass: scan the slots up to and including newEntry4
2160 int max = newEntry4;
2161 while (++i <= max) {
2162 char[] charArray = table[i];
2163 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2166 //--------add the entry-------
// NOTE(review): when the next slot index reaches InternalTableSize it presumably wraps around - confirm
2167 if (++max >= InternalTableSize)
// no cached entry matched: store a new four-character array in slot max
2170 table[max] = (r = new char[] { c0, c1, c2, c3 });
/**
 * Looks up the five-character token starting at source[startPosition] in the
 * cache charArray_length[3] and stores a new char[] there when no cached
 * entry matches.
 */
2176 final char[] optimizedCurrentTokenSource5() {
2177 //try to return the same char[], built only once
2179 char c0, c1, c2, c3, c4;
// hash input: the five characters combined with shifts of 24, 18, 12, 6 and 0;
// the first two terms are widened to long before shifting
2181 ((((long) (c0 = source[startPosition])) << 24)
2182 + (((long) (c1 = source[startPosition + 1])) << 18)
2183 + ((c2 = source[startPosition + 2]) << 12)
2184 + ((c3 = source[startPosition + 3]) << 6)
2185 + (c4 = source[startPosition + 4]))
// the hash value is narrowed to int to index the bucket table
2187 char[][] table = charArray_length[3][(int) hash];
// first pass: scan the slots above the current value of i, up to the end of the bucket
2189 while (++i < InternalTableSize) {
2190 char[] charArray = table[i];
// NOTE(review): a match on all five characters presumably returns the cached array - confirm
2191 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2194 //---------other side---------
// second pass: scan the slots up to and including newEntry5
2196 int max = newEntry5;
2197 while (++i <= max) {
2198 char[] charArray = table[i];
2199 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2202 //--------add the entry-------
// NOTE(review): when the next slot index reaches InternalTableSize it presumably wraps around - confirm
2203 if (++max >= InternalTableSize)
// no cached entry matched: store a new five-character array in slot max
2206 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
/**
 * Looks up the six-character token starting at source[startPosition] in the
 * cache charArray_length[4] and stores a new char[] there when no cached
 * entry matches.
 */
2212 final char[] optimizedCurrentTokenSource6() {
2213 //try to return the same char[], built only once
2215 char c0, c1, c2, c3, c4, c5;
// hash input: the six characters combined with shifts of 32, 24, 18, 12, 6 and 0;
// the first three terms are widened to long before shifting
2217 ((((long) (c0 = source[startPosition])) << 32)
2218 + (((long) (c1 = source[startPosition + 1])) << 24)
2219 + (((long) (c2 = source[startPosition + 2])) << 18)
2220 + ((c3 = source[startPosition + 3]) << 12)
2221 + ((c4 = source[startPosition + 4]) << 6)
2222 + (c5 = source[startPosition + 5]))
// the hash value is narrowed to int to index the bucket table
2224 char[][] table = charArray_length[4][(int) hash];
// first pass: scan the slots above the current value of i, up to the end of the bucket
2226 while (++i < InternalTableSize) {
2227 char[] charArray = table[i];
// NOTE(review): a match on all six characters presumably returns the cached array - confirm
2228 if ((c0 == charArray[0])
2229 && (c1 == charArray[1])
2230 && (c2 == charArray[2])
2231 && (c3 == charArray[3])
2232 && (c4 == charArray[4])
2233 && (c5 == charArray[5]))
2236 //---------other side---------
// second pass: scan the slots up to and including newEntry6
2238 int max = newEntry6;
2239 while (++i <= max) {
2240 char[] charArray = table[i];
2241 if ((c0 == charArray[0])
2242 && (c1 == charArray[1])
2243 && (c2 == charArray[2])
2244 && (c3 == charArray[3])
2245 && (c4 == charArray[4])
2246 && (c5 == charArray[5]))
2249 //--------add the entry-------
// NOTE(review): when the next slot index reaches InternalTableSize it presumably wraps around - confirm
2250 if (++max >= InternalTableSize)
// no cached entry matched: store a new six-character array in slot max
2253 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
/**
 * Records the position of the line terminator held in currentCharacter
 * ('\r' or '\n') in the lineEnds array, growing that array by INCREMENT
 * entries when it is full. A '\r' directly followed by '\n' ends up as a
 * single entry located at the '\n'.
 */
2258 public final void pushLineSeparator() throws InvalidInputException {
2259 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// number of slots added to lineEnds each time it overflows
2260 final int INCREMENT = 250;
2262 if (this.checkNonExternalizedStringLiterals) {
2263 // reinitialize the current line for non externalize strings purpose
2266 //currentCharacter is at position currentPosition-1
2269 if (currentCharacter == '\r') {
2270 int separatorPos = currentPosition - 1;
// NOTE(review): guard for a separator at or before the last recorded line end - presumably returns early; confirm
2271 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2273 //System.out.println("CR-" + separatorPos);
2275 lineEnds[++linePtr] = separatorPos;
2276 } catch (IndexOutOfBoundsException e) {
2277 //linePtr value is correct
// linePtr was already incremented before the failed store, so after growing the array the same index is reused
2278 int oldLength = lineEnds.length;
2279 int[] old = lineEnds;
2280 lineEnds = new int[oldLength + INCREMENT];
2281 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2282 lineEnds[linePtr] = separatorPos;
2284 // look-ahead for merged cr+lf
2286 if (source[currentPosition] == '\n') {
2287 //System.out.println("look-ahead LF-" + currentPosition);
// the entry just written for the '\r' is moved onto the following '\n'
2288 lineEnds[linePtr] = currentPosition;
2294 } catch (IndexOutOfBoundsException e) {
2299 if (currentCharacter == '\n') {
2300 //must merge eventual cr followed by lf
// when the previous entry is the character right before this '\n' and wasAcr is set, overwrite it instead of adding a new entry
2301 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2302 //System.out.println("merge LF-" + (currentPosition - 1));
2303 lineEnds[linePtr] = currentPosition - 1;
2305 int separatorPos = currentPosition - 1;
// NOTE(review): same guard as in the '\r' branch - presumably returns early; confirm
2306 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2308 // System.out.println("LF-" + separatorPos);
2310 lineEnds[++linePtr] = separatorPos;
2311 } catch (IndexOutOfBoundsException e) {
2312 //linePtr value is correct
// grow lineEnds by INCREMENT and retry the store at the already-incremented index
2313 int oldLength = lineEnds.length;
2314 int[] old = lineEnds;
2315 lineEnds = new int[oldLength + INCREMENT];
2316 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2317 lineEnds[linePtr] = separatorPos;
/**
 * Variant of pushLineSeparator() that records the line terminator held in
 * currentCharacter at an offset of 6 characters behind currentPosition
 * instead of 1, growing lineEnds by INCREMENT entries when it is full.
 */
2324 public final void pushUnicodeLineSeparator() {
2325 // isUnicode means that the \r or \n has been read as a unicode character
2327 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// number of slots added to lineEnds each time it overflows
2329 final int INCREMENT = 250;
2330 //currentCharacter is at position currentPosition-1
2332 if (this.checkNonExternalizedStringLiterals) {
2333 // reinitialize the current line for non externalize strings purpose
2338 if (currentCharacter == '\r') {
// NOTE(review): the offset of 6 presumably matches the length of a unicode escape (backslash, 'u', four hex digits) - confirm
2339 int separatorPos = currentPosition - 6;
// NOTE(review): guard for a separator at or before the last recorded line end - presumably returns early; confirm
2340 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2342 //System.out.println("CR-" + separatorPos);
2344 lineEnds[++linePtr] = separatorPos;
2345 } catch (IndexOutOfBoundsException e) {
2346 //linePtr value is correct
// linePtr was already incremented before the failed store, so after growing the array the same index is reused
2347 int oldLength = lineEnds.length;
2348 int[] old = lineEnds;
2349 lineEnds = new int[oldLength + INCREMENT];
2350 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2351 lineEnds[linePtr] = separatorPos;
2353 // look-ahead for merged cr+lf
2354 if (source[currentPosition] == '\n') {
2355 //System.out.println("look-ahead LF-" + currentPosition);
// the entry just written for the '\r' is moved onto the following '\n'
2356 lineEnds[linePtr] = currentPosition;
2364 if (currentCharacter == '\n') {
2365 //must merge eventual cr followed by lf
// when wasAcr is set and the previous entry sits 7 characters back, overwrite it instead of adding a new entry
2366 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2367 //System.out.println("merge LF-" + (currentPosition - 1));
2368 lineEnds[linePtr] = currentPosition - 6;
2370 int separatorPos = currentPosition - 6;
// NOTE(review): same guard as in the '\r' branch - presumably returns early; confirm
2371 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2373 // System.out.println("LF-" + separatorPos);
2375 lineEnds[++linePtr] = separatorPos;
2376 } catch (IndexOutOfBoundsException e) {
2377 //linePtr value is correct
// grow lineEnds by INCREMENT and retry the store at the already-incremented index
2378 int oldLength = lineEnds.length;
2379 int[] old = lineEnds;
2380 lineEnds = new int[oldLength + INCREMENT];
2381 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2382 lineEnds[linePtr] = separatorPos;
2389 public final void recordComment(boolean isJavadoc) {
2391 // a new annotation comment is recorded
2393 commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2394 } catch (IndexOutOfBoundsException e) {
2395 int oldStackLength = commentStops.length;
2396 int[] oldStack = commentStops;
2397 commentStops = new int[oldStackLength + 30];
2398 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2399 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2400 //grows the positions buffers too
2401 int[] old = commentStarts;
2402 commentStarts = new int[oldStackLength + 30];
2403 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2406 //the buffer is of a correct size here
2407 commentStarts[commentPtr] = startPosition;
// Repositions the scanner so that it can scan again from begin.
// begin: new value for initialPosition, startPosition and currentPosition.
// end:   last position to scan; eofPosition becomes end + 1.
2409 public void resetTo(int begin, int end) {
2410 //reset the scanner to a given position where it may rescan again
2413 initialPosition = startPosition = currentPosition = begin;
// end + 1 is skipped when end is already Integer.MAX_VALUE, which avoids int overflow.
2414 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2415 commentPtr = -1; // reset comment stack
// Handles the character that follows a backslash inside a single-quoted string.
// The next source character is read into currentCharacter and dispatched on;
// every branch ends with currentCharacter holding either a single quote or a
// backslash. Unicode handling is disabled (see the commented-out code).
2418 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2419 // the string with "\\u" is a legal string of two chars \ and u
2420 //thus we use a direct access to the source (for regular cases).
2422 // if (unicodeAsBackSlash) {
2423 // // consume next character
2424 // unicodeAsBackSlash = false;
2425 // if (((currentCharacter = source[currentPosition++]) == '\\')
2426 // && (source[currentPosition] == 'u')) {
2427 // getNextUnicodeChar();
2429 // if (withoutUnicodePtr != 0) {
2430 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Direct read from the source buffer; advances currentPosition by one.
2434 currentCharacter = source[currentPosition++];
2435 switch (currentCharacter) {
2437 currentCharacter = '\'';
2440 currentCharacter = '\\';
// NOTE(review): presumably the fallback for any other character, which keeps
// the backslash itself as the scanned character - confirm.
2443 currentCharacter = '\\';
// Handles the character that follows a backslash inside a double-quoted string.
// The next source character is read into currentCharacter and replaced by the
// character the escape stands for: tab, line feed, carriage return, double
// quote, single quote, backslash or dollar. The backspace and form-feed
// assignments are commented out. Otherwise an octal escape of up to three
// digits is decoded into currentCharacter.
// Throws InvalidInputException with INVALID_ESCAPE (see the throw below).
2448 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2449 // the string with "\\u" is a legal string of two chars \ and u
2450 //thus we use a direct access to the source (for regular cases).
2452 // if (unicodeAsBackSlash) {
2453 // // consume next character
2454 // unicodeAsBackSlash = false;
2455 // if (((currentCharacter = source[currentPosition++]) == '\\')
2456 // && (source[currentPosition] == 'u')) {
2457 // getNextUnicodeChar();
2459 // if (withoutUnicodePtr != 0) {
2460 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Direct read from the source buffer; advances currentPosition by one.
2464 currentCharacter = source[currentPosition++];
2465 switch (currentCharacter) {
2467 // currentCharacter = '\b';
2470 currentCharacter = '\t';
2473 currentCharacter = '\n';
2476 // currentCharacter = '\f';
2479 currentCharacter = '\r';
2482 currentCharacter = '\"';
2485 currentCharacter = '\'';
2488 currentCharacter = '\\';
2491 currentCharacter = '$';
2494 // -----------octal escape--------------
2496 // OctalDigit OctalDigit
2497 // ZeroToThree OctalDigit OctalDigit
// The first digit must be 0..7. A three-digit escape is accepted only when the
// first digit is 0..3 (zeroToThreeNot is true for 4..7).
2499 int number = Character.getNumericValue(currentCharacter);
2500 if (number >= 0 && number <= 7) {
2501 boolean zeroToThreeNot = number > 3;
2502 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2503 int digit = Character.getNumericValue(currentCharacter);
2504 if (digit >= 0 && digit <= 7) {
// Accumulate the value in base 8.
2505 number = (number * 8) + digit;
2506 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2507 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2510 digit = Character.getNumericValue(currentCharacter);
2511 if (digit >= 0 && digit <= 7) {
2512 // has read \ZeroToThree OctalDigit OctalDigit
2513 number = (number * 8) + digit;
2514 } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2518 } else { // has read \OctalDigit NonDigit--> ignore last character
2521 } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2524 } else { // has read \OctalDigit --> ignore last character
2528 throw new InvalidInputException(INVALID_ESCAPE);
// The decoded octal value becomes the scanned character.
2529 currentCharacter = (char) number;
2532 // throw new InvalidInputException(INVALID_ESCAPE);
2536 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2537 // return scanIdentifierOrKeyword( false );
// Scans the rest of an identifier and classifies it.
// The identifier characters are consumed with getNextCharAsJavaIdentifierPart().
// A token whose source is exactly "$this" yields TokenNamethis and other
// variables yield TokenNameVariable (presumably only when isVariable is true -
// confirm). Otherwise the token text is lower-cased into data, so keyword
// matching is case-insensitive, and compared character by character against
// the keywords that start with the same letter.
// Returns the matching keyword token, or TokenNameIdentifier when nothing matches.
2540 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2543 //first dispatch on the first char.
2544 //then the length. If there are several
2545 //keywors with the same length AND the same first char, then do another
2546 //disptach on the second char :-)...cool....but fast !
2548 useAssertAsAnIndentifier = false;
2550 while (getNextCharAsJavaIdentifierPart()) {
// NOTE(review): this allocates a String for every variable token; a char[]
// comparison would avoid it.
2554 if (new String(getCurrentTokenSource()).equals("$this")) {
2555 return TokenNamethis;
2557 return TokenNameVariable;
2562 // if (withoutUnicodePtr == 0)
2564 //quick test on length == 1 but not on length > 12 while most identifier
2565 //have a length which is <= 12...but there are lots of identifier with
// A single-character token can never be a keyword.
2569 if ((length = currentPosition - startPosition) == 1)
2570 return TokenNameIdentifier;
// Lower-cased copy of the token text, read straight from the source buffer.
2572 data = new char[length];
2573 index = startPosition;
2574 for (int i = 0; i < length; i++) {
2575 data[i] = Character.toLowerCase(source[index + i]);
2579 // if ((length = withoutUnicodePtr) == 1)
2580 // return TokenNameIdentifier;
2581 // // data = withoutUnicodeBuffer;
2582 // data = new char[withoutUnicodeBuffer.length];
2583 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2584 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2589 firstLetter = data[index];
2590 switch (firstLetter) {
2592 case 'a' : // as and array abstract
2595 if ((data[++index] == 's')) {
2598 return TokenNameIdentifier;
2601 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2602 return TokenNameAND;
2604 return TokenNameIdentifier;
2607 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2608 return TokenNamearray;
2610 return TokenNameIdentifier;
2612 if ((data[++index] == 'b')
2613 && (data[++index] == 's')
2614 && (data[++index] == 't')
2615 && (data[++index] == 'r')
2616 && (data[++index] == 'a')
2617 && (data[++index] == 'c')
2618 && (data[++index] == 't'))
2619 return TokenNameabstract;
2621 return TokenNameIdentifier;
2623 return TokenNameIdentifier;
2628 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
2629 return TokenNamebreak;
2631 return TokenNameIdentifier;
2633 return TokenNameIdentifier;
2636 case 'c' : //case catch class const continue
2639 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
2640 return TokenNamecase;
2642 return TokenNameIdentifier;
// NOTE(review): the follow-up tests below start at data[index], i.e. wherever
// the previous short-circuited test stopped. After a partial match index is
// already past the second character, so a later test can read beyond the
// token. The same pattern recurs in the 'e', 'f', 'i' and 's' groups - verify.
2644 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
2645 return TokenNamecatch;
2646 if ((data[index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
2647 return TokenNameclass;
2648 if ((data[index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
2649 return TokenNameconst;
2651 return TokenNameIdentifier;
2653 if ((data[++index] == 'o')
2654 && (data[++index] == 'n')
2655 && (data[++index] == 't')
2656 && (data[++index] == 'i')
2657 && (data[++index] == 'n')
2658 && (data[++index] == 'u')
2659 && (data[++index] == 'e'))
2660 return TokenNamecontinue;
2662 return TokenNameIdentifier;
2664 return TokenNameIdentifier;
2667 case 'd' : //define declare default do die
2670 if ((data[++index] == 'o'))
2673 return TokenNameIdentifier;
2675 if ((data[++index] == 'i') && (data[++index] == 'e'))
2676 return TokenNamedie;
2678 return TokenNameIdentifier;
2680 if ((data[++index] == 'e')
2681 && (data[++index] == 'f')
2682 && (data[++index] == 'i')
2683 && (data[++index] == 'n')
2684 && (data[++index] == 'e'))
2685 return TokenNamedefine;
2687 return TokenNameIdentifier;
2689 if ((data[++index] == 'e')
2690 && (data[++index] == 'c')
2691 && (data[++index] == 'l')
2692 && (data[++index] == 'a')
2693 && (data[++index] == 'r')
2694 && (data[++index] == 'e'))
2695 return TokenNamedeclare;
2697 if ((data[++index] == 'e')
2698 && (data[++index] == 'f')
2699 && (data[++index] == 'a')
2700 && (data[++index] == 'u')
2701 && (data[++index] == 'l')
2702 && (data[++index] == 't'))
2703 return TokenNamedefault;
2705 return TokenNameIdentifier;
2707 return TokenNameIdentifier;
2709 case 'e' : //echo else exit elseif extends eval
// NOTE(review): assuming this chain handles four-character tokens, an input
// such as "ecls" passes 'c', fails 'h' with index at 2, then matches 'l' and
// 's' and finally reads data[4], which is out of range - verify.
2712 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
2713 return TokenNameecho;
2714 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2715 return TokenNameelse;
2716 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
2717 return TokenNameexit;
2718 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
2719 return TokenNameeval;
2721 return TokenNameIdentifier;
2722 case 5 : // endif empty
2723 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
2724 return TokenNameendif;
2725 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
2726 return TokenNameempty;
2728 return TokenNameIdentifier;
2730 if ((data[++index] == 'n')
2731 && (data[++index] == 'd')
2732 && (data[++index] == 'f')
2733 && (data[++index] == 'o')
2734 && (data[++index] == 'r'))
2735 return TokenNameendfor;
2737 (data[index] == 'l')
2738 && (data[++index] == 's')
2739 && (data[++index] == 'e')
2740 && (data[++index] == 'i')
2741 && (data[++index] == 'f'))
2742 return TokenNameelseif;
2744 return TokenNameIdentifier;
2746 if ((data[++index] == 'x')
2747 && (data[++index] == 't')
2748 && (data[++index] == 'e')
2749 && (data[++index] == 'n')
2750 && (data[++index] == 'd')
2751 && (data[++index] == 's'))
2752 return TokenNameextends;
2754 return TokenNameIdentifier;
2755 case 8 : // endwhile
2756 if ((data[++index] == 'n')
2757 && (data[++index] == 'd')
2758 && (data[++index] == 'w')
2759 && (data[++index] == 'h')
2760 && (data[++index] == 'i')
2761 && (data[++index] == 'l')
2762 && (data[++index] == 'e'))
2763 return TokenNameendwhile;
2765 return TokenNameIdentifier;
2766 case 9 : // endswitch
2767 if ((data[++index] == 'n')
2768 && (data[++index] == 'd')
2769 && (data[++index] == 's')
2770 && (data[++index] == 'w')
2771 && (data[++index] == 'i')
2772 && (data[++index] == 't')
2773 && (data[++index] == 'c')
2774 && (data[++index] == 'h'))
2775 return TokenNameendswitch;
2777 return TokenNameIdentifier;
// NOTE(review): the first test below spells out "enddeclare" but returns
// TokenNameendforeach, the same token as the "endforeach" test after it.
// This looks like a copy/paste slip - confirm which token "enddeclare" should produce.
2778 case 10 : // enddeclare
2779 if ((data[++index] == 'n')
2780 && (data[++index] == 'd')
2781 && (data[++index] == 'd')
2782 && (data[++index] == 'e')
2783 && (data[++index] == 'c')
2784 && (data[++index] == 'l')
2785 && (data[++index] == 'a')
2786 && (data[++index] == 'r')
2787 && (data[++index] == 'e'))
2788 return TokenNameendforeach;
2790 if ((data[++index] == 'n') // endforeach
2791 && (data[++index] == 'd')
2792 && (data[++index] == 'f')
2793 && (data[++index] == 'o')
2794 && (data[++index] == 'r')
2795 && (data[++index] == 'e')
2796 && (data[++index] == 'a')
2797 && (data[++index] == 'c')
2798 && (data[++index] == 'h'))
2799 return TokenNameendforeach;
2801 return TokenNameIdentifier;
2804 return TokenNameIdentifier;
2807 case 'f' : //for false final function
2810 if ((data[++index] == 'o') && (data[++index] == 'r'))
2811 return TokenNamefor;
2813 return TokenNameIdentifier;
2815 if ((data[++index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2816 return TokenNamefalse;
2817 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
2818 return TokenNamefinal;
2820 return TokenNameIdentifier;
2822 if ((data[++index] == 'o')
2823 && (data[++index] == 'r')
2824 && (data[++index] == 'e')
2825 && (data[++index] == 'a')
2826 && (data[++index] == 'c')
2827 && (data[++index] == 'h'))
2828 return TokenNameforeach;
2830 return TokenNameIdentifier;
2831 case 8 : // function
2832 if ((data[++index] == 'u')
2833 && (data[++index] == 'n')
2834 && (data[++index] == 'c')
2835 && (data[++index] == 't')
2836 && (data[++index] == 'i')
2837 && (data[++index] == 'o')
2838 && (data[++index] == 'n'))
2839 return TokenNamefunction;
2841 return TokenNameIdentifier;
2843 return TokenNameIdentifier;
2847 if ((data[++index] == 'l')
2848 && (data[++index] == 'o')
2849 && (data[++index] == 'b')
2850 && (data[++index] == 'a')
2851 && (data[++index] == 'l')) {
2852 return TokenNameglobal;
2855 return TokenNameIdentifier;
2857 case 'i' : //if int isset include include_once instanceof interface implements
2860 if (data[++index] == 'f')
2863 return TokenNameIdentifier;
2865 // if ((data[++index] == 'n') && (data[++index] == 't'))
2866 // return TokenNameint;
2868 // return TokenNameIdentifier;
2870 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
2871 return TokenNameisset;
2873 return TokenNameIdentifier;
2875 if ((data[++index] == 'n')
2876 && (data[++index] == 'c')
2877 && (data[++index] == 'l')
2878 && (data[++index] == 'u')
2879 && (data[++index] == 'd')
2880 && (data[++index] == 'e'))
2881 return TokenNameinclude;
2883 return TokenNameIdentifier;
2884 case 9 : // interface
2885 if ((data[++index] == 'n')
2886 && (data[++index] == 't')
2887 && (data[++index] == 'e')
2888 && (data[++index] == 'r')
2889 && (data[++index] == 'f')
2890 && (data[++index] == 'a')
2891 && (data[++index] == 'c')
2892 && (data[++index] == 'e'))
2893 return TokenNameinterface;
2895 return TokenNameIdentifier;
2896 case 10 : // instanceof
2897 if ((data[++index] == 'n')
2898 && (data[++index] == 's')
2899 && (data[++index] == 't')
2900 && (data[++index] == 'a')
2901 && (data[++index] == 'n')
2902 && (data[++index] == 'c')
2903 && (data[++index] == 'e')
2904 && (data[++index] == 'o')
2905 && (data[++index] == 'f'))
2906 return TokenNameinstanceof;
2907 if ((data[index] == 'm')
2908 && (data[++index] == 'p')
2909 && (data[++index] == 'l')
2910 && (data[++index] == 'e')
2911 && (data[++index] == 'm')
2912 && (data[++index] == 'e')
2913 && (data[++index] == 'n')
2914 && (data[++index] == 't')
2915 && (data[++index] == 's'))
2916 return TokenNameimplements;
2918 return TokenNameIdentifier;
2920 if ((data[++index] == 'n')
2921 && (data[++index] == 'c')
2922 && (data[++index] == 'l')
2923 && (data[++index] == 'u')
2924 && (data[++index] == 'd')
2925 && (data[++index] == 'e')
2926 && (data[++index] == '_')
2927 && (data[++index] == 'o')
2928 && (data[++index] == 'n')
2929 && (data[++index] == 'c')
2930 && (data[++index] == 'e'))
2931 return TokenNameinclude_once;
2933 return TokenNameIdentifier;
2935 return TokenNameIdentifier;
2940 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
2941 return TokenNamelist;
2944 return TokenNameIdentifier;
2946 case 'n' : // new null
2949 if ((data[++index] == 'e') && (data[++index] == 'w'))
2950 return TokenNamenew;
2952 return TokenNameIdentifier;
2954 if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l'))
2955 return TokenNamenull;
2957 return TokenNameIdentifier;
2960 return TokenNameIdentifier;
// Only "or" is recognised in this group; the old_function match is commented out.
2962 case 'o' : // or old_function
2964 if (data[++index] == 'r') {
2968 // if (length == 12) {
2969 // if ((data[++index] == 'l')
2970 // && (data[++index] == 'd')
2971 // && (data[++index] == '_')
2972 // && (data[++index] == 'f')
2973 // && (data[++index] == 'u')
2974 // && (data[++index] == 'n')
2975 // && (data[++index] == 'c')
2976 // && (data[++index] == 't')
2977 // && (data[++index] == 'i')
2978 // && (data[++index] == 'o')
2979 // && (data[++index] == 'n')) {
2980 // return TokenNameold_function;
2983 return TokenNameIdentifier;
2985 case 'p' : // print public private protected
2988 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
2989 return TokenNameprint;
2991 return TokenNameIdentifier;
2993 if ((data[++index] == 'u')
2994 && (data[++index] == 'b')
2995 && (data[++index] == 'l')
2996 && (data[++index] == 'i')
2997 && (data[++index] == 'c')) {
2998 return TokenNamepublic;
3000 return TokenNameIdentifier;
3002 if ((data[++index] == 'r')
3003 && (data[++index] == 'i')
3004 && (data[++index] == 'v')
3005 && (data[++index] == 'a')
3006 && (data[++index] == 't')
3007 && (data[++index] == 'e')) {
3008 return TokenNameprivate;
3010 return TokenNameIdentifier;
3012 if ((data[++index] == 'r')
3013 && (data[++index] == 'o')
3014 && (data[++index] == 't')
3015 && (data[++index] == 'e')
3016 && (data[++index] == 'c')
3017 && (data[++index] == 't')
3018 && (data[++index] == 'e')
3019 && (data[++index] == 'd')) {
3020 return TokenNameprotected;
3022 return TokenNameIdentifier;
3024 return TokenNameIdentifier;
// This group dispatches on length with an if / else-if chain rather than a switch.
3025 case 'r' : //return require require_once
3027 if ((data[++index] == 'e')
3028 && (data[++index] == 't')
3029 && (data[++index] == 'u')
3030 && (data[++index] == 'r')
3031 && (data[++index] == 'n')) {
3032 return TokenNamereturn;
3034 } else if (length == 7) {
3035 if ((data[++index] == 'e')
3036 && (data[++index] == 'q')
3037 && (data[++index] == 'u')
3038 && (data[++index] == 'i')
3039 && (data[++index] == 'r')
3040 && (data[++index] == 'e')) {
3041 return TokenNamerequire;
3043 } else if (length == 12) {
3044 if ((data[++index] == 'e')
3045 && (data[++index] == 'q')
3046 && (data[++index] == 'u')
3047 && (data[++index] == 'i')
3048 && (data[++index] == 'r')
3049 && (data[++index] == 'e')
3050 && (data[++index] == '_')
3051 && (data[++index] == 'o')
3052 && (data[++index] == 'n')
3053 && (data[++index] == 'c')
3054 && (data[++index] == 'e')) {
3055 return TokenNamerequire_once;
3058 return TokenNameIdentifier;
3060 case 's' : //static switch
3063 if (data[++index] == 't')
3064 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3065 return TokenNamestatic;
3067 return TokenNameIdentifier;
3069 (data[index] == 'w')
3070 && (data[++index] == 'i')
3071 && (data[++index] == 't')
3072 && (data[++index] == 'c')
3073 && (data[++index] == 'h'))
3074 return TokenNameswitch;
3076 return TokenNameIdentifier;
3078 return TokenNameIdentifier;
3081 case 't' : // try true throw
3084 if ((data[++index] == 'r') && (data[++index] == 'y'))
3085 return TokenNametry;
3087 return TokenNameIdentifier;
3089 if ((data[++index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e'))
3090 return TokenNametrue;
3092 return TokenNameIdentifier;
3094 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3095 return TokenNamethrow;
3097 return TokenNameIdentifier;
3100 return TokenNameIdentifier;
3102 case 'u' : //use unset
3105 if ((data[++index] == 's') && (data[++index] == 'e'))
3106 return TokenNameuse;
3108 return TokenNameIdentifier;
3110 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3111 return TokenNameunset;
3113 return TokenNameIdentifier;
3115 return TokenNameIdentifier;
3120 if ((data[++index] == 'a') && (data[++index] == 'r'))
3121 return TokenNamevar;
3123 return TokenNameIdentifier;
3126 return TokenNameIdentifier;
3132 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3133 return TokenNamewhile;
3135 return TokenNameIdentifier;
3136 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&&
3137 // (data[++index]=='p'))
3138 //return TokenNamewidefp ;
3140 //return TokenNameIdentifier;
3142 return TokenNameIdentifier;
3148 if ((data[++index] == 'o') && (data[++index] == 'r'))
3149 return TokenNameXOR;
3151 return TokenNameIdentifier;
3154 return TokenNameIdentifier;
3157 return TokenNameIdentifier;
// Scans a numeric literal whose first digit is already in currentCharacter.
// dotPrefix: true when the digits were preceded by a '.', which makes the
//            literal floating from the start (see the floating flag below).
// Returns TokenNameIntegerLiteral or TokenNameDoubleLiteral.
// Throws InvalidInputException with INVALID_HEXA when "0x" is not followed by a
// hexadecimal digit, and with INVALID_FLOAT when an exponent has no digits.
// The long ('l') and float ('f') suffix handling is commented out throughout.
3160 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3162 //when entering this method the currentCharacter is the firt
3163 //digit of the number , i.e. it may be preceeded by a . when
3166 boolean floating = dotPrefix;
3167 if ((!dotPrefix) && (currentCharacter == '0')) {
3168 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3169 //force the first char of the hexa number do exist...
3170 // consume next character
3171 unicodeAsBackSlash = false;
3172 currentCharacter = source[currentPosition++];
3173 // if (((currentCharacter = source[currentPosition++]) == '\\')
3174 // && (source[currentPosition] == 'u')) {
3175 // getNextUnicodeChar();
3177 // if (withoutUnicodePtr != 0) {
3178 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// At least one hexadecimal digit is required after the 0x prefix.
3181 if (Character.digit(currentCharacter, 16) == -1)
3182 throw new InvalidInputException(INVALID_HEXA);
3184 while (getNextCharAsDigit(16)) {
3186 // if (getNextChar('l', 'L') >= 0)
3187 // return TokenNameLongLiteral;
3189 return TokenNameIntegerLiteral;
3192 //there is x or X in the number
3193 //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3194 if (getNextCharAsDigit()) {
3195 //-------------potential octal-----------------
3196 while (getNextCharAsDigit()) {
3199 // if (getNextChar('l', 'L') >= 0) {
3200 // return TokenNameLongLiteral;
3203 // if (getNextChar('f', 'F') >= 0) {
3204 // return TokenNameFloatingPointLiteral;
// A trailing 'd' or 'D' turns the leading-zero digits into a double literal.
3207 if (getNextChar('d', 'D') >= 0) {
3208 return TokenNameDoubleLiteral;
3209 } else { //make the distinction between octal and float ....
3210 if (getNextChar('.')) { //bingo ! ....
3211 while (getNextCharAsDigit()) {
3213 if (getNextChar('e', 'E') >= 0) {
3214 // consume next character
3215 unicodeAsBackSlash = false;
3216 currentCharacter = source[currentPosition++];
3217 // if (((currentCharacter = source[currentPosition++]) == '\\')
3218 // && (source[currentPosition] == 'u')) {
3219 // getNextUnicodeChar();
3221 // if (withoutUnicodePtr != 0) {
3222 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Optional sign of the exponent.
3226 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3227 // consume next character
3228 unicodeAsBackSlash = false;
3229 currentCharacter = source[currentPosition++];
3230 // if (((currentCharacter = source[currentPosition++]) == '\\')
3231 // && (source[currentPosition] == 'u')) {
3232 // getNextUnicodeChar();
3234 // if (withoutUnicodePtr != 0) {
3235 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3236 // currentCharacter;
// The exponent needs at least one digit.
3240 if (!Character.isDigit(currentCharacter))
3241 throw new InvalidInputException(INVALID_FLOAT);
3242 while (getNextCharAsDigit()) {
3245 // if (getNextChar('f', 'F') >= 0)
3246 // return TokenNameFloatingPointLiteral;
3247 getNextChar('d', 'D'); //jump over potential d or D
3248 return TokenNameDoubleLiteral;
3250 return TokenNameIntegerLiteral;
// General decimal path: integer digits, optional fraction, optional exponent.
3258 while (getNextCharAsDigit()) {
3261 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3262 // return TokenNameLongLiteral;
3264 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3265 while (getNextCharAsDigit()) {
3270 //if floating is true both exponant and suffix may be optional
3272 if (getNextChar('e', 'E') >= 0) {
3274 // consume next character
3275 unicodeAsBackSlash = false;
3276 currentCharacter = source[currentPosition++];
3277 // if (((currentCharacter = source[currentPosition++]) == '\\')
3278 // && (source[currentPosition] == 'u')) {
3279 // getNextUnicodeChar();
3281 // if (withoutUnicodePtr != 0) {
3282 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3286 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
3287 unicodeAsBackSlash = false;
3288 currentCharacter = source[currentPosition++];
3289 // if (((currentCharacter = source[currentPosition++]) == '\\')
3290 // && (source[currentPosition] == 'u')) {
3291 // getNextUnicodeChar();
3293 // if (withoutUnicodePtr != 0) {
3294 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3298 if (!Character.isDigit(currentCharacter))
3299 throw new InvalidInputException(INVALID_FLOAT);
3300 while (getNextCharAsDigit()) {
3304 if (getNextChar('d', 'D') >= 0)
3305 return TokenNameDoubleLiteral;
3306 // if (getNextChar('f', 'F') >= 0)
3307 // return TokenNameFloatingPointLiteral;
3309 //the long flag has been tested before
// Without a 'd' suffix, the floating flag decides the literal kind.
3311 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3314 * Search the line number corresponding to a specific position
// Looks up the line that contains position, using the recorded line ends
// lineEnds[0..linePtr].
// NOTE(review): appears to be a binary search between g and d with probe m,
// returning a 1-based line number - confirm against the full loop.
3317 public final int getLineNumber(int position) {
// No line-end table has been recorded at all.
3319 if (lineEnds == null)
// Number of valid entries in lineEnds.
3321 int length = linePtr + 1;
3324 int g = 0, d = length - 1;
3328 if (position < lineEnds[m]) {
3330 } else if (position > lineEnds[m]) {
3336 if (position < lineEnds[m]) {
// Setter taking the new PHP-mode flag.
// NOTE(review): presumably assigns mode to the phpMode field - confirm.
3342 public void setPHPMode(boolean mode) {
// Installs the character buffer to scan and rewinds the scanner to its start.
// source: the text to scan; null is replaced by an empty buffer.
3346 public final void setSource(char[] source) {
3347 //the source-buffer is set to sourceString
3349 if (source == null) {
3350 this.source = new char[0];
3352 this.source = source;
3355 initialPosition = currentPosition = 0;
3356 containsAssertKeyword = false;
// Scratch buffer sized to the whole source.
3357 withoutUnicodeBuffer = new char[this.source.length];
3361 public String toString() {
3362 if (startPosition == source.length)
3363 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3364 if (currentPosition > source.length)
3365 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3367 char front[] = new char[startPosition];
3368 System.arraycopy(source, 0, front, 0, startPosition);
3370 int middleLength = (currentPosition - 1) - startPosition + 1;
3372 if (middleLength > -1) {
3373 middle = new char[middleLength];
3374 System.arraycopy(source, startPosition, middle, 0, middleLength);
3376 middle = new char[0];
3379 char end[] = new char[source.length - (currentPosition - 1)];
3380 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3382 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3383 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
// Returns a printable description of the token id act, for debugging output.
// Keywords and operators map to their spelling. Tokens that carry text
// (identifiers, variables, literals, comments, whitespace, HTML) also include
// the current token source from getCurrentTokenSource(). The last return
// builds a "not-a-token(<id>)" string, presumably for ids with no case - confirm.
3386 public final String toStringAction(int act) {
3389 case TokenNameERROR :
3390 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3391 case TokenNameStopPHP :
3392 return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3393 case TokenNameIdentifier :
3394 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3395 case TokenNameVariable :
3396 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// --- keywords ---
3398 return "as"; //$NON-NLS-1$
3399 case TokenNamebreak :
3400 return "break"; //$NON-NLS-1$
3401 case TokenNamecase :
3402 return "case"; //$NON-NLS-1$
3403 case TokenNameclass :
3404 return "class"; //$NON-NLS-1$
3405 case TokenNamecontinue :
3406 return "continue"; //$NON-NLS-1$
3407 case TokenNamedefault :
3408 return "default"; //$NON-NLS-1$
3409 case TokenNamedefine :
3410 return "define"; //$NON-NLS-1$
3412 return "do"; //$NON-NLS-1$
3413 case TokenNameecho :
3414 return "echo"; //$NON-NLS-1$
3415 case TokenNameelse :
3416 return "else"; //$NON-NLS-1$
3417 case TokenNameelseif :
3418 return "elseif"; //$NON-NLS-1$
3419 case TokenNameendfor :
3420 return "endfor"; //$NON-NLS-1$
3421 case TokenNameendforeach :
3422 return "endforeach"; //$NON-NLS-1$
3423 case TokenNameendif :
3424 return "endif"; //$NON-NLS-1$
3425 case TokenNameendswitch :
3426 return "endswitch"; //$NON-NLS-1$
3427 case TokenNameendwhile :
3428 return "endwhile"; //$NON-NLS-1$
3429 case TokenNameextends :
3430 return "extends"; //$NON-NLS-1$
3431 case TokenNamefalse :
3432 return "false"; //$NON-NLS-1$
3434 return "for"; //$NON-NLS-1$
3435 case TokenNameforeach :
3436 return "foreach"; //$NON-NLS-1$
3437 case TokenNamefunction :
3438 return "function"; //$NON-NLS-1$
3439 case TokenNameglobal :
3440 return "global"; //$NON-NLS-1$
3442 return "if"; //$NON-NLS-1$
3443 case TokenNameinclude :
3444 return "include"; //$NON-NLS-1$
3445 case TokenNameinclude_once :
3446 return "include_once"; //$NON-NLS-1$
3447 case TokenNamelist :
3448 return "list"; //$NON-NLS-1$
3450 return "new"; //$NON-NLS-1$
3451 case TokenNamenull :
3452 return "null"; //$NON-NLS-1$
3453 case TokenNameprint :
3454 return "print"; //$NON-NLS-1$
3455 case TokenNamerequire :
3456 return "require"; //$NON-NLS-1$
3457 case TokenNamerequire_once :
3458 return "require_once"; //$NON-NLS-1$
3459 case TokenNamereturn :
3460 return "return"; //$NON-NLS-1$
3461 case TokenNamestatic :
3462 return "static"; //$NON-NLS-1$
3463 case TokenNameswitch :
3464 return "switch"; //$NON-NLS-1$
3465 case TokenNametrue :
3466 return "true"; //$NON-NLS-1$
3468 return "var"; //$NON-NLS-1$
3469 case TokenNamewhile :
3470 return "while"; //$NON-NLS-1$
3471 case TokenNamethis :
3472 return "$this"; //$NON-NLS-1$
// --- literals: shown together with their source text ---
3473 case TokenNameIntegerLiteral :
3474 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3475 case TokenNameDoubleLiteral :
3476 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3477 case TokenNameStringLiteral :
3478 return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3479 case TokenNameStringConstant :
3480 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3481 case TokenNameStringInterpolated :
3482 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3483 case TokenNameHEREDOC :
3484 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// --- operators and punctuation ---
3486 case TokenNamePLUS_PLUS :
3487 return "++"; //$NON-NLS-1$
3488 case TokenNameMINUS_MINUS :
3489 return "--"; //$NON-NLS-1$
3490 case TokenNameEQUAL_EQUAL :
3491 return "=="; //$NON-NLS-1$
3492 case TokenNameEQUAL_EQUAL_EQUAL :
3493 return "==="; //$NON-NLS-1$
3494 case TokenNameEQUAL_GREATER :
3495 return "=>"; //$NON-NLS-1$
3496 case TokenNameLESS_EQUAL :
3497 return "<="; //$NON-NLS-1$
3498 case TokenNameGREATER_EQUAL :
3499 return ">="; //$NON-NLS-1$
3500 case TokenNameNOT_EQUAL :
3501 return "!="; //$NON-NLS-1$
3502 case TokenNameNOT_EQUAL_EQUAL :
3503 return "!=="; //$NON-NLS-1$
3504 case TokenNameLEFT_SHIFT :
3505 return "<<"; //$NON-NLS-1$
3506 case TokenNameRIGHT_SHIFT :
3507 return ">>"; //$NON-NLS-1$
3508 case TokenNamePLUS_EQUAL :
3509 return "+="; //$NON-NLS-1$
3510 case TokenNameMINUS_EQUAL :
3511 return "-="; //$NON-NLS-1$
3512 case TokenNameMULTIPLY_EQUAL :
3513 return "*="; //$NON-NLS-1$
3514 case TokenNameDIVIDE_EQUAL :
3515 return "/="; //$NON-NLS-1$
3516 case TokenNameAND_EQUAL :
3517 return "&="; //$NON-NLS-1$
3518 case TokenNameOR_EQUAL :
3519 return "|="; //$NON-NLS-1$
3520 case TokenNameXOR_EQUAL :
3521 return "^="; //$NON-NLS-1$
3522 case TokenNameREMAINDER_EQUAL :
3523 return "%="; //$NON-NLS-1$
3524 case TokenNameLEFT_SHIFT_EQUAL :
3525 return "<<="; //$NON-NLS-1$
3526 case TokenNameRIGHT_SHIFT_EQUAL :
3527 return ">>="; //$NON-NLS-1$
3528 case TokenNameOR_OR :
3529 return "||"; //$NON-NLS-1$
3530 case TokenNameAND_AND :
3531 return "&&"; //$NON-NLS-1$
3532 case TokenNamePLUS :
3533 return "+"; //$NON-NLS-1$
3534 case TokenNameMINUS :
3535 return "-"; //$NON-NLS-1$
3536 case TokenNameMINUS_GREATER :
3539 return "!"; //$NON-NLS-1$
3540 case TokenNameREMAINDER :
3541 return "%"; //$NON-NLS-1$
3543 return "^"; //$NON-NLS-1$
3545 return "&"; //$NON-NLS-1$
3546 case TokenNameMULTIPLY :
3547 return "*"; //$NON-NLS-1$
3549 return "|"; //$NON-NLS-1$
3550 case TokenNameTWIDDLE :
3551 return "~"; //$NON-NLS-1$
3552 case TokenNameTWIDDLE_EQUAL :
3553 return "~="; //$NON-NLS-1$
3554 case TokenNameDIVIDE :
3555 return "/"; //$NON-NLS-1$
3556 case TokenNameGREATER :
3557 return ">"; //$NON-NLS-1$
3558 case TokenNameLESS :
3559 return "<"; //$NON-NLS-1$
3560 case TokenNameLPAREN :
3561 return "("; //$NON-NLS-1$
3562 case TokenNameRPAREN :
3563 return ")"; //$NON-NLS-1$
3564 case TokenNameLBRACE :
3565 return "{"; //$NON-NLS-1$
3566 case TokenNameRBRACE :
3567 return "}"; //$NON-NLS-1$
3568 case TokenNameLBRACKET :
3569 return "["; //$NON-NLS-1$
3570 case TokenNameRBRACKET :
3571 return "]"; //$NON-NLS-1$
3572 case TokenNameSEMICOLON :
3573 return ";"; //$NON-NLS-1$
3574 case TokenNameQUESTION :
3575 return "?"; //$NON-NLS-1$
3576 case TokenNameCOLON :
3577 return ":"; //$NON-NLS-1$
3578 case TokenNameCOMMA :
3579 return ","; //$NON-NLS-1$
3581 return "."; //$NON-NLS-1$
3582 case TokenNameEQUAL :
3583 return "="; //$NON-NLS-1$
3586 case TokenNameDOLLAR_LBRACE :
3589 return "EOF"; //$NON-NLS-1$
// --- trivia and embedded HTML: shown together with their source text ---
3590 case TokenNameWHITESPACE :
3591 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3592 case TokenNameCOMMENT_LINE :
3593 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3594 case TokenNameCOMMENT_BLOCK :
3595 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3596 case TokenNameCOMMENT_PHPDOC :
3597 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3598 case TokenNameHTML :
3599 return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// NOTE(review): wrapping act in new Integer(...) is unnecessary here; string
// concatenation converts an int directly.
3601 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner with assert mode switched off.
 * The three flags are forwarded unchanged to the four-argument constructor,
 * which receives <code>false</code> as its <code>assertMode</code> argument.
 *
 * @param tokenizeComments forwarded as-is to the four-argument constructor
 * @param tokenizeWhiteSpace forwarded as-is to the four-argument constructor
 * @param checkNonExternalizedStringLiterals forwarded as-is to the four-argument constructor
 */
3605 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
3606 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
// Parameters of the full constructor. Each flag is copied verbatim into the
// field of the same name by the assignments below; no validation is done.
3610 boolean tokenizeComments,
3611 boolean tokenizeWhiteSpace,
3612 boolean checkNonExternalizedStringLiterals,
3613 boolean assertMode) {
// eofPosition is the position after which EOF is generated instead of real
// tokens (see the comment on the field declaration). Integer.MAX_VALUE places
// that limit beyond any possible array index.
3614 this.eofPosition = Integer.MAX_VALUE;
3615 this.tokenizeComments = tokenizeComments;
3616 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3617 this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3618 this.assertMode = assertMode;
/**
 * Hands <code>currentLine</code> to <code>parseTags</code> so that the tags
 * in the current token source are matched against the literals of that line.
 *
 * @throws InvalidInputException declared because <code>parseTags</code> declares it
 */
3621 private void checkNonExternalizeString() throws InvalidInputException {
// NOTE(review): the statement controlled by this null test is not shown here;
// presumably it is an early return so that parseTags never receives null - confirm.
3622 if (currentLine == null)
3624 parseTags(currentLine);
/**
 * Searches the current token source for tags of the form
 * TAG_PREFIX + number + TAG_POSTFIX, then gathers every entry of
 * <code>line</code> that is still non-null into <code>nonNLSStrings</code>.
 *
 * @param line the entries to check; its size is the upper bound for the
 *             result array and its iterator is expected to yield
 *             <code>StringLiteral</code> objects or null
 * @throws InvalidInputException declared by the signature. NOTE(review): no
 *             statement visible in this method throws it directly - confirm
 *             where it originates.
 */
3627 private void parseTags(NLSLine line) throws InvalidInputException {
// Work on a String copy of the token so indexOf/substring can be used.
3628 String s = new String(getCurrentTokenSource());
3629 int pos = s.indexOf(TAG_PREFIX);
// Remember the size up front: it sizes the result array and is compared
// against the number of surviving literals at the end.
3630 int lineLength = line.size();
// The tag number sits between the end of the prefix and the next postfix.
3632 int start = pos + TAG_PREFIX_LENGTH;
3633 int end = s.indexOf(TAG_POSTFIX, start);
// NOTE(review): indexOf returns -1 when no TAG_POSTFIX follows; substring(start, -1)
// would then throw StringIndexOutOfBoundsException - confirm whether that can happen.
3634 String index = s.substring(start, end);
3637 i = Integer.parseInt(index) - 1;
3638 // Tags are one based not zero based.
3639 } catch (NumberFormatException e) {
3640 i = -1; // we don't want to consider this as a valid NLS tag
// NOTE(review): what happens to an existing entry is not shown here; presumably
// it is cleared so that the null test in the loop below skips it - confirm.
3642 if (line.exists(i)) {
// Continue the search behind the prefix that was just handled.
3645 pos = s.indexOf(TAG_PREFIX, start);
// Size for the worst case in which every entry of the line survives.
3648 this.nonNLSStrings = new StringLiteral[lineLength];
3649 int nonNLSCounter = 0;
3650 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3651 StringLiteral literal = (StringLiteral) iterator.next();
// Pack the non-null literals at the front of the array, preserving order.
3652 if (literal != null) {
3653 this.nonNLSStrings[nonNLSCounter++] = literal;
// Nothing survived: publish null instead of an empty array.
3656 if (nonNLSCounter == 0) {
3657 this.nonNLSStrings = null;
3661 this.wasNonExternalizedStringLiteral = true;
// Trim the array to the number of literals actually stored. Arguments are
// evaluated left to right, so the first argument still refers to the old
// array while the third one allocates and assigns the shorter replacement.
3662 if (nonNLSCounter != lineLength) {
3663 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
/**
 * Decodes the escape sequence at the current position and leaves the
 * resulting character in <code>currentCharacter</code>.
 * Handles the single-character escapes assigned in the switch below and
 * octal escapes of one to three digits.
 *
 * @throws InvalidInputException with INVALID_ESCAPE when the escape is not
 *             recognised. NOTE(review): a second throw follows the octal
 *             decoding; its guarding condition is not shown here - presumably
 *             a range check on the decoded value, confirm.
 */
3668 public final void scanEscapeCharacter() throws InvalidInputException {
3669 // the string with "\\u" is a legal string of two chars \ and u
3670 //thus we use a direct access to the source (for regular cases).
3672 if (unicodeAsBackSlash) {
3673 // consume next character
3674 unicodeAsBackSlash = false;
3675 // if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
3676 // getNextUnicodeChar();
// Append the character to withoutUnicodeBuffer, but only when that buffer is
// already in use (a pointer of 0 means nothing has been written to it).
3678 if (withoutUnicodePtr != 0) {
3679 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Read the escape character straight from the source array and step past it.
3683 currentCharacter = source[currentPosition++];
// Each assignment below replaces the escape letter with the character it denotes.
3684 switch (currentCharacter) {
3686 currentCharacter = '\b';
3689 currentCharacter = '\t';
3692 currentCharacter = '\n';
3695 currentCharacter = '\f';
3698 currentCharacter = '\r';
3701 currentCharacter = '\"';
3704 currentCharacter = '\'';
3707 currentCharacter = '\\';
3710 // -----------octal escape--------------
3712 // OctalDigit OctalDigit
3713 // ZeroToThree OctalDigit OctalDigit
// getNumericValue yields a negative value for non-numeric characters and
// 10 or more for letters, so the 0..7 range test accepts octal digits only.
3715 int number = Character.getNumericValue(currentCharacter);
3716 if (number >= 0 && number <= 7) {
// True when the first digit is 4-7: per the forms listed above, only a first
// digit of 0-3 may be followed by two further octal digits.
3717 boolean zeroToThreeNot = number > 3;
// Look at the second character; this advances currentPosition even when the
// character turns out not to belong to the escape.
3718 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3719 int digit = Character.getNumericValue(currentCharacter);
3720 if (digit >= 0 && digit <= 7) {
// Shift the value accumulated so far by one octal place and add the new digit.
3721 number = (number * 8) + digit;
3722 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3723 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
3726 digit = Character.getNumericValue(currentCharacter);
3727 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree OctalDigit OctalDigit
3728 number = (number * 8) + digit;
3729 } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
3733 } else { // has read \OctalDigit NonDigit--> ignore last character
3736 } else { // has read \OctalDigit NonOctalDigit--> ignore last character
3739 } else { // has read \OctalDigit --> ignore last character
3743 throw new InvalidInputException(INVALID_ESCAPE);
// The decoded octal value becomes the current character.
3744 currentCharacter = (char) number;
// Reached when the first character after the backslash is not an octal digit.
3746 throw new InvalidInputException(INVALID_ESCAPE);
3750 // check for the presence of task: tags
3751 public void checkTaskTag(int commentStart, int commentEnd) {
3753 // only look for newer task: tags
3754 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
3757 int foundTaskIndex = this.foundTaskCount;
3758 nextChar : for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
3761 char[] priority = null;
3763 // check for tag occurrence
3764 nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
3765 tag = this.taskTags[itag];
3766 priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
3767 int tagLength = tag.length;
3768 for (int t = 0; t < tagLength; t++) {
3769 if (this.source[i + t] != tag[t])
3773 if (this.foundTaskTags == null) {
3774 this.foundTaskTags = new char[5][];
3775 this.foundTaskMessages = new char[5][];
3776 this.foundTaskPriorities = new char[5][];
3777 this.foundTaskPositions = new int[5][];
3778 } else if (this.foundTaskCount == this.foundTaskTags.length) {
3779 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
3781 this.foundTaskMessages,
3783 this.foundTaskMessages = new char[this.foundTaskCount * 2][],
3785 this.foundTaskCount);
3787 this.foundTaskPriorities,
3789 this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
3791 this.foundTaskCount);
3793 this.foundTaskPositions,
3795 this.foundTaskPositions = new int[this.foundTaskCount * 2][],
3797 this.foundTaskCount);
3799 this.foundTaskTags[this.foundTaskCount] = tag;
3800 this.foundTaskPriorities[this.foundTaskCount] = priority;
3801 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
3802 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
3803 this.foundTaskCount++;
3805 i += tagLength - 1; // will be incremented when looping
3809 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
3810 // retrieve message start and end positions
3811 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
3812 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
3813 // at most beginning of next task
3814 if (max_value < msgStart)
3815 max_value = msgStart; // would only occur if tag is before EOF.
3819 for (int j = msgStart; j < max_value; j++) {
3820 if ((c = this.source[j]) == '\n' || c == '\r') {
3827 for (int j = max_value; j > msgStart; j--) {
3828 if ((c = this.source[j]) == '*') {
3837 if (msgStart == end)
3841 while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
3843 while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
3846 // update the end position of the task
3847 this.foundTaskPositions[i][1] = end;
3849 // get the message source
3850 final int messageLength = end - msgStart + 1;
3851 char[] message = new char[messageLength];
3853 System.arraycopy(source, msgStart, message, 0, messageLength);
3854 this.foundTaskMessages[i] = message;