1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
17 import net.sourceforge.phpdt.core.compiler.CharOperation;
18 import net.sourceforge.phpdt.core.compiler.IScanner;
19 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
20 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
21 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
23 public class Scanner implements IScanner, ITerminalSymbols {
26 * The APIs are: - getNextToken(), which returns the type of the current token (this value is not remembered by the scanner) -
27 * getCurrentTokenSource(), which provides the token's "REAL" source (i.e. all unicode escapes have been converted into the
28 * corresponding char) - sourceStart gives the position in the stream - currentPosition-1 gives the sourceEnd position in the stream
// ---- Scanner state ----
// NOTE(review): 'useAssertAsAnIndentifier' is misspelled, but it is a public field, so renaming it would break callers.
32 private boolean assertMode;
33 public boolean useAssertAsAnIndentifier = false;
34 //flag indicating if processed source contains occurrences of keyword assert
35 public boolean containsAssertKeyword = false;
37 public boolean recordLineSeparator;
38 public boolean phpMode = false;
// Cursor state: the last character read, the start of the current token, and the read position
// (the getNextChar... methods read source[currentPosition++]).
40 public char currentCharacter;
41 public int startPosition;
42 public int currentPosition;
43 public int initialPosition, eofPosition;
44 // beyond eofPosition, EOF is generated instead of real tokens from the source
46 public boolean tokenizeComments;
47 public boolean tokenizeWhiteSpace;
49 //source should be viewed as a window (i.e. a part)
50 //of an entire, very large stream
// Buffer holding the token text once escapes have been resolved; index 0 is reserved as a
// "nothing buffered" flag, so the first real character lives at index 1.
54 public char[] withoutUnicodeBuffer;
55 public int withoutUnicodePtr;
56 //when == 0 ==> no unicode in the current token
57 public boolean unicodeAsBackSlash = false;
59 public boolean scanningFloatLiteral = false;
61 //support for /** comments
62 //public char[][] comments = new char[10][];
63 public int[] commentStops = new int[10];
64 public int[] commentStarts = new int[10];
65 public int commentPtr = -1; // no comment test with commentPtr value -1
67 //diet parsing support - jump over some method body when requested
68 public boolean diet = false;
70 //support for the poor-line-debuggers ....
71 //remember the position of the cr/lf
// lineEnds starts with a capacity of 250 entries; linePtr == -1 means no line end has been recorded.
72 public int[] lineEnds = new int[250];
73 public int linePtr = -1;
74 public boolean wasAcr = false;
// Message keys for scanner errors. Within this file they are used as the message of
// InvalidInputException (e.g. UNTERMINATED_STRING) and compared against e.getMessage() (INVALID_ESCAPE).
76 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
78 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
79 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
80 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
81 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
82 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
83 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
84 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
86 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
87 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
88 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
89 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
91 //----------------optimized identifier management------------------
// One shared single-character array per lowercase ASCII letter 'a'..'z'.
92 static final char[] charArray_a = new char[] { 'a' },
93 charArray_b = new char[] { 'b' },
94 charArray_c = new char[] { 'c' },
95 charArray_d = new char[] { 'd' },
96 charArray_e = new char[] { 'e' },
97 charArray_f = new char[] { 'f' },
98 charArray_g = new char[] { 'g' },
99 charArray_h = new char[] { 'h' },
100 charArray_i = new char[] { 'i' },
101 charArray_j = new char[] { 'j' },
102 charArray_k = new char[] { 'k' },
103 charArray_l = new char[] { 'l' },
104 charArray_m = new char[] { 'm' },
105 charArray_n = new char[] { 'n' },
106 charArray_o = new char[] { 'o' },
107 charArray_p = new char[] { 'p' },
108 charArray_q = new char[] { 'q' },
109 charArray_r = new char[] { 'r' },
110 charArray_s = new char[] { 's' },
111 charArray_t = new char[] { 't' },
112 charArray_u = new char[] { 'u' },
113 charArray_v = new char[] { 'v' },
114 charArray_w = new char[] { 'w' },
115 charArray_x = new char[] { 'x' },
116 charArray_y = new char[] { 'y' },
117 charArray_z = new char[] { 'z' };
// Placeholder entry (six NUL characters) used to fill every slot of charArray_length at initialization.
119 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
120 static final int TableSize = 30, InternalTableSize = 6;
// Tokens of length 1..OptimizedLength are routed to the optimizedCurrentTokenSourceN() methods
// by getCurrentIdentifierSource().
122 public static final int OptimizedLength = 6;
// Per-instance table, dimensioned [OptimizedLength][TableSize][InternalTableSize]; each slot holds a char[].
124 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
125 // support for detecting non-externalized string literals
126 int currentLineNr = -1;
127 int previousLineNr = -1;
// currentLine is created lazily by getNextToken() when a string literal is scanned and is also
// appended to 'lines' at that point.
128 NLSLine currentLine = null;
129 List lines = new ArrayList();
// A tag is TAG_PREFIX followed by TAG_POSTFIX with text in between, e.g. //$NON-NLS-1$ as used in this file.
130 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
131 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
132 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
133 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
134 public StringLiteral[] nonNLSStrings = null;
// When true, getNextToken() records every scanned string literal in currentLine.
135 public boolean checkNonExternalizedStringLiterals = true;
136 public boolean wasNonExternalizedStringLiteral = false;
// Fills every slot of charArray_length with the shared initCharArray placeholder.
// NOTE(review): the outer bound is the literal 6, while the array's first dimension is declared
// with OptimizedLength (also 6). Consider using the constant so the two cannot drift apart.
139 for (int i = 0; i < 6; i++) {
140 for (int j = 0; j < TableSize; j++) {
141 for (int k = 0; k < InternalTableSize; k++) {
142 charArray_length[i][j][k] = initCharArray;
// NOTE(review): these counters are static (shared by all Scanner instances); their use is outside
// this listing - presumably next-slot indices for the optimized token tables, TODO confirm.
147 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
// Bracket kind indices 0..2; BracketKinds is the number of kinds.
149 public static final int RoundBracket = 0;
150 public static final int SquareBracket = 1;
151 public static final int CurlyBracket = 2;
152 public static final int BracketKinds = 3;
// Task-tag bookkeeping. NOTE(review): presumably taskTags/taskPriorities are the tags to look for
// and the foundTask* arrays collect matches - their use is outside this listing, confirm.
155 public char[][] foundTaskTags = null;
156 public char[][] foundTaskMessages;
157 public char[][] foundTaskPriorities = null;
158 public int[][] foundTaskPositions;
159 public int foundTaskCount = 0;
160 public char[][] taskTags = null;
161 public char[][] taskPriorities = null;
// NOTE(review): DEBUG is hard-wired to true; its uses are outside this listing.
163 public static final boolean DEBUG = true;
// Convenience constructor: forwards both flags to the three-argument constructor,
// passing false as the third argument (that constructor is outside this listing).
168 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
169 this(tokenizeComments, tokenizeWhiteSpace, false);
173 * Determines if the specified character is permissible as the first character in a PHP identifier
// Returns true when ch is a letter according to Character.isLetter, an underscore,
// or a character in the range 0x7F..0xFF (both bounds inclusive).
// Digits are not accepted here; compare isPHPIdentifierPart, which uses isLetterOrDigit.
175 public static boolean isPHPIdentifierStart(char ch) {
176 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
180 * Determines if the specified character may be part of a PHP identifier as other than the first character
// Returns true when ch is a letter or digit according to Character.isLetterOrDigit,
// an underscore, or a character in the range 0x7F..0xFF (both bounds inclusive).
182 public static boolean isPHPIdentifierPart(char ch) {
183 return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
// Reports whether currentPosition is exactly the length of the source array.
186 public final boolean atEnd() {
187 // This check is not meaningful when source is
188 // only a part of the real input stream
190 return source.length == currentPosition;
// Returns the characters of the current token, i.e. the span of
// (currentPosition - startPosition) characters of source starting at startPosition.
// The switch dispatches on that length to optimizedCurrentTokenSource1..6.
// NOTE(review): the case labels are not visible in this listing; presumably they are
// 1..6, matching OptimizedLength - confirm. The final arraycopy copies the span into
// a freshly allocated array of exactly 'length' characters.
192 public char[] getCurrentIdentifierSource() {
193 //return the token REAL source (aka unicodes are precomputed)
196 // if (withoutUnicodePtr != 0)
197 // //0 is used as a fast test flag so the real first char is in position 1
199 // withoutUnicodeBuffer,
201 // result = new char[withoutUnicodePtr],
203 // withoutUnicodePtr);
205 int length = currentPosition - startPosition;
206 switch (length) { // see OptimizedLength
208 return optimizedCurrentTokenSource1();
210 return optimizedCurrentTokenSource2();
212 return optimizedCurrentTokenSource3();
214 return optimizedCurrentTokenSource4();
216 return optimizedCurrentTokenSource5();
218 return optimizedCurrentTokenSource6();
221 System.arraycopy(source, startPosition, result = new char[length], 0, length);
// Returns the index of the last character of the current token: currentPosition
// points one past the token, so the end position is currentPosition - 1.
225 public int getCurrentTokenEndPosition() {
226 return this.currentPosition - 1;
// Copies the current token, i.e. the (currentPosition - startPosition) characters of
// source beginning at startPosition, into a newly allocated array.
// The withoutUnicodeBuffer variant below is commented out, so the text always comes from source.
229 public final char[] getCurrentTokenSource() {
230 // Return the token REAL source (aka unicodes are precomputed)
233 // if (withoutUnicodePtr != 0)
234 // // 0 is used as a fast test flag so the real first char is in position 1
236 // withoutUnicodeBuffer,
238 // result = new char[withoutUnicodePtr],
240 // withoutUnicodePtr);
243 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
// Same as the no-argument overload, but the copied span starts at the caller-supplied
// startPos instead of startPosition: (currentPosition - startPos) characters of source
// are copied into a newly allocated array.
// NOTE(review): startPos is not validated here; a value greater than currentPosition
// would request a negative array length.
248 public final char[] getCurrentTokenSource(int startPos) {
249 // Return the token REAL source (aka unicodes are precomputed)
252 // if (withoutUnicodePtr != 0)
253 // // 0 is used as a fast test flag so the real first char is in position 1
255 // withoutUnicodeBuffer,
257 // result = new char[withoutUnicodePtr],
259 // withoutUnicodePtr);
262 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
// Returns the text of the current string token without its first and last character
// (the quotes). Two sources are possible:
//  - withoutUnicodePtr != 0: the text was buffered in withoutUnicodeBuffer (filled from
//    index 1), so the copy starts at index 2 and takes withoutUnicodePtr - 2 characters;
//  - otherwise: the copy is taken directly from source, from startPosition + 1, with
//    length currentPosition - startPosition - 2.
267 public final char[] getCurrentTokenSourceString() {
268 //return the token REAL source (aka unicodes are precomputed).
269 //REMOVE the two " that are at the beginning and the end.
272 if (withoutUnicodePtr != 0)
273 //0 is used as a fast test flag so the real first char is in position 1
274 System.arraycopy(withoutUnicodeBuffer, 2,
275 //2 is 1 (real start) + 1 (to jump over the ")
276 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
279 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
// Returns startPosition, the index in source at which the current token begins.
283 public int getCurrentTokenStartPosition() {
284 return this.startPosition;
// Copies the current token without its first and last character: the copy starts at
// startPosition + 1 and has length currentPosition - startPosition - 2. Unlike
// getCurrentTokenSourceString(), the visible code reads from source only and never
// consults withoutUnicodeBuffer.
287 public final char[] getCurrentStringLiteralSource() {
288 // Return the token REAL source (aka unicodes are precomputed)
293 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
299 * Search the source position corresponding to the end of a given line number
301 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
303 * In case the given line number is inconsistent, answers -1.
// Looks up the end position of the given 1-based line in lineEnds. On the normal path
// the answer is lineEnds[lineNumber - 1] (the array is 0-based).
// NOTE(review): the guards compare lineNumber against lineEnds.length, which is the
// allocated capacity of the array, not the number of recorded line ends (linePtr).
// NOTE(review): the values returned by the guards are not visible in this listing; per
// the method documentation an inconsistent line number answers -1 - confirm.
305 public final int getLineEnd(int lineNumber) {
307 if (lineEnds == null)
309 if (lineNumber >= lineEnds.length)
314 if (lineNumber == lineEnds.length - 1)
316 return lineEnds[lineNumber - 1];
317 // the next line starts one character after the lineEnd of the previous line
320 * Search the source position corresponding to the beginning of a given line number
322 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
324 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
326 * In case the given line number is inconsistent, answers -1.
// Looks up the start position of the given 1-based line. A line starts one character
// after the recorded end of the line before it, hence lineEnds[lineNumber - 2] + 1.
// The 'return initialPosition' branch handles the line that has no predecessor.
// NOTE(review): the condition guarding that branch and the values returned by the two
// visible guards are not shown in this listing; per the method documentation an
// inconsistent line number answers -1 - confirm.
328 public final int getLineStart(int lineNumber) {
330 if (lineEnds == null)
332 if (lineNumber >= lineEnds.length)
338 return initialPosition;
339 return lineEnds[lineNumber - 2] + 1;
340 // the next line starts one character after the lineEnd of the previous line
// Conditionally consumes one character: reads source[currentPosition] into
// currentCharacter and advances currentPosition. If the character differs from
// testedChar, currentPosition is rolled back to its value on entry. Running past the
// end of source (IndexOutOfBoundsException) also rolls currentPosition back.
// unicodeAsBackSlash is reset to false on the match path and on the out-of-bounds path.
// The unicode-escape handling is commented out, so characters are taken verbatim from source.
// NOTE(review): on a mismatch only currentPosition is restored; currentCharacter keeps
// the character that was peeked, so "no side effect" below is not strictly accurate.
// NOTE(review): the return statements are not visible in this listing.
342 public final boolean getNextChar(char testedChar) {
344 //handle the case of unicode.
345 //when a unicode appears then we must use a buffer that holds char internal values
346 //At the end of this method currentCharacter holds the new visited char
347 //and currentPosition points right next after it
348 //Both previous lines are true if the currentCharacter is == to the testedChar
349 //On false, no side effect has occurred.
351 //ALL getNextChar.... ARE OPTIMIZED COPIES
353 int temp = currentPosition;
355 currentCharacter = source[currentPosition++];
356 // if (((currentCharacter = source[currentPosition++]) == '\\')
357 // && (source[currentPosition] == 'u')) {
358 // //-------------unicode traitement ------------
359 // int c1, c2, c3, c4;
360 // int unicodeSize = 6;
361 // currentPosition++;
362 // while (source[currentPosition] == 'u') {
363 // currentPosition++;
367 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
369 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
371 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
373 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
375 // currentPosition = temp;
379 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
380 // if (currentCharacter != testedChar) {
381 // currentPosition = temp;
384 // unicodeAsBackSlash = currentCharacter == '\\';
386 // //need the unicode buffer
387 // if (withoutUnicodePtr == 0) {
388 // //buffer all the entries that have been left aside....
389 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
393 // withoutUnicodeBuffer,
395 // withoutUnicodePtr);
397 // //fill the buffer with the char
398 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
401 // } //-------------end unicode traitement--------------
403 if (currentCharacter != testedChar) {
404 currentPosition = temp;
407 unicodeAsBackSlash = false;
408 // if (withoutUnicodePtr != 0)
409 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
412 } catch (IndexOutOfBoundsException e) {
413 unicodeAsBackSlash = false;
414 currentPosition = temp;
// Two-candidate variant of getNextChar(char): reads source[currentPosition] into
// currentCharacter, advances currentPosition, and compares the character with
// testedChar1 and then testedChar2. If neither matches, or if the read runs past the
// end of source, currentPosition is rolled back to its value on entry.
// Result encoding, per the comment below: 0 for testedChar1, 1 for testedChar2, -1 otherwise.
// NOTE(review): the return statements themselves are not visible in this listing.
// NOTE(review): as in the single-character variant, currentCharacter is not restored on failure.
418 public final int getNextChar(char testedChar1, char testedChar2) {
419 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
420 //test can be done with (x==0) for the first and (x>0) for the second
421 //handle the case of unicode.
422 //when a unicode appears then we must use a buffer that holds char internal values
423 //At the end of this method currentCharacter holds the new visited char
424 //and currentPosition points right next after it
425 //Both previous lines are true if the currentCharacter is == to the testedChar1/2
426 //On false, no side effect has occurred.
428 //ALL getNextChar.... ARE OPTIMIZED COPIES
430 int temp = currentPosition;
433 currentCharacter = source[currentPosition++];
434 // if (((currentCharacter = source[currentPosition++]) == '\\')
435 // && (source[currentPosition] == 'u')) {
436 // //-------------unicode traitement ------------
437 // int c1, c2, c3, c4;
438 // int unicodeSize = 6;
439 // currentPosition++;
440 // while (source[currentPosition] == 'u') {
441 // currentPosition++;
445 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
447 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
449 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
451 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
453 // currentPosition = temp;
457 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
458 // if (currentCharacter == testedChar1)
460 // else if (currentCharacter == testedChar2)
463 // currentPosition = temp;
467 // //need the unicode buffer
468 // if (withoutUnicodePtr == 0) {
469 // //buffer all the entries that have been left aside....
470 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
474 // withoutUnicodeBuffer,
476 // withoutUnicodePtr);
478 // //fill the buffer with the char
479 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
481 // } //-------------end unicode traitement--------------
483 if (currentCharacter == testedChar1)
485 else if (currentCharacter == testedChar2)
488 currentPosition = temp;
492 // if (withoutUnicodePtr != 0)
493 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
496 } catch (IndexOutOfBoundsException e) {
497 currentPosition = temp;
// Conditionally consumes one decimal digit: reads source[currentPosition] into
// currentCharacter and advances currentPosition. If Character.isDigit rejects the
// character, or the read runs past the end of source, currentPosition is rolled back
// to its value on entry.
// NOTE(review): Character.isDigit accepts any Unicode decimal digit, not only '0'..'9'.
// NOTE(review): the return statements are not visible in this listing, and
// currentCharacter is not restored on failure.
501 public final boolean getNextCharAsDigit() {
503 //handle the case of unicode.
504 //when a unicode appears then we must use a buffer that holds char internal values
505 //At the end of this method currentCharacter holds the new visited char
506 //and currentPosition points right next after it
507 //Both previous lines are true if the currentCharacter is a digit
508 //On false, no side effect has occurred.
510 //ALL getNextChar.... ARE OPTIMIZED COPIES
512 int temp = currentPosition;
514 currentCharacter = source[currentPosition++];
515 // if (((currentCharacter = source[currentPosition++]) == '\\')
516 // && (source[currentPosition] == 'u')) {
517 // //-------------unicode traitement ------------
518 // int c1, c2, c3, c4;
519 // int unicodeSize = 6;
520 // currentPosition++;
521 // while (source[currentPosition] == 'u') {
522 // currentPosition++;
526 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
528 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
530 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
532 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
534 // currentPosition = temp;
538 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
539 // if (!Character.isDigit(currentCharacter)) {
540 // currentPosition = temp;
544 // //need the unicode buffer
545 // if (withoutUnicodePtr == 0) {
546 // //buffer all the entries that have been left aside....
547 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
551 // withoutUnicodeBuffer,
553 // withoutUnicodePtr);
555 // //fill the buffer with the char
556 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
558 // } //-------------end unicode traitement--------------
560 if (!Character.isDigit(currentCharacter)) {
561 currentPosition = temp;
564 // if (withoutUnicodePtr != 0)
565 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
568 } catch (IndexOutOfBoundsException e) {
569 currentPosition = temp;
// Radix-aware variant of getNextCharAsDigit(): reads source[currentPosition] into
// currentCharacter and advances currentPosition. If Character.digit(currentCharacter, radix)
// is -1 (not a digit in that radix), or the read runs past the end of source,
// currentPosition is rolled back to its value on entry.
// NOTE(review): the return statements are not visible in this listing, and
// currentCharacter is not restored on failure.
573 public final boolean getNextCharAsDigit(int radix) {
575 //handle the case of unicode.
576 //when a unicode appears then we must use a buffer that holds char internal values
577 //At the end of this method currentCharacter holds the new visited char
578 //and currentPosition points right next after it
579 //Both previous lines are true if the currentCharacter is a digit base on radix
580 //On false, no side effect has occurred.
582 //ALL getNextChar.... ARE OPTIMIZED COPIES
584 int temp = currentPosition;
586 currentCharacter = source[currentPosition++];
587 // if (((currentCharacter = source[currentPosition++]) == '\\')
588 // && (source[currentPosition] == 'u')) {
589 // //-------------unicode traitement ------------
590 // int c1, c2, c3, c4;
591 // int unicodeSize = 6;
592 // currentPosition++;
593 // while (source[currentPosition] == 'u') {
594 // currentPosition++;
598 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
600 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
602 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
604 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
606 // currentPosition = temp;
610 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
611 // if (Character.digit(currentCharacter, radix) == -1) {
612 // currentPosition = temp;
616 // //need the unicode buffer
617 // if (withoutUnicodePtr == 0) {
618 // //buffer all the entries that have been left aside....
619 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
623 // withoutUnicodeBuffer,
625 // withoutUnicodePtr);
627 // //fill the buffer with the char
628 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
630 // } //-------------end unicode traitement--------------
632 if (Character.digit(currentCharacter, radix) == -1) {
633 currentPosition = temp;
636 // if (withoutUnicodePtr != 0)
637 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
640 } catch (IndexOutOfBoundsException e) {
641 currentPosition = temp;
// Conditionally consumes one identifier character: reads source[currentPosition] into
// currentCharacter and advances currentPosition. If isPHPIdentifierPart rejects the
// character, or the read runs past the end of source, currentPosition is rolled back
// to its value on entry.
// Despite the "Java" in the method name, the test applied is isPHPIdentifierPart
// (letter, digit, '_' or 0x7F..0xFF).
// NOTE(review): the return statements are not visible in this listing, and
// currentCharacter is not restored on failure.
645 public boolean getNextCharAsJavaIdentifierPart() {
647 //handle the case of unicode.
648 //when a unicode appears then we must use a buffer that holds char internal values
649 //At the end of this method currentCharacter holds the new visited char
650 //and currentPosition points right next after it
651 //Both previous lines are true if the currentCharacter is a PHP identifier part
652 //On false, no side effect has occurred.
654 //ALL getNextChar.... ARE OPTIMIZED COPIES
656 int temp = currentPosition;
658 currentCharacter = source[currentPosition++];
659 // if (((currentCharacter = source[currentPosition++]) == '\\')
660 // && (source[currentPosition] == 'u')) {
661 // //-------------unicode traitement ------------
662 // int c1, c2, c3, c4;
663 // int unicodeSize = 6;
664 // currentPosition++;
665 // while (source[currentPosition] == 'u') {
666 // currentPosition++;
670 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
672 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
674 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
676 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
678 // currentPosition = temp;
682 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
683 // if (!isPHPIdentifierPart(currentCharacter)) {
684 // currentPosition = temp;
688 // //need the unicode buffer
689 // if (withoutUnicodePtr == 0) {
690 // //buffer all the entries that have been left aside....
691 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
695 // withoutUnicodeBuffer,
697 // withoutUnicodePtr);
699 // //fill the buffer with the char
700 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
702 // } //-------------end unicode traitement--------------
704 if (!isPHPIdentifierPart(currentCharacter)) {
705 currentPosition = temp;
709 // if (withoutUnicodePtr != 0)
710 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
713 } catch (IndexOutOfBoundsException e) {
714 currentPosition = temp;
719 public int getNextToken() throws InvalidInputException {
720 int htmlPosition = currentPosition;
723 currentCharacter = source[currentPosition++];
724 if (currentCharacter == '<') {
725 if (getNextChar('?')) {
726 currentCharacter = source[currentPosition++];
727 if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
729 startPosition = currentPosition;
731 if (tokenizeWhiteSpace) {
732 // && (whiteStart != currentPosition - 1)) {
733 // reposition scanner in case we are interested by spaces as tokens
734 startPosition = htmlPosition;
735 return TokenNameHTML;
738 boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
740 int test = getNextChar('H', 'h');
742 test = getNextChar('P', 'p');
745 startPosition = currentPosition;
748 if (tokenizeWhiteSpace) {
749 // && (whiteStart != currentPosition - 1)) {
750 // reposition scanner in case we are interested by spaces as tokens
751 startPosition = htmlPosition;
752 return TokenNameHTML;
761 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
762 if (recordLineSeparator) {
769 } //-----------------end switch while try--------------------
770 catch (IndexOutOfBoundsException e) {
771 if (tokenizeWhiteSpace) {
772 // && (whiteStart != currentPosition - 1)) {
773 // reposition scanner in case we are interested by spaces as tokens
774 startPosition = htmlPosition;
782 jumpOverMethodBody();
784 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
787 while (true) { //loop for jumping over comments
788 withoutUnicodePtr = 0;
789 //start with a new token (even comment written with unicode )
791 // ---------Consume white space and handles startPosition---------
792 int whiteStart = currentPosition;
793 boolean isWhiteSpace;
795 startPosition = currentPosition;
796 currentCharacter = source[currentPosition++];
797 // if (((currentCharacter = source[currentPosition++]) == '\\')
798 // && (source[currentPosition] == 'u')) {
799 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
801 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
802 checkNonExternalizeString();
803 if (recordLineSeparator) {
809 isWhiteSpace = (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
811 } while (isWhiteSpace);
812 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
813 // reposition scanner in case we are interested by spaces as tokens
815 startPosition = whiteStart;
816 return TokenNameWHITESPACE;
818 //little trick to get out in the middle of a source compuation
819 if (currentPosition > eofPosition)
822 // ---------Identify the next token-------------
824 switch (currentCharacter) {
826 return TokenNameLPAREN;
828 return TokenNameRPAREN;
830 return TokenNameLBRACE;
832 return TokenNameRBRACE;
834 return TokenNameLBRACKET;
836 return TokenNameRBRACKET;
838 return TokenNameSEMICOLON;
840 return TokenNameCOMMA;
843 if (getNextCharAsDigit())
844 return scanNumber(true);
849 if ((test = getNextChar('+', '=')) == 0)
850 return TokenNamePLUS_PLUS;
852 return TokenNamePLUS_EQUAL;
853 return TokenNamePLUS;
858 if ((test = getNextChar('-', '=')) == 0)
859 return TokenNameMINUS_MINUS;
861 return TokenNameMINUS_EQUAL;
862 if (getNextChar('>'))
863 return TokenNameMINUS_GREATER;
865 return TokenNameMINUS;
868 if (getNextChar('='))
869 return TokenNameTWIDDLE_EQUAL;
870 return TokenNameTWIDDLE;
872 if (getNextChar('=')) {
873 if (getNextChar('=')) {
874 return TokenNameNOT_EQUAL_EQUAL;
876 return TokenNameNOT_EQUAL;
880 if (getNextChar('='))
881 return TokenNameMULTIPLY_EQUAL;
882 return TokenNameMULTIPLY;
884 if (getNextChar('='))
885 return TokenNameREMAINDER_EQUAL;
886 return TokenNameREMAINDER;
890 if ((test = getNextChar('=', '<')) == 0)
891 return TokenNameLESS_EQUAL;
893 if (getNextChar('='))
894 return TokenNameLEFT_SHIFT_EQUAL;
895 if (getNextChar('<')) {
896 int heredocStart = currentPosition;
897 int heredocLength = 0;
898 currentCharacter = source[currentPosition++];
899 if (isPHPIdentifierStart(currentCharacter)) {
900 currentCharacter = source[currentPosition++];
902 return TokenNameERROR;
904 while (isPHPIdentifierPart(currentCharacter)) {
905 currentCharacter = source[currentPosition++];
908 heredocLength = currentPosition - heredocStart - 1;
910 // heredoc end-tag determination
911 boolean endTag = true;
914 ch = source[currentPosition++];
915 if (ch == '\r' || ch == '\n') {
916 if (recordLineSeparator) {
921 for (int i = 0; i < heredocLength; i++) {
922 if (source[currentPosition + i] != source[heredocStart + i]) {
928 currentPosition += heredocLength - 1;
929 currentCharacter = source[currentPosition++];
930 break; // do...while loop
938 return TokenNameHEREDOC;
940 return TokenNameLEFT_SHIFT;
942 return TokenNameLESS;
947 if ((test = getNextChar('=', '>')) == 0)
948 return TokenNameGREATER_EQUAL;
950 if ((test = getNextChar('=', '>')) == 0)
951 return TokenNameRIGHT_SHIFT_EQUAL;
952 return TokenNameRIGHT_SHIFT;
954 return TokenNameGREATER;
957 if (getNextChar('=')) {
958 if (getNextChar('=')) {
959 return TokenNameEQUAL_EQUAL_EQUAL;
961 return TokenNameEQUAL_EQUAL;
963 if (getNextChar('>'))
964 return TokenNameEQUAL_GREATER;
965 return TokenNameEQUAL;
969 if ((test = getNextChar('&', '=')) == 0)
970 return TokenNameAND_AND;
972 return TokenNameAND_EQUAL;
978 if ((test = getNextChar('|', '=')) == 0)
979 return TokenNameOR_OR;
981 return TokenNameOR_EQUAL;
985 if (getNextChar('='))
986 return TokenNameXOR_EQUAL;
989 if (getNextChar('>')) {
991 return TokenNameStopPHP;
993 return TokenNameQUESTION;
995 if (getNextChar(':'))
996 return TokenNameCOLON_COLON;
997 return TokenNameCOLON;
1003 // if ((test = getNextChar('\n', '\r')) == 0) {
1004 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1007 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1008 // for (int lookAhead = 0;
1011 // if (currentPosition + lookAhead
1012 // == source.length)
1014 // if (source[currentPosition + lookAhead]
1017 // if (source[currentPosition + lookAhead]
1019 // currentPosition += lookAhead + 1;
1023 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1026 // if (getNextChar('\'')) {
1027 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1028 // for (int lookAhead = 0;
1031 // if (currentPosition + lookAhead
1032 // == source.length)
1034 // if (source[currentPosition + lookAhead]
1037 // if (source[currentPosition + lookAhead]
1039 // currentPosition += lookAhead + 1;
1043 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1045 // if (getNextChar('\\'))
1046 // scanEscapeCharacter();
1047 // else { // consume next character
1048 // unicodeAsBackSlash = false;
1049 // if (((currentCharacter = source[currentPosition++])
1051 // && (source[currentPosition] == 'u')) {
1052 // getNextUnicodeChar();
1054 // if (withoutUnicodePtr != 0) {
1055 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1056 // currentCharacter;
1060 // // if (getNextChar('\''))
1061 // // return TokenNameCharacterLiteral;
1062 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1063 // for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1064 // if (currentPosition + lookAhead == source.length)
1066 // if (source[currentPosition + lookAhead] == '\n')
1068 // if (source[currentPosition + lookAhead] == '\'') {
1069 // currentPosition += lookAhead + 1;
1073 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1076 // consume next character
1077 unicodeAsBackSlash = false;
1078 currentCharacter = source[currentPosition++];
1079 // if (((currentCharacter = source[currentPosition++]) == '\\')
1080 // && (source[currentPosition] == 'u')) {
1081 // getNextUnicodeChar();
1083 // if (withoutUnicodePtr != 0) {
1084 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1085 // currentCharacter;
1089 while (currentCharacter != '\'') {
1091 /** ** in PHP \r and \n are valid in string literals *** */
1092 // if ((currentCharacter == '\n')
1093 // || (currentCharacter == '\r')) {
1094 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1095 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1096 // if (currentPosition + lookAhead == source.length)
1098 // if (source[currentPosition + lookAhead] == '\n')
1100 // if (source[currentPosition + lookAhead] == '\"') {
1101 // currentPosition += lookAhead + 1;
1105 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1107 if (currentCharacter == '\\') {
1108 int escapeSize = currentPosition;
1109 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1110 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1111 scanSingleQuotedEscapeCharacter();
1112 escapeSize = currentPosition - escapeSize;
1113 if (withoutUnicodePtr == 0) {
1114 //buffer all the entries that have been left aside....
1115 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1116 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1117 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1118 } else { //overwrite the / in the buffer
1119 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1120 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1121 withoutUnicodePtr--;
1125 // consume next character
1126 unicodeAsBackSlash = false;
1127 currentCharacter = source[currentPosition++];
1128 // if (((currentCharacter = source[currentPosition++]) == '\\')
1129 // && (source[currentPosition] == 'u')) {
1130 // getNextUnicodeChar();
1132 if (withoutUnicodePtr != 0) {
1133 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1138 } catch (IndexOutOfBoundsException e) {
1139 throw new InvalidInputException(UNTERMINATED_STRING);
1140 } catch (InvalidInputException e) {
1141 if (e.getMessage().equals(INVALID_ESCAPE)) {
1142 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1143 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1144 if (currentPosition + lookAhead == source.length)
1146 if (source[currentPosition + lookAhead] == '\n')
1148 if (source[currentPosition + lookAhead] == '\'') {
1149 currentPosition += lookAhead + 1;
1157 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1158 if (currentLine == null) {
1159 currentLine = new NLSLine();
1160 lines.add(currentLine);
1162 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1164 return TokenNameStringConstant;
1167 // consume next character
1168 unicodeAsBackSlash = false;
1169 currentCharacter = source[currentPosition++];
1170 // if (((currentCharacter = source[currentPosition++]) == '\\')
1171 // && (source[currentPosition] == 'u')) {
1172 // getNextUnicodeChar();
1174 // if (withoutUnicodePtr != 0) {
1175 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1176 // currentCharacter;
1180 while (currentCharacter != '"') {
1182 /** ** in PHP \r and \n are valid in string literals *** */
1183 // if ((currentCharacter == '\n')
1184 // || (currentCharacter == '\r')) {
1185 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1186 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1187 // if (currentPosition + lookAhead == source.length)
1189 // if (source[currentPosition + lookAhead] == '\n')
1191 // if (source[currentPosition + lookAhead] == '\"') {
1192 // currentPosition += lookAhead + 1;
1196 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1198 if (currentCharacter == '\\') {
1199 int escapeSize = currentPosition;
1200 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1201 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1202 scanDoubleQuotedEscapeCharacter();
1203 escapeSize = currentPosition - escapeSize;
1204 if (withoutUnicodePtr == 0) {
1205 //buffer all the entries that have been left aside....
1206 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1207 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1208 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1209 } else { //overwrite the / in the buffer
1210 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1211 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1212 withoutUnicodePtr--;
1216 // consume next character
1217 unicodeAsBackSlash = false;
1218 currentCharacter = source[currentPosition++];
1219 // if (((currentCharacter = source[currentPosition++]) == '\\')
1220 // && (source[currentPosition] == 'u')) {
1221 // getNextUnicodeChar();
1223 if (withoutUnicodePtr != 0) {
1224 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1229 } catch (IndexOutOfBoundsException e) {
1230 throw new InvalidInputException(UNTERMINATED_STRING);
1231 } catch (InvalidInputException e) {
1232 if (e.getMessage().equals(INVALID_ESCAPE)) {
1233 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1234 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1235 if (currentPosition + lookAhead == source.length)
1237 if (source[currentPosition + lookAhead] == '\n')
1239 if (source[currentPosition + lookAhead] == '\"') {
1240 currentPosition += lookAhead + 1;
1248 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1249 if (currentLine == null) {
1250 currentLine = new NLSLine();
1251 lines.add(currentLine);
1253 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1255 return TokenNameStringLiteral;
1258 // consume next character
1259 unicodeAsBackSlash = false;
1260 currentCharacter = source[currentPosition++];
1261 // if (((currentCharacter = source[currentPosition++]) == '\\')
1262 // && (source[currentPosition] == 'u')) {
1263 // getNextUnicodeChar();
1265 // if (withoutUnicodePtr != 0) {
1266 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1267 // currentCharacter;
1271 while (currentCharacter != '`') {
1273 /** ** in PHP \r and \n are valid in string literals *** */
1274 // if ((currentCharacter == '\n')
1275 // || (currentCharacter == '\r')) {
1276 // // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1277 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1278 // if (currentPosition + lookAhead == source.length)
1280 // if (source[currentPosition + lookAhead] == '\n')
1282 // if (source[currentPosition + lookAhead] == '\"') {
1283 // currentPosition += lookAhead + 1;
1287 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1289 if (currentCharacter == '\\') {
1290 int escapeSize = currentPosition;
1291 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1292 //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
1293 scanDoubleQuotedEscapeCharacter();
1294 escapeSize = currentPosition - escapeSize;
1295 if (withoutUnicodePtr == 0) {
1296 //buffer all the entries that have been left aside....
1297 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1298 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1299 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1300 } else { //overwrite the / in the buffer
1301 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1302 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1303 withoutUnicodePtr--;
1307 // consume next character
1308 unicodeAsBackSlash = false;
1309 currentCharacter = source[currentPosition++];
1310 // if (((currentCharacter = source[currentPosition++]) == '\\')
1311 // && (source[currentPosition] == 'u')) {
1312 // getNextUnicodeChar();
1314 if (withoutUnicodePtr != 0) {
1315 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1320 } catch (IndexOutOfBoundsException e) {
1321 throw new InvalidInputException(UNTERMINATED_STRING);
1322 } catch (InvalidInputException e) {
1323 if (e.getMessage().equals(INVALID_ESCAPE)) {
1324 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1325 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1326 if (currentPosition + lookAhead == source.length)
1328 if (source[currentPosition + lookAhead] == '\n')
1330 if (source[currentPosition + lookAhead] == '`') {
1331 currentPosition += lookAhead + 1;
1339 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1340 if (currentLine == null) {
1341 currentLine = new NLSLine();
1342 lines.add(currentLine);
1344 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1346 return TokenNameStringInterpolated;
1351 if ((currentCharacter == '#') || (test = getNextChar('/', '*')) == 0) {
1353 int endPositionForLineComment = 0;
1354 try { //get the next char
1355 currentCharacter = source[currentPosition++];
1356 // if (((currentCharacter = source[currentPosition++])
1358 // && (source[currentPosition] == 'u')) {
1359 // //-------------unicode traitement ------------
1360 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1361 // currentPosition++;
1362 // while (source[currentPosition] == 'u') {
1363 // currentPosition++;
1366 // Character.getNumericValue(source[currentPosition++]))
1370 // Character.getNumericValue(source[currentPosition++]))
1374 // Character.getNumericValue(source[currentPosition++]))
1378 // Character.getNumericValue(source[currentPosition++]))
1381 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1383 // currentCharacter =
1384 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1388 //handle the \\u case manually into comment
1389 // if (currentCharacter == '\\') {
1390 // if (source[currentPosition] == '\\')
1391 // currentPosition++;
1392 // } //jump over the \\
1393 boolean isUnicode = false;
1394 while (currentCharacter != '\r' && currentCharacter != '\n') {
1395 if (currentCharacter == '?') {
1396 if (getNextChar('>')) {
1397 startPosition = currentPosition - 2;
1399 return TokenNameStopPHP;
1405 currentCharacter = source[currentPosition++];
1406 // if (((currentCharacter = source[currentPosition++])
1408 // && (source[currentPosition] == 'u')) {
1409 // isUnicode = true;
1410 // //-------------unicode traitement ------------
1411 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1412 // currentPosition++;
1413 // while (source[currentPosition] == 'u') {
1414 // currentPosition++;
1417 // Character.getNumericValue(source[currentPosition++]))
1421 // Character.getNumericValue(
1422 // source[currentPosition++]))
1426 // Character.getNumericValue(
1427 // source[currentPosition++]))
1431 // Character.getNumericValue(
1432 // source[currentPosition++]))
1435 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1437 // currentCharacter =
1438 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1441 //handle the \\u case manually into comment
1442 // if (currentCharacter == '\\') {
1443 // if (source[currentPosition] == '\\')
1444 // currentPosition++;
1445 // } //jump over the \\
1448 endPositionForLineComment = currentPosition - 6;
1450 endPositionForLineComment = currentPosition - 1;
1452 recordComment(false);
1453 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1454 checkNonExternalizeString();
1455 if (recordLineSeparator) {
1457 pushUnicodeLineSeparator();
1459 pushLineSeparator();
1465 if (tokenizeComments) {
1467 currentPosition = endPositionForLineComment;
1468 // reset one character behind
1470 return TokenNameCOMMENT_LINE;
1472 } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1473 if (tokenizeComments) {
1475 // reset one character behind
1476 return TokenNameCOMMENT_LINE;
1482 //traditional and annotation comment
1483 boolean isJavadoc = false, star = false;
1484 // consume next character
1485 unicodeAsBackSlash = false;
1486 currentCharacter = source[currentPosition++];
1487 // if (((currentCharacter = source[currentPosition++]) == '\\')
1488 // && (source[currentPosition] == 'u')) {
1489 // getNextUnicodeChar();
1491 // if (withoutUnicodePtr != 0) {
1492 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1493 // currentCharacter;
1497 if (currentCharacter == '*') {
1501 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1502 checkNonExternalizeString();
1503 if (recordLineSeparator) {
1504 pushLineSeparator();
1509 try { //get the next char
1510 currentCharacter = source[currentPosition++];
1511 // if (((currentCharacter = source[currentPosition++])
1513 // && (source[currentPosition] == 'u')) {
1514 // //-------------unicode traitement ------------
1515 // getNextUnicodeChar();
1517 //handle the \\u case manually into comment
1518 // if (currentCharacter == '\\') {
1519 // if (source[currentPosition] == '\\')
1520 // currentPosition++;
1521 // //jump over the \\
1523 // empty comment is not a javadoc /**/
1524 if (currentCharacter == '/') {
1527 //loop until end of comment */
1528 while ((currentCharacter != '/') || (!star)) {
1529 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1530 checkNonExternalizeString();
1531 if (recordLineSeparator) {
1532 pushLineSeparator();
1537 star = currentCharacter == '*';
1539 currentCharacter = source[currentPosition++];
1540 // if (((currentCharacter = source[currentPosition++])
1542 // && (source[currentPosition] == 'u')) {
1543 // //-------------unicode traitement ------------
1544 // getNextUnicodeChar();
1546 //handle the \\u case manually into comment
1547 // if (currentCharacter == '\\') {
1548 // if (source[currentPosition] == '\\')
1549 // currentPosition++;
1550 // } //jump over the \\
1552 recordComment(isJavadoc);
1553 if (tokenizeComments) {
1555 return TokenNameCOMMENT_PHPDOC;
1556 return TokenNameCOMMENT_BLOCK;
1558 } catch (IndexOutOfBoundsException e) {
1559 throw new InvalidInputException(UNTERMINATED_COMMENT);
1563 if (getNextChar('='))
1564 return TokenNameDIVIDE_EQUAL;
1565 return TokenNameDIVIDE;
1569 return TokenNameEOF;
1570 //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1571 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1574 if (currentCharacter == '$') {
1575 while ((currentCharacter = source[currentPosition++]) == '$') {
1577 if (currentCharacter == '{')
1578 return TokenNameDOLLAR_LBRACE;
1579 if (isPHPIdentifierStart(currentCharacter))
1580 return scanIdentifierOrKeyword(true);
1581 return TokenNameERROR;
1583 if (isPHPIdentifierStart(currentCharacter))
1584 return scanIdentifierOrKeyword(false);
1585 if (Character.isDigit(currentCharacter))
1586 return scanNumber(false);
1587 return TokenNameERROR;
1590 } //-----------------end switch while try--------------------
1591 catch (IndexOutOfBoundsException e) {
1594 return TokenNameEOF;
1597 // public final void getNextUnicodeChar()
1598 // throws IndexOutOfBoundsException, InvalidInputException {
1600 // //handle the case of unicode.
1601 // //when a unicode appears then we must use a buffer that holds char internal values
1602 // //At the end of this method currentCharacter holds the new visited char
1603 // //and currentPosition points right next after it
1605 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1607 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1608 // currentPosition++;
1609 // while (source[currentPosition] == 'u') {
1610 // currentPosition++;
1614 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1616 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1618 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1620 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1622 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1624 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1625 // //need the unicode buffer
1626 // if (withoutUnicodePtr == 0) {
1627 // //buffer all the entries that have been left aside....
1628 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1629 // System.arraycopy(
1632 // withoutUnicodeBuffer,
1634 // withoutUnicodePtr);
1636 // //fill the buffer with the char
1637 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1639 // unicodeAsBackSlash = currentCharacter == '\\';
1642 * Tokenize a method body, assuming that curly brackets are properly balanced.
/*
 * Skips forward through the source without returning tokens to the caller (the method is void).
 * Visible steps: whitespace is consumed while line separators are recorded, then the current
 * character is dispatched through a switch that walks over string literals (escape sequences are
 * handed to scanDoubleQuotedEscapeCharacter), line comments, block comments, identifiers and numbers.
 * IndexOutOfBoundsException and InvalidInputException are caught at the end of the method.
 *
 * NOTE(review): this excerpt has gaps in its embedded line numbering, so the case labels, the
 * brace counting and the catch bodies are not visible here; confirm details against the full file.
 */
1644 public final void jumpOverMethodBody() {
1646 this.wasAcr = false;
1649 while (true) { //loop for jumping over comments
1650 // ---------Consume white space and handles startPosition---------
1651 boolean isWhiteSpace;
1653 startPosition = currentPosition;
1654 currentCharacter = source[currentPosition++];
1655 // if (((currentCharacter = source[currentPosition++]) == '\\')
1656 // && (source[currentPosition] == 'u')) {
1657 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1659 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1660 pushLineSeparator();
1661 isWhiteSpace = Character.isWhitespace(currentCharacter);
1663 } while (isWhiteSpace);
1665 // -------consume token until } is found---------
1666 switch (currentCharacter) {
1678 test = getNextChar('\\');
1681 scanDoubleQuotedEscapeCharacter();
1682 } catch (InvalidInputException ex) {
1685 // try { // consume next character
1686 unicodeAsBackSlash = false;
1687 currentCharacter = source[currentPosition++];
1688 // if (((currentCharacter = source[currentPosition++]) == '\\')
1689 // && (source[currentPosition] == 'u')) {
1690 // getNextUnicodeChar();
1692 if (withoutUnicodePtr != 0) {
1693 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1696 // } catch (InvalidInputException ex) {
1704 // try { // consume next character
1705 unicodeAsBackSlash = false;
1706 currentCharacter = source[currentPosition++];
1707 // if (((currentCharacter = source[currentPosition++]) == '\\')
1708 // && (source[currentPosition] == 'u')) {
1709 // getNextUnicodeChar();
1711 if (withoutUnicodePtr != 0) {
1712 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1715 // } catch (InvalidInputException ex) {
1717 while (currentCharacter != '"') {
1718 if (currentCharacter == '\r') {
1719 if (source[currentPosition] == '\n')
1722 // the string cannot go further than the line
1724 if (currentCharacter == '\n') {
1726 // the string cannot go further than the line
1728 if (currentCharacter == '\\') {
1730 scanDoubleQuotedEscapeCharacter();
1731 } catch (InvalidInputException ex) {
1734 // try { // consume next character
1735 unicodeAsBackSlash = false;
1736 currentCharacter = source[currentPosition++];
1737 // if (((currentCharacter = source[currentPosition++]) == '\\')
1738 // && (source[currentPosition] == 'u')) {
1739 // getNextUnicodeChar();
1741 if (withoutUnicodePtr != 0) {
1742 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1745 // } catch (InvalidInputException ex) {
1748 } catch (IndexOutOfBoundsException e) {
1755 if ((test = getNextChar('/', '*')) == 0) {
1759 currentCharacter = source[currentPosition++];
1760 // if (((currentCharacter = source[currentPosition++]) == '\\')
1761 // && (source[currentPosition] == 'u')) {
1762 // //-------------unicode handling ------------
1763 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1764 // currentPosition++;
1765 // while (source[currentPosition] == 'u') {
1766 // currentPosition++;
1769 // Character.getNumericValue(source[currentPosition++]))
1773 // Character.getNumericValue(source[currentPosition++]))
1777 // Character.getNumericValue(source[currentPosition++]))
1781 // Character.getNumericValue(source[currentPosition++]))
1784 // //error don't care of the value
1785 // currentCharacter = 'A';
1786 // } //something different from \n and \r
1788 // currentCharacter =
1789 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1793 while (currentCharacter != '\r' && currentCharacter != '\n') {
1795 currentCharacter = source[currentPosition++];
1796 // if (((currentCharacter = source[currentPosition++])
1798 // && (source[currentPosition] == 'u')) {
1799 // //-------------unicode handling ------------
1800 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1801 // currentPosition++;
1802 // while (source[currentPosition] == 'u') {
1803 // currentPosition++;
1806 // Character.getNumericValue(source[currentPosition++]))
1810 // Character.getNumericValue(source[currentPosition++]))
1814 // Character.getNumericValue(source[currentPosition++]))
1818 // Character.getNumericValue(source[currentPosition++]))
1821 // //error don't care of the value
1822 // currentCharacter = 'A';
1823 // } //something different from \n and \r
1825 // currentCharacter =
1826 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1830 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1831 pushLineSeparator();
1832 } catch (IndexOutOfBoundsException e) {
1833 } //an eof will then be generated
1837 //traditional and annotation comment
1838 boolean star = false;
1839 // try { // consume next character
1840 unicodeAsBackSlash = false;
1841 currentCharacter = source[currentPosition++];
1842 // if (((currentCharacter = source[currentPosition++]) == '\\')
1843 // && (source[currentPosition] == 'u')) {
1844 // getNextUnicodeChar();
1846 if (withoutUnicodePtr != 0) {
1847 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1850 // } catch (InvalidInputException ex) {
1852 if (currentCharacter == '*') {
1855 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1856 pushLineSeparator();
1857 try { //get the next char
1858 currentCharacter = source[currentPosition++];
1859 // if (((currentCharacter = source[currentPosition++]) == '\\')
1860 // && (source[currentPosition] == 'u')) {
1861 // //-------------unicode handling ------------
1862 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1863 // currentPosition++;
1864 // while (source[currentPosition] == 'u') {
1865 // currentPosition++;
1868 // Character.getNumericValue(source[currentPosition++]))
1872 // Character.getNumericValue(source[currentPosition++]))
1876 // Character.getNumericValue(source[currentPosition++]))
1880 // Character.getNumericValue(source[currentPosition++]))
1883 // //error don't care of the value
1884 // currentCharacter = 'A';
1885 // } //something different from * and /
1887 // currentCharacter =
1888 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1891 //loop until end of comment */
1892 while ((currentCharacter != '/') || (!star)) {
1893 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1894 pushLineSeparator();
1895 star = currentCharacter == '*';
1897 currentCharacter = source[currentPosition++];
1898 // if (((currentCharacter = source[currentPosition++])
1900 // && (source[currentPosition] == 'u')) {
1901 // //-------------unicode handling ------------
1902 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1903 // currentPosition++;
1904 // while (source[currentPosition] == 'u') {
1905 // currentPosition++;
1908 // Character.getNumericValue(source[currentPosition++]))
1912 // Character.getNumericValue(source[currentPosition++]))
1916 // Character.getNumericValue(source[currentPosition++]))
1920 // Character.getNumericValue(source[currentPosition++]))
1923 // //error don't care of the value
1924 // currentCharacter = 'A';
1925 // } //something different from * and /
1927 // currentCharacter =
1928 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1932 } catch (IndexOutOfBoundsException e) {
1941 if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
1943 scanIdentifierOrKeyword((currentCharacter == '$'));
1944 } catch (InvalidInputException ex) {
1948 if (Character.isDigit(currentCharacter)) {
1951 } catch (InvalidInputException ex) {
1957 //-----------------end switch while try--------------------
1958 } catch (IndexOutOfBoundsException e) {
1959 } catch (InvalidInputException e) {
1963 // public final boolean jumpOverUnicodeWhiteSpace()
1964 // throws InvalidInputException {
1966 // //handle the case of unicode. Jump over the next whiteSpace
1967 // //making startPosition pointing on the next available char
1968 // //On false, the currentCharacter is filled up with a potential
1972 // this.wasAcr = false;
1973 // int c1, c2, c3, c4;
1974 // int unicodeSize = 6;
1975 // currentPosition++;
1976 // while (source[currentPosition] == 'u') {
1977 // currentPosition++;
1981 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1983 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1985 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1987 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1989 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1992 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1993 // if (recordLineSeparator
1994 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1995 // pushLineSeparator();
1996 // if (Character.isWhitespace(currentCharacter))
1999 // //buffer the new char which is not a white space
2000 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2001 // //withoutUnicodePtr == 1 is true here
2003 // } catch (IndexOutOfBoundsException e) {
2004 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
/*
 * Builds a copy of the recorded line-end positions, trimmed to the entries in use.
 *
 * NOTE(review): the declaration of the local copy and the return statement are not visible in
 * this excerpt; presumably the freshly allocated array is what gets returned - confirm.
 */
2007 public final int[] getLineEnds() {
2008 //return a bounded copy of this.lineEnds
// allocate exactly linePtr + 1 ints and copy the live prefix of lineEnds into the new array
2011 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
// NOTE(review): only the signature is visible in this excerpt; presumably this hands back the
// scanner source buffer - confirm against the full file before relying on it.
2015 public char[] getSource() {
/*
 * Returns the source of a one-character token starting at startPosition.
 *
 * NOTE(review): embedded line numbers jump from 2022 to 2077, so the part that (per the comment
 * below) returns shared, pre-built arrays is not visible here. The visible fallback allocates a
 * new one-element array holding the character.
 */
2018 final char[] optimizedCurrentTokenSource1() {
2019 //return always the same char[] build only once
2021 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2022 char charOne = source[startPosition];
// fallback: no shared array was returned above, so build a fresh one-element array
2077 return new char[] { charOne };
/*
 * Returns the source of a two-character token starting at startPosition, reusing a cached
 * array when one with the same two characters is already stored.
 *
 * The two characters are hashed (modulo TableSize) to select a bucket in charArray_length[0];
 * the bucket is scanned in two passes and, on a miss, a new array is stored in the bucket.
 *
 * NOTE(review): the return statements, the loop index initialisation and the wrap-around of the
 * insertion index are on lines missing from this excerpt.
 */
2081 final char[] optimizedCurrentTokenSource2() {
2082 //try to return the same char[] build only once
// c0 and c1 are assigned as a side effect of computing the hash
2085 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2086 char[][] table = charArray_length[0][hash];
// first pass: walk the bucket up to InternalTableSize looking for the same two characters
2088 while (++i < InternalTableSize) {
2089 char[] charArray = table[i];
2090 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2093 //---------other side---------
// second pass: walk the bucket up to the index held in newEntry2
2095 int max = newEntry2;
2096 while (++i <= max) {
2097 char[] charArray = table[i];
2098 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2101 //--------add the entry-------
// miss: advance the insertion index (bounded by InternalTableSize) and cache a new array
2102 if (++max >= InternalTableSize)
2105 table[max] = (r = new char[] { c0, c1 });
/*
 * Returns the source of a three-character token starting at startPosition, reusing a cached
 * array when one with the same three characters is already stored in charArray_length[1].
 *
 * NOTE(review): the hash declaration and reduction, the return statements and the wrap-around of
 * the insertion index are on lines missing from this excerpt.
 */
2110 final char[] optimizedCurrentTokenSource3() {
2111 //try to return the same char[] build only once
// c0, c1 and c2 are assigned as a side effect of computing the hash (6 bits of shift per character)
2115 (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2117 char[][] table = charArray_length[1][hash];
// first pass: walk the bucket up to InternalTableSize looking for the same three characters
2119 while (++i < InternalTableSize) {
2120 char[] charArray = table[i];
2121 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2124 //---------other side---------
// second pass: walk the bucket up to the index held in newEntry3
2126 int max = newEntry3;
2127 while (++i <= max) {
2128 char[] charArray = table[i];
2129 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2132 //--------add the entry-------
// miss: advance the insertion index (bounded by InternalTableSize) and cache a new array
2133 if (++max >= InternalTableSize)
2136 table[max] = (r = new char[] { c0, c1, c2 });
/*
 * Returns the source of a four-character token starting at startPosition, reusing a cached
 * array when one with the same four characters is already stored in charArray_length[2].
 *
 * NOTE(review): the hash declaration and reduction, the return statements and the wrap-around of
 * the insertion index are on lines missing from this excerpt.
 */
2141 final char[] optimizedCurrentTokenSource4() {
2142 //try to return the same char[] build only once
2144 char c0, c1, c2, c3;
// the leading character is widened to long before shifting; c0..c3 are assigned as a side effect
2146 ((((long) (c0 = source[startPosition])) << 18)
2147 + ((c1 = source[startPosition + 1]) << 12)
2148 + ((c2 = source[startPosition + 2]) << 6)
2149 + (c3 = source[startPosition + 3]))
2151 char[][] table = charArray_length[2][(int) hash];
// first pass: walk the bucket up to InternalTableSize looking for the same four characters
2153 while (++i < InternalTableSize) {
2154 char[] charArray = table[i];
2155 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2158 //---------other side---------
// second pass: walk the bucket up to the index held in newEntry4
2160 int max = newEntry4;
2161 while (++i <= max) {
2162 char[] charArray = table[i];
2163 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2166 //--------add the entry-------
// miss: advance the insertion index (bounded by InternalTableSize) and cache a new array
2167 if (++max >= InternalTableSize)
2170 table[max] = (r = new char[] { c0, c1, c2, c3 });
/*
 * Returns the source of a five-character token starting at startPosition, reusing a cached
 * array when one with the same five characters is already stored in charArray_length[3].
 *
 * NOTE(review): the hash declaration and reduction, the return statements and the wrap-around of
 * the insertion index are on lines missing from this excerpt.
 */
2176 final char[] optimizedCurrentTokenSource5() {
2177 //try to return the same char[] build only once
2179 char c0, c1, c2, c3, c4;
// the two leading characters are widened to long before shifting; c0..c4 are assigned as a side effect
2181 ((((long) (c0 = source[startPosition])) << 24)
2182 + (((long) (c1 = source[startPosition + 1])) << 18)
2183 + ((c2 = source[startPosition + 2]) << 12)
2184 + ((c3 = source[startPosition + 3]) << 6)
2185 + (c4 = source[startPosition + 4]))
2187 char[][] table = charArray_length[3][(int) hash];
// first pass: walk the bucket up to InternalTableSize looking for the same five characters
2189 while (++i < InternalTableSize) {
2190 char[] charArray = table[i];
2191 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2194 //---------other side---------
// second pass: walk the bucket up to the index held in newEntry5
2196 int max = newEntry5;
2197 while (++i <= max) {
2198 char[] charArray = table[i];
2199 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2202 //--------add the entry-------
// miss: advance the insertion index (bounded by InternalTableSize) and cache a new array
2203 if (++max >= InternalTableSize)
2206 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
/*
 * Returns the source of a six-character token starting at startPosition, reusing a cached
 * array when one with the same six characters is already stored in charArray_length[4].
 *
 * NOTE(review): the hash declaration and reduction, the return statements and the wrap-around of
 * the insertion index are on lines missing from this excerpt.
 */
2212 final char[] optimizedCurrentTokenSource6() {
2213 //try to return the same char[] build only once
2215 char c0, c1, c2, c3, c4, c5;
// the three leading characters are widened to long before shifting; c0..c5 are assigned as a side effect
2217 ((((long) (c0 = source[startPosition])) << 32)
2218 + (((long) (c1 = source[startPosition + 1])) << 24)
2219 + (((long) (c2 = source[startPosition + 2])) << 18)
2220 + ((c3 = source[startPosition + 3]) << 12)
2221 + ((c4 = source[startPosition + 4]) << 6)
2222 + (c5 = source[startPosition + 5]))
2224 char[][] table = charArray_length[4][(int) hash];
// first pass: walk the bucket up to InternalTableSize looking for the same six characters
2226 while (++i < InternalTableSize) {
2227 char[] charArray = table[i];
2228 if ((c0 == charArray[0])
2229 && (c1 == charArray[1])
2230 && (c2 == charArray[2])
2231 && (c3 == charArray[3])
2232 && (c4 == charArray[4])
2233 && (c5 == charArray[5]))
2236 //---------other side---------
// second pass: walk the bucket up to the index held in newEntry6
2238 int max = newEntry6;
2239 while (++i <= max) {
2240 char[] charArray = table[i];
2241 if ((c0 == charArray[0])
2242 && (c1 == charArray[1])
2243 && (c2 == charArray[2])
2244 && (c3 == charArray[3])
2245 && (c4 == charArray[4])
2246 && (c5 == charArray[5]))
2249 //--------add the entry-------
// miss: advance the insertion index (bounded by InternalTableSize) and cache a new array
2250 if (++max >= InternalTableSize)
2253 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
/*
 * Records the position of the line separator held in currentCharacter (which sits at
 * currentPosition - 1) into lineEnds.
 *
 * For a carriage return the position is appended and, if the next source character is a line
 * feed, the recorded end is moved onto that line feed. For a line feed that directly follows a
 * recorded carriage return (wasAcr set and the last recorded end at currentPosition - 2), the
 * existing entry is updated instead of appending a second one. A position that is not beyond the
 * last recorded line end is guarded against. lineEnds grows by INCREMENT entries when full.
 *
 * NOTE(review): several lines are missing from this excerpt - the body of the
 * checkNonExternalizedStringLiterals branch, the action taken by the guards, the try keywords
 * and the updates of wasAcr; confirm those against the full file.
 */
2258 public final void pushLineSeparator() throws InvalidInputException {
2259 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2260 final int INCREMENT = 250;
2262 if (this.checkNonExternalizedStringLiterals) {
2263 // reinitialize the current line for non externalize strings purpose
2266 //currentCharacter is at position currentPosition-1
2269 if (currentCharacter == '\r') {
2270 int separatorPos = currentPosition - 1;
// guard: this position is not beyond the last recorded line end
2271 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2273 //System.out.println("CR-" + separatorPos);
2275 lineEnds[++linePtr] = separatorPos;
2276 } catch (IndexOutOfBoundsException e) {
// lineEnds was full: linePtr has already been incremented, so grow the array and store at linePtr
2277 //linePtr value is correct
2278 int oldLength = lineEnds.length;
2279 int[] old = lineEnds;
2280 lineEnds = new int[oldLength + INCREMENT];
2281 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2282 lineEnds[linePtr] = separatorPos;
2284 // look-ahead for merged cr+lf
2286 if (source[currentPosition] == '\n') {
2287 //System.out.println("look-ahead LF-" + currentPosition);
2288 lineEnds[linePtr] = currentPosition;
2294 } catch (IndexOutOfBoundsException e) {
2299 if (currentCharacter == '\n') {
2300 //must merge eventual cr followed by lf
2301 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2302 //System.out.println("merge LF-" + (currentPosition - 1));
2303 lineEnds[linePtr] = currentPosition - 1;
2305 int separatorPos = currentPosition - 1;
// guard: this position is not beyond the last recorded line end
2306 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2308 // System.out.println("LF-" + separatorPos);
2310 lineEnds[++linePtr] = separatorPos;
2311 } catch (IndexOutOfBoundsException e) {
// same grow-and-store recovery as in the carriage-return branch
2312 //linePtr value is correct
2313 int oldLength = lineEnds.length;
2314 int[] old = lineEnds;
2315 lineEnds = new int[oldLength + INCREMENT];
2316 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2317 lineEnds[linePtr] = separatorPos;
/*
 * Variant of the line-separator recording used when the separator was read as a unicode escape:
 * the recorded position is currentPosition - 6 instead of currentPosition - 1, and the
 * carriage-return/line-feed merge test compares against currentPosition - 7.
 *
 * Apart from those offsets the visible logic is parallel to pushLineSeparator: append the
 * position to lineEnds, grow the array by INCREMENT entries when it is full, move the recorded
 * end onto a following line feed, and update the last entry rather than appending when a line
 * feed follows a recorded carriage return.
 *
 * NOTE(review): several lines are missing from this excerpt - the body of the
 * checkNonExternalizedStringLiterals branch, the action taken by the guards, the try keywords
 * and the updates of wasAcr; confirm those against the full file.
 */
2324 public final void pushUnicodeLineSeparator() {
2325 // isUnicode means that the \r or \n has been read as a unicode character
2327 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2329 final int INCREMENT = 250;
2330 //currentCharacter is at position currentPosition-1
2332 if (this.checkNonExternalizedStringLiterals) {
2333 // reinitialize the current line for non externalize strings purpose
2338 if (currentCharacter == '\r') {
// presumably 6 is the length of a backslash-u escape with four hex digits - confirm
2339 int separatorPos = currentPosition - 6;
// guard: this position is not beyond the last recorded line end
2340 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2342 //System.out.println("CR-" + separatorPos);
2344 lineEnds[++linePtr] = separatorPos;
2345 } catch (IndexOutOfBoundsException e) {
// lineEnds was full: linePtr has already been incremented, so grow the array and store at linePtr
2346 //linePtr value is correct
2347 int oldLength = lineEnds.length;
2348 int[] old = lineEnds;
2349 lineEnds = new int[oldLength + INCREMENT];
2350 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2351 lineEnds[linePtr] = separatorPos;
2353 // look-ahead for merged cr+lf
2354 if (source[currentPosition] == '\n') {
2355 //System.out.println("look-ahead LF-" + currentPosition);
2356 lineEnds[linePtr] = currentPosition;
2364 if (currentCharacter == '\n') {
2365 //must merge eventual cr followed by lf
2366 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2367 //System.out.println("merge LF-" + (currentPosition - 1));
2368 lineEnds[linePtr] = currentPosition - 6;
2370 int separatorPos = currentPosition - 6;
// guard: this position is not beyond the last recorded line end
2371 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2373 // System.out.println("LF-" + separatorPos);
2375 lineEnds[++linePtr] = separatorPos;
2376 } catch (IndexOutOfBoundsException e) {
// same grow-and-store recovery as in the carriage-return branch
2377 //linePtr value is correct
2378 int oldLength = lineEnds.length;
2379 int[] old = lineEnds;
2380 lineEnds = new int[oldLength + INCREMENT];
2381 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2382 lineEnds[linePtr] = separatorPos;
/**
 * Pushes the comment that was just scanned onto the comment stack.
 * <p>
 * The comment's end is stored in commentStops at the new commentPtr and its
 * start (startPosition) in commentStarts at the same index. The sign of the
 * stored end position encodes the comment kind: currentPosition for a javadoc
 * comment, -currentPosition for any other comment. Both arrays are enlarged
 * by 30 entries when commentStops is full.
 *
 * @param isJavadoc true to store the end position as a positive value,
 *        false to store it negated
 */
2389 public final void recordComment(boolean isJavadoc) {
2391 // a new annotation comment is recorded
2393 commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2394 } catch (IndexOutOfBoundsException e) {
// commentStops was full; commentPtr has already been incremented, so after
// growing the array the value is stored at commentPtr without a second increment
2395 int oldStackLength = commentStops.length;
2396 int[] oldStack = commentStops;
2397 commentStops = new int[oldStackLength + 30];
2398 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2399 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2400 //grows the positions buffers too
// commentStarts is resized from the old commentStops length, so the two
// arrays are assumed to have the same length - TODO confirm at the allocation site
2401 int[] old = commentStarts;
2402 commentStarts = new int[oldStackLength + 30];
2403 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2406 //the buffer is of a correct size here
2407 commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning can start again at begin.
 * <p>
 * initialPosition, startPosition and currentPosition are all set to begin,
 * eofPosition is set one past end, and the comment stack is emptied.
 *
 * @param begin the source offset at which scanning resumes
 * @param end the last source offset to scan; eofPosition becomes end + 1
 */
2409 public void resetTo(int begin, int end) {
2410 //reset the scanner to a given position where it may rescan again
2413 initialPosition = startPosition = currentPosition = begin;
// end + 1 would overflow for Integer.MAX_VALUE, so that value is kept as is
2414 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2415 commentPtr = -1; // reset comment stack
/**
 * Decodes the character that follows a backslash inside a single-quoted string.
 * <p>
 * The next source character is read into currentCharacter (advancing
 * currentPosition) and the switch below then replaces it with the character
 * the escape stands for: a single quote or a backslash.
 *
 * @throws InvalidInputException declared by the signature; no throw statement
 *         appears among the statements of this method - TODO confirm it is still needed
 */
2418 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2419 // the string with "\\u" is a legal string of two chars \ and u
2420 //thus we use a direct access to the source (for regular cases).
2422 // if (unicodeAsBackSlash) {
2423 // // consume next character
2424 // unicodeAsBackSlash = false;
2425 // if (((currentCharacter = source[currentPosition++]) == '\\')
2426 // && (source[currentPosition] == 'u')) {
2427 // getNextUnicodeChar();
2429 // if (withoutUnicodePtr != 0) {
2430 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// direct read of the escaped character; unicode escapes are not decoded here
// (the decoding code above is commented out)
2434 currentCharacter = source[currentPosition++];
2435 switch (currentCharacter) {
// escaped single quote
2437 currentCharacter = '\'';
// escaped backslash
2440 currentCharacter = '\\';
// NOTE(review): second branch producing a backslash - presumably the default
// branch that keeps the backslash of an unrecognised escape; confirm
2443 currentCharacter = '\\';
/**
 * Decodes the character that follows a backslash inside a double-quoted string.
 * <p>
 * The next source character is read into currentCharacter (advancing
 * currentPosition) and replaced by the character the escape stands for:
 * tab, line feed, carriage return, double quote, single quote, backslash or
 * dollar sign. The backspace and form-feed translations are commented out.
 * Any other character is tried as an octal escape of one to three octal
 * digits, where a third digit is only accepted when the first digit is 0..3.
 *
 * @throws InvalidInputException with INVALID_ESCAPE from the octal branch
 *         (the guarding condition is presumably a range check on the decoded
 *         value - TODO confirm)
 */
2448 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2449 // the string with "\\u" is a legal string of two chars \ and u
2450 //thus we use a direct access to the source (for regular cases).
2452 // if (unicodeAsBackSlash) {
2453 // // consume next character
2454 // unicodeAsBackSlash = false;
2455 // if (((currentCharacter = source[currentPosition++]) == '\\')
2456 // && (source[currentPosition] == 'u')) {
2457 // getNextUnicodeChar();
2459 // if (withoutUnicodePtr != 0) {
2460 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// direct read of the escaped character; unicode escapes are not decoded here
// (the decoding code above is commented out)
2464 currentCharacter = source[currentPosition++];
2465 switch (currentCharacter) {
2467 // currentCharacter = '\b';
2470 currentCharacter = '\t';
2473 currentCharacter = '\n';
2476 // currentCharacter = '\f';
2479 currentCharacter = '\r';
2482 currentCharacter = '\"';
2485 currentCharacter = '\'';
2488 currentCharacter = '\\';
2491 currentCharacter = '$';
2494 // -----------octal escape--------------
2496 // OctalDigit OctalDigit
2497 // ZeroToThree OctalDigit OctalDigit
// value of the first candidate digit; anything outside 0..7 is not an octal escape
2499 int number = Character.getNumericValue(currentCharacter);
2500 if (number >= 0 && number <= 7) {
// true when the first digit is 4..7, in which case a third digit is not part of the escape
2501 boolean zeroToThreeNot = number > 3;
2502 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2503 int digit = Character.getNumericValue(currentCharacter);
2504 if (digit >= 0 && digit <= 7) {
2505 number = (number * 8) + digit;
2506 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2507 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2510 digit = Character.getNumericValue(currentCharacter);
2511 if (digit >= 0 && digit <= 7) {
2512 // has read \ZeroToThree OctalDigit OctalDigit
2513 number = (number * 8) + digit;
2514 } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2518 } else { // has read \OctalDigit NonDigit--> ignore last character
2521 } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2524 } else { // has read \OctalDigit --> ignore last character
2528 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the decoded character
2529 currentCharacter = (char) number;
// an escape that is neither listed above nor octal is not reported as an error:
// the throw below is disabled
2532 // throw new InvalidInputException(INVALID_ESCAPE);
2536 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2537 // return scanIdentifierOrKeyword( false );
/**
 * Consumes the remaining identifier characters of the current token and
 * classifies the token.
 * <p>
 * Variable tokens yield TokenNamethis for the exact text "$this" and
 * TokenNameVariable otherwise. For any other token a lower-cased copy of the
 * source text is compared with the PHP keywords, so keywords are recognised
 * regardless of case. The comparison dispatches on the first letter, then on
 * the token length, then letter by letter. Everything that is not a keyword,
 * including every one-character token, is reported as TokenNameIdentifier.
 *
 * @param isVariable presumably true when the token is a variable name; the
 *        statement testing it is not among the statements shown here - TODO confirm
 * @return the keyword token, TokenNamethis, TokenNameVariable or TokenNameIdentifier
 * @throws InvalidInputException declared by the signature; presumably
 *         propagated from getNextCharAsJavaIdentifierPart() - TODO confirm
 */
2540 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2543 //first dispatch on the first char.
2544 //then the length. If there are several
2545 //keywords with the same length AND the same first char, then do another
2546 //dispatch on the second char :-)...cool....but fast !
2548 useAssertAsAnIndentifier = false;
// swallow every following identifier character; the token then spans
// startPosition .. currentPosition - 1
2550 while (getNextCharAsJavaIdentifierPart()) {
// only the exact, case-sensitive spelling "$this" is special-cased
2554 if (new String(getCurrentTokenSource()).equals("$this")) {
2555 return TokenNamethis;
2557 return TokenNameVariable;
2562 // if (withoutUnicodePtr == 0)
2564 //quick test on length == 1 but not on length > 12 while most identifier
2565 //have a length which is <= 12...but there are lots of identifier with
// a single-character token can never be a keyword
2569 if ((length = currentPosition - startPosition) == 1)
2570 return TokenNameIdentifier;
// lower-case a private copy of the token so that keyword matching ignores case
2572 data = new char[length];
2573 index = startPosition;
2574 for (int i = 0; i < length; i++) {
2575 data[i] = Character.toLowerCase(source[index + i]);
2579 // if ((length = withoutUnicodePtr) == 1)
2580 // return TokenNameIdentifier;
2581 // // data = withoutUnicodeBuffer;
2582 // data = new char[withoutUnicodeBuffer.length];
2583 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2584 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
// from here on index walks through data; each (data[++index] == c) test
// advances it as a side effect, even when the comparison fails
2589 firstLetter = data[index];
2590 switch (firstLetter) {
2592 case 'a' : // as and array abstract
2595 if ((data[++index] == 's')) {
2598 return TokenNameIdentifier;
2601 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2602 return TokenNameAND;
2604 return TokenNameIdentifier;
2607 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2608 return TokenNamearray;
2610 return TokenNameIdentifier;
2612 if ((data[++index] == 'b')
2613 && (data[++index] == 's')
2614 && (data[++index] == 't')
2615 && (data[++index] == 'r')
2616 && (data[++index] == 'a')
2617 && (data[++index] == 'c')
2618 && (data[++index] == 't'))
2619 return TokenNameabstract;
2621 return TokenNameIdentifier;
2623 return TokenNameIdentifier;
2628 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
2629 return TokenNamebreak;
2631 return TokenNameIdentifier;
2633 return TokenNameIdentifier;
2636 case 'c' : //case catch class const continue
2639 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
2640 return TokenNamecase;
2642 return TokenNameIdentifier;
2644 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
2645 return TokenNamecatch;
// NOTE(review): the follow-up tests start at data[index] as left behind by the
// failed chain above. That is position 1 only when the very first comparison
// failed; after a partial match (e.g. "calas", assuming index is 0 on entry to
// the switch) index is further along and the chain below can read past the
// end of data. The same pattern recurs in the 'e', 'f', 'i' and 's' cases - confirm.
2646 if ((data[index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
2647 return TokenNameclass;
2648 if ((data[index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
2649 return TokenNameconst;
2651 return TokenNameIdentifier;
2653 if ((data[++index] == 'o')
2654 && (data[++index] == 'n')
2655 && (data[++index] == 't')
2656 && (data[++index] == 'i')
2657 && (data[++index] == 'n')
2658 && (data[++index] == 'u')
2659 && (data[++index] == 'e'))
2660 return TokenNamecontinue;
2662 return TokenNameIdentifier;
2664 return TokenNameIdentifier;
2667 case 'd' : // declare default do die
2668 // TODO delete define ==> no keyword !
2671 if ((data[++index] == 'o'))
2674 return TokenNameIdentifier;
2676 // if ((data[++index] == 'e')
2677 // && (data[++index] == 'f')
2678 // && (data[++index] == 'i')
2679 // && (data[++index] == 'n')
2680 // && (data[++index] == 'e'))
2681 // return TokenNamedefine;
2683 // return TokenNameIdentifier;
2685 if ((data[++index] == 'e')
2686 && (data[++index] == 'c')
2687 && (data[++index] == 'l')
2688 && (data[++index] == 'a')
2689 && (data[++index] == 'r')
2690 && (data[++index] == 'e'))
2691 return TokenNamedeclare;
2693 if ((data[++index] == 'e')
2694 && (data[++index] == 'f')
2695 && (data[++index] == 'a')
2696 && (data[++index] == 'u')
2697 && (data[++index] == 'l')
2698 && (data[++index] == 't'))
2699 return TokenNamedefault;
2701 return TokenNameIdentifier;
2703 return TokenNameIdentifier;
2705 case 'e' : //echo else exit elseif extends eval
2708 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
2709 return TokenNameecho;
2710 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2711 return TokenNameelse;
2712 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
2713 return TokenNameexit;
2714 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
2715 return TokenNameeval;
2717 return TokenNameIdentifier;
2718 case 5 : // endif empty
2719 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
2720 return TokenNameendif;
2721 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
2722 return TokenNameempty;
2724 return TokenNameIdentifier;
2726 if ((data[++index] == 'n')
2727 && (data[++index] == 'd')
2728 && (data[++index] == 'f')
2729 && (data[++index] == 'o')
2730 && (data[++index] == 'r'))
2731 return TokenNameendfor;
2733 (data[index] == 'l')
2734 && (data[++index] == 's')
2735 && (data[++index] == 'e')
2736 && (data[++index] == 'i')
2737 && (data[++index] == 'f'))
2738 return TokenNameelseif;
2740 return TokenNameIdentifier;
2742 if ((data[++index] == 'x')
2743 && (data[++index] == 't')
2744 && (data[++index] == 'e')
2745 && (data[++index] == 'n')
2746 && (data[++index] == 'd')
2747 && (data[++index] == 's'))
2748 return TokenNameextends;
2750 return TokenNameIdentifier;
2751 case 8 : // endwhile
2752 if ((data[++index] == 'n')
2753 && (data[++index] == 'd')
2754 && (data[++index] == 'w')
2755 && (data[++index] == 'h')
2756 && (data[++index] == 'i')
2757 && (data[++index] == 'l')
2758 && (data[++index] == 'e'))
2759 return TokenNameendwhile;
2761 return TokenNameIdentifier;
2762 case 9 : // endswitch
2763 if ((data[++index] == 'n')
2764 && (data[++index] == 'd')
2765 && (data[++index] == 's')
2766 && (data[++index] == 'w')
2767 && (data[++index] == 'i')
2768 && (data[++index] == 't')
2769 && (data[++index] == 'c')
2770 && (data[++index] == 'h'))
2771 return TokenNameendswitch;
2773 return TokenNameIdentifier;
2774 case 10 : // enddeclare
2775 if ((data[++index] == 'n')
2776 && (data[++index] == 'd')
2777 && (data[++index] == 'd')
2778 && (data[++index] == 'e')
2779 && (data[++index] == 'c')
2780 && (data[++index] == 'l')
2781 && (data[++index] == 'a')
2782 && (data[++index] == 'r')
2783 && (data[++index] == 'e'))
// NOTE(review): the letters matched above spell "enddeclare", yet the token
// returned is the one for "endforeach" - looks like a copy/paste slip; confirm
// which token the parser expects for enddeclare
2784 return TokenNameendforeach;
2786 if ((data[++index] == 'n') // endforeach
2787 && (data[++index] == 'd')
2788 && (data[++index] == 'f')
2789 && (data[++index] == 'o')
2790 && (data[++index] == 'r')
2791 && (data[++index] == 'e')
2792 && (data[++index] == 'a')
2793 && (data[++index] == 'c')
2794 && (data[++index] == 'h'))
2795 return TokenNameendforeach;
2797 return TokenNameIdentifier;
2800 return TokenNameIdentifier;
2803 case 'f' : //for false final function
2806 if ((data[++index] == 'o') && (data[++index] == 'r'))
2807 return TokenNamefor;
2809 return TokenNameIdentifier;
2811 if ((data[++index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2812 return TokenNamefalse;
2813 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
2814 return TokenNamefinal;
2816 return TokenNameIdentifier;
2818 if ((data[++index] == 'o')
2819 && (data[++index] == 'r')
2820 && (data[++index] == 'e')
2821 && (data[++index] == 'a')
2822 && (data[++index] == 'c')
2823 && (data[++index] == 'h'))
2824 return TokenNameforeach;
2826 return TokenNameIdentifier;
2827 case 8 : // function
2828 if ((data[++index] == 'u')
2829 && (data[++index] == 'n')
2830 && (data[++index] == 'c')
2831 && (data[++index] == 't')
2832 && (data[++index] == 'i')
2833 && (data[++index] == 'o')
2834 && (data[++index] == 'n'))
2835 return TokenNamefunction;
2837 return TokenNameIdentifier;
2839 return TokenNameIdentifier;
2843 if ((data[++index] == 'l')
2844 && (data[++index] == 'o')
2845 && (data[++index] == 'b')
2846 && (data[++index] == 'a')
2847 && (data[++index] == 'l')) {
2848 return TokenNameglobal;
2851 return TokenNameIdentifier;
2853 case 'i' : //if int isset include include_once instanceof interface implements
2856 if (data[++index] == 'f')
2859 return TokenNameIdentifier;
2861 // if ((data[++index] == 'n') && (data[++index] == 't'))
2862 // return TokenNameint;
2864 // return TokenNameIdentifier;
2866 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
2867 return TokenNameisset;
2869 return TokenNameIdentifier;
2871 if ((data[++index] == 'n')
2872 && (data[++index] == 'c')
2873 && (data[++index] == 'l')
2874 && (data[++index] == 'u')
2875 && (data[++index] == 'd')
2876 && (data[++index] == 'e'))
2877 return TokenNameinclude;
2879 return TokenNameIdentifier;
2880 case 9 : // interface
2881 if ((data[++index] == 'n')
2882 && (data[++index] == 't')
2883 && (data[++index] == 'e')
2884 && (data[++index] == 'r')
2885 && (data[++index] == 'f')
2886 && (data[++index] == 'a')
2887 && (data[++index] == 'c')
2888 && (data[++index] == 'e'))
2889 return TokenNameinterface;
2891 return TokenNameIdentifier;
2892 case 10 : // instanceof
2893 if ((data[++index] == 'n')
2894 && (data[++index] == 's')
2895 && (data[++index] == 't')
2896 && (data[++index] == 'a')
2897 && (data[++index] == 'n')
2898 && (data[++index] == 'c')
2899 && (data[++index] == 'e')
2900 && (data[++index] == 'o')
2901 && (data[++index] == 'f'))
2902 return TokenNameinstanceof;
2903 if ((data[index] == 'm')
2904 && (data[++index] == 'p')
2905 && (data[++index] == 'l')
2906 && (data[++index] == 'e')
2907 && (data[++index] == 'm')
2908 && (data[++index] == 'e')
2909 && (data[++index] == 'n')
2910 && (data[++index] == 't')
2911 && (data[++index] == 's'))
2912 return TokenNameimplements;
2914 return TokenNameIdentifier;
2916 if ((data[++index] == 'n')
2917 && (data[++index] == 'c')
2918 && (data[++index] == 'l')
2919 && (data[++index] == 'u')
2920 && (data[++index] == 'd')
2921 && (data[++index] == 'e')
2922 && (data[++index] == '_')
2923 && (data[++index] == 'o')
2924 && (data[++index] == 'n')
2925 && (data[++index] == 'c')
2926 && (data[++index] == 'e'))
2927 return TokenNameinclude_once;
2929 return TokenNameIdentifier;
2931 return TokenNameIdentifier;
2936 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
2937 return TokenNamelist;
2940 return TokenNameIdentifier;
2942 case 'n' : // new null
2945 if ((data[++index] == 'e') && (data[++index] == 'w'))
2946 return TokenNamenew;
2948 return TokenNameIdentifier;
2950 if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l'))
2951 return TokenNamenull;
2953 return TokenNameIdentifier;
2956 return TokenNameIdentifier;
2958 case 'o' : // or old_function
2960 if (data[++index] == 'r') {
2964 // if (length == 12) {
2965 // if ((data[++index] == 'l')
2966 // && (data[++index] == 'd')
2967 // && (data[++index] == '_')
2968 // && (data[++index] == 'f')
2969 // && (data[++index] == 'u')
2970 // && (data[++index] == 'n')
2971 // && (data[++index] == 'c')
2972 // && (data[++index] == 't')
2973 // && (data[++index] == 'i')
2974 // && (data[++index] == 'o')
2975 // && (data[++index] == 'n')) {
2976 // return TokenNameold_function;
2979 return TokenNameIdentifier;
2981 case 'p' : // print public private protected
2984 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
2985 return TokenNameprint;
2987 return TokenNameIdentifier;
2989 if ((data[++index] == 'u')
2990 && (data[++index] == 'b')
2991 && (data[++index] == 'l')
2992 && (data[++index] == 'i')
2993 && (data[++index] == 'c')) {
2994 return TokenNamepublic;
2996 return TokenNameIdentifier;
2998 if ((data[++index] == 'r')
2999 && (data[++index] == 'i')
3000 && (data[++index] == 'v')
3001 && (data[++index] == 'a')
3002 && (data[++index] == 't')
3003 && (data[++index] == 'e')) {
3004 return TokenNameprivate;
3006 return TokenNameIdentifier;
3008 if ((data[++index] == 'r')
3009 && (data[++index] == 'o')
3010 && (data[++index] == 't')
3011 && (data[++index] == 'e')
3012 && (data[++index] == 'c')
3013 && (data[++index] == 't')
3014 && (data[++index] == 'e')
3015 && (data[++index] == 'd')) {
3016 return TokenNameprotected;
3018 return TokenNameIdentifier;
3020 return TokenNameIdentifier;
3021 case 'r' : //return require require_once
3023 if ((data[++index] == 'e')
3024 && (data[++index] == 't')
3025 && (data[++index] == 'u')
3026 && (data[++index] == 'r')
3027 && (data[++index] == 'n')) {
3028 return TokenNamereturn;
3030 } else if (length == 7) {
3031 if ((data[++index] == 'e')
3032 && (data[++index] == 'q')
3033 && (data[++index] == 'u')
3034 && (data[++index] == 'i')
3035 && (data[++index] == 'r')
3036 && (data[++index] == 'e')) {
3037 return TokenNamerequire;
3039 } else if (length == 12) {
3040 if ((data[++index] == 'e')
3041 && (data[++index] == 'q')
3042 && (data[++index] == 'u')
3043 && (data[++index] == 'i')
3044 && (data[++index] == 'r')
3045 && (data[++index] == 'e')
3046 && (data[++index] == '_')
3047 && (data[++index] == 'o')
3048 && (data[++index] == 'n')
3049 && (data[++index] == 'c')
3050 && (data[++index] == 'e')) {
3051 return TokenNamerequire_once;
3054 return TokenNameIdentifier;
3056 case 's' : //static switch
3059 if (data[++index] == 't')
3060 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3061 return TokenNamestatic;
3063 return TokenNameIdentifier;
3065 (data[index] == 'w')
3066 && (data[++index] == 'i')
3067 && (data[++index] == 't')
3068 && (data[++index] == 'c')
3069 && (data[++index] == 'h'))
3070 return TokenNameswitch;
3072 return TokenNameIdentifier;
3074 return TokenNameIdentifier;
3077 case 't' : // try true throw
3080 if ((data[++index] == 'r') && (data[++index] == 'y'))
3081 return TokenNametry;
3083 return TokenNameIdentifier;
3085 if ((data[++index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e'))
3086 return TokenNametrue;
3088 return TokenNameIdentifier;
3090 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3091 return TokenNamethrow;
3093 return TokenNameIdentifier;
3096 return TokenNameIdentifier;
3098 case 'u' : //use unset
3101 if ((data[++index] == 's') && (data[++index] == 'e'))
3102 return TokenNameuse;
3104 return TokenNameIdentifier;
3106 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3107 return TokenNameunset;
3109 return TokenNameIdentifier;
3111 return TokenNameIdentifier;
3116 if ((data[++index] == 'a') && (data[++index] == 'r'))
3117 return TokenNamevar;
3119 return TokenNameIdentifier;
3122 return TokenNameIdentifier;
3128 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3129 return TokenNamewhile;
3131 return TokenNameIdentifier;
3132 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&&
3133 // (data[++index]=='p'))
3134 //return TokenNamewidefp ;
3136 //return TokenNameIdentifier;
3138 return TokenNameIdentifier;
3144 if ((data[++index] == 'o') && (data[++index] == 'r'))
3145 return TokenNameXOR;
3147 return TokenNameIdentifier;
3150 return TokenNameIdentifier;
3153 return TokenNameIdentifier;
/**
 * Scans a numeric literal whose first digit is already in currentCharacter.
 * <p>
 * A leading 0 followed by x or X starts a hexadecimal integer. A leading 0
 * followed by further digits is a potential octal integer that still becomes
 * a double when a d/D suffix or a fraction follows. In every other case the
 * digits are read as a decimal number with an optional fraction, exponent
 * and d/D suffix.
 *
 * @param dotPrefix true when the literal was introduced by a '.'; the number
 *        is then floating from the start and no further '.' is accepted
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException with INVALID_HEXA when no hexadecimal digit
 *         follows the x/X, or with INVALID_FLOAT when an exponent is not
 *         followed by a digit
 */
3156 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3158 //when entering this method the currentCharacter is the first
3159 //digit of the number , i.e. it may be preceded by a . when
3162 boolean floating = dotPrefix;
3163 if ((!dotPrefix) && (currentCharacter == '0')) {
3164 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3165 //force the first char of the hexa number do exist...
3166 // consume next character
3167 unicodeAsBackSlash = false;
3168 currentCharacter = source[currentPosition++];
3169 // if (((currentCharacter = source[currentPosition++]) == '\\')
3170 // && (source[currentPosition] == 'u')) {
3171 // getNextUnicodeChar();
3173 // if (withoutUnicodePtr != 0) {
3174 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3177 if (Character.digit(currentCharacter, 16) == -1)
3178 throw new InvalidInputException(INVALID_HEXA);
// swallow the remaining hexadecimal digits
3180 while (getNextCharAsDigit(16)) {
3182 // if (getNextChar('l', 'L') >= 0)
3183 // return TokenNameLongLiteral;
3185 return TokenNameIntegerLiteral;
3188 //there is x or X in the number
3189 //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3190 if (getNextCharAsDigit()) {
3191 //-------------potential octal-----------------
3192 while (getNextCharAsDigit()) {
3195 // if (getNextChar('l', 'L') >= 0) {
3196 // return TokenNameLongLiteral;
3199 // if (getNextChar('f', 'F') >= 0) {
3200 // return TokenNameFloatingPointLiteral;
3203 if (getNextChar('d', 'D') >= 0) {
3204 return TokenNameDoubleLiteral;
3205 } else { //make the distinction between octal and float ....
3206 if (getNextChar('.')) { //bingo ! ....
// fraction digits
3207 while (getNextCharAsDigit()) {
3209 if (getNextChar('e', 'E') >= 0) {
3210 // consume next character
3211 unicodeAsBackSlash = false;
3212 currentCharacter = source[currentPosition++];
3213 // if (((currentCharacter = source[currentPosition++]) == '\\')
3214 // && (source[currentPosition] == 'u')) {
3215 // getNextUnicodeChar();
3217 // if (withoutUnicodePtr != 0) {
3218 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// optional sign of the exponent
3222 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3223 // consume next character
3224 unicodeAsBackSlash = false;
3225 currentCharacter = source[currentPosition++];
3226 // if (((currentCharacter = source[currentPosition++]) == '\\')
3227 // && (source[currentPosition] == 'u')) {
3228 // getNextUnicodeChar();
3230 // if (withoutUnicodePtr != 0) {
3231 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3232 // currentCharacter;
// an exponent needs at least one digit
3236 if (!Character.isDigit(currentCharacter))
3237 throw new InvalidInputException(INVALID_FLOAT);
3238 while (getNextCharAsDigit()) {
3241 // if (getNextChar('f', 'F') >= 0)
3242 // return TokenNameFloatingPointLiteral;
3243 getNextChar('d', 'D'); //jump over potential d or D
3244 return TokenNameDoubleLiteral;
3246 return TokenNameIntegerLiteral;
// ---- decimal number: integer digits ----
3254 while (getNextCharAsDigit()) {
3257 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3258 // return TokenNameLongLiteral;
3260 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3261 while (getNextCharAsDigit()) {
3266 //if floating is true both exponent and suffix may be optional
3268 if (getNextChar('e', 'E') >= 0) {
3270 // consume next character
3271 unicodeAsBackSlash = false;
3272 currentCharacter = source[currentPosition++];
3273 // if (((currentCharacter = source[currentPosition++]) == '\\')
3274 // && (source[currentPosition] == 'u')) {
3275 // getNextUnicodeChar();
3277 // if (withoutUnicodePtr != 0) {
3278 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3282 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
3283 unicodeAsBackSlash = false;
3284 currentCharacter = source[currentPosition++];
3285 // if (((currentCharacter = source[currentPosition++]) == '\\')
3286 // && (source[currentPosition] == 'u')) {
3287 // getNextUnicodeChar();
3289 // if (withoutUnicodePtr != 0) {
3290 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// an exponent needs at least one digit
3294 if (!Character.isDigit(currentCharacter))
3295 throw new InvalidInputException(INVALID_FLOAT);
3296 while (getNextCharAsDigit()) {
// an explicit d/D suffix always makes the literal a double
3300 if (getNextChar('d', 'D') >= 0)
3301 return TokenNameDoubleLiteral;
3302 // if (getNextChar('f', 'F') >= 0)
3303 // return TokenNameFloatingPointLiteral;
3305 //the long flag has been tested before
3307 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3310 * Search the line number corresponding to a specific position
3313 public final int getLineNumber(int position) {
// lineEnds holds the recorded line-end positions; without it no lookup is possible
3315 if (lineEnds == null)
// number of valid entries in lineEnds (linePtr is the index of the last one written)
3317 int length = linePtr + 1;
// g and d are the lower and upper bounds of the search window over lineEnds
// (the comparisons below look like a binary search on position - TODO confirm)
3320 int g = 0, d = length - 1;
3324 if (position < lineEnds[m]) {
3326 } else if (position > lineEnds[m]) {
// final comparison against the entry the search stopped on
3332 if (position < lineEnds[m]) {
// Setter for the scanner's PHP mode; presumably stores mode in the public
// phpMode field - TODO confirm against the method body.
3338 public void setPHPMode(boolean mode) {
/**
 * Installs the character buffer to be scanned and rewinds the scanner.
 * <p>
 * initialPosition and currentPosition are reset to 0, containsAssertKeyword
 * is cleared and withoutUnicodeBuffer is reallocated with the length of the
 * new source.
 *
 * @param source the characters to scan; null is replaced by an empty array.
 *        The array is stored as is, not copied.
 */
3342 public final void setSource(char[] source) {
3343 //the source-buffer is set to sourceString
3345 if (source == null) {
3346 this.source = new char[0];
3348 this.source = source;
3351 initialPosition = currentPosition = 0;
3352 containsAssertKeyword = false;
// sized from this.source (never null at this point) rather than from the parameter
3353 withoutUnicodeBuffer = new char[this.source.length];
/**
 * Renders the scanner state for debugging.
 * <p>
 * When startPosition is at the end of the source, or currentPosition is
 * beyond it, a short notice followed by the whole source is returned.
 * Otherwise the source is split into the text before the current token, the
 * token itself (startPosition .. currentPosition - 1) and the text after it,
 * with the token framed by "Starts here -->" and "<-- Ends here" markers.
 *
 * @return the annotated source text
 */
3357 public String toString() {
3358 if (startPosition == source.length)
3359 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3360 if (currentPosition > source.length)
3361 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
// everything before the current token
3363 char front[] = new char[startPosition];
3364 System.arraycopy(source, 0, front, 0, startPosition);
// the current token; its length simplifies to currentPosition - startPosition
3366 int middleLength = (currentPosition - 1) - startPosition + 1;
3368 if (middleLength > -1) {
3369 middle = new char[middleLength];
3370 System.arraycopy(source, startPosition, middle, 0, middleLength);
3372 middle = new char[0];
// everything after the current token
// NOTE(review): end is allocated one element larger than the number of
// characters copied into it, so its last element stays '\0' - confirm
// whether that trailing character is intended
3375 char end[] = new char[source.length - (currentPosition - 1)];
3376 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3378 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3379 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable description of a token, for debugging output.
 * <p>
 * Keywords and operators are rendered as their source spelling. Tokens whose
 * text varies (identifiers, variables, literals, comments, whitespace, HTML)
 * are rendered as a label followed by the current token source in
 * parentheses. Any value without its own branch is rendered as
 * "not-a-token(act) " followed by the current token source.
 * <p>
 * NOTE(review): no spelling is returned here for several keyword tokens that
 * scanIdentifierOrKeyword produces (e.g. declare, catch, try, throw, const,
 * isset, empty, exit, eval, use, instanceof); they presumably end up in the
 * "not-a-token" branch - confirm whether that is intended.
 *
 * @param act the token value, one of the TokenName constants
 * @return the description of the token
 */
3382 public final String toStringAction(int act) {
3385 case TokenNameERROR :
3386 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3387 case TokenNameStopPHP :
3388 return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3389 case TokenNameIdentifier :
3390 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3391 case TokenNameVariable :
3392 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// ---- keywords ----
3393 case TokenNameabstract :
3394 return "abstract"; //$NON-NLS-1$
3395 case TokenNamearray :
3396 return "array"; //$NON-NLS-1$
3398 return "as"; //$NON-NLS-1$
3399 case TokenNamebreak :
3400 return "break"; //$NON-NLS-1$
3401 case TokenNamecase :
3402 return "case"; //$NON-NLS-1$
3403 case TokenNameclass :
3404 return "class"; //$NON-NLS-1$
3405 case TokenNamecontinue :
3406 return "continue"; //$NON-NLS-1$
3407 case TokenNamedefault :
3408 return "default"; //$NON-NLS-1$
3409 // case TokenNamedefine :
3410 // return "define"; //$NON-NLS-1$
3412 return "do"; //$NON-NLS-1$
3413 case TokenNameecho :
3414 return "echo"; //$NON-NLS-1$
3415 case TokenNameelse :
3416 return "else"; //$NON-NLS-1$
3417 case TokenNameelseif :
3418 return "elseif"; //$NON-NLS-1$
3419 case TokenNameendfor :
3420 return "endfor"; //$NON-NLS-1$
3421 case TokenNameendforeach :
3422 return "endforeach"; //$NON-NLS-1$
3423 case TokenNameendif :
3424 return "endif"; //$NON-NLS-1$
3425 case TokenNameendswitch :
3426 return "endswitch"; //$NON-NLS-1$
3427 case TokenNameendwhile :
3428 return "endwhile"; //$NON-NLS-1$
3429 case TokenNameextends :
3430 return "extends"; //$NON-NLS-1$
3431 case TokenNamefalse :
3432 return "false"; //$NON-NLS-1$
3433 case TokenNamefinal :
3434 return "final"; //$NON-NLS-1$
3436 return "for"; //$NON-NLS-1$
3437 case TokenNameforeach :
3438 return "foreach"; //$NON-NLS-1$
3439 case TokenNamefunction :
3440 return "function"; //$NON-NLS-1$
3441 case TokenNameglobal :
3442 return "global"; //$NON-NLS-1$
3444 return "if"; //$NON-NLS-1$
3445 case TokenNameimplements :
3446 return "implements"; //$NON-NLS-1$
3447 case TokenNameinclude :
3448 return "include"; //$NON-NLS-1$
3449 case TokenNameinclude_once :
3450 return "include_once"; //$NON-NLS-1$
3451 case TokenNameinterface :
3452 return "interface"; //$NON-NLS-1$
3453 case TokenNamelist :
3454 return "list"; //$NON-NLS-1$
3456 return "new"; //$NON-NLS-1$
3457 case TokenNamenull :
3458 return "null"; //$NON-NLS-1$
3459 case TokenNameprint :
3460 return "print"; //$NON-NLS-1$
3461 case TokenNameprivate :
3462 return "private"; //$NON-NLS-1$
3463 case TokenNameprotected :
3464 return "protected"; //$NON-NLS-1$
3465 case TokenNamepublic :
3466 return "public"; //$NON-NLS-1$
3467 case TokenNamerequire :
3468 return "require"; //$NON-NLS-1$
3469 case TokenNamerequire_once :
3470 return "require_once"; //$NON-NLS-1$
3471 case TokenNamereturn :
3472 return "return"; //$NON-NLS-1$
3473 case TokenNamestatic :
3474 return "static"; //$NON-NLS-1$
3475 case TokenNameswitch :
3476 return "switch"; //$NON-NLS-1$
3477 case TokenNametrue :
3478 return "true"; //$NON-NLS-1$
3479 case TokenNameunset :
3480 return "unset"; //$NON-NLS-1$
3482 return "var"; //$NON-NLS-1$
3483 case TokenNamewhile :
3484 return "while"; //$NON-NLS-1$
3485 case TokenNamethis :
3486 return "$this"; //$NON-NLS-1$
// ---- literals: label plus the token text ----
3487 case TokenNameIntegerLiteral :
3488 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3489 case TokenNameDoubleLiteral :
3490 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3491 case TokenNameStringLiteral :
3492 return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3493 case TokenNameStringConstant :
3494 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3495 case TokenNameStringInterpolated :
3496 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3497 case TokenNameHEREDOC :
3498 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// ---- operators and punctuation ----
3500 case TokenNamePLUS_PLUS :
3501 return "++"; //$NON-NLS-1$
3502 case TokenNameMINUS_MINUS :
3503 return "--"; //$NON-NLS-1$
3504 case TokenNameEQUAL_EQUAL :
3505 return "=="; //$NON-NLS-1$
3506 case TokenNameEQUAL_EQUAL_EQUAL :
3507 return "==="; //$NON-NLS-1$
3508 case TokenNameEQUAL_GREATER :
3509 return "=>"; //$NON-NLS-1$
3510 case TokenNameLESS_EQUAL :
3511 return "<="; //$NON-NLS-1$
3512 case TokenNameGREATER_EQUAL :
3513 return ">="; //$NON-NLS-1$
3514 case TokenNameNOT_EQUAL :
3515 return "!="; //$NON-NLS-1$
3516 case TokenNameNOT_EQUAL_EQUAL :
3517 return "!=="; //$NON-NLS-1$
3518 case TokenNameLEFT_SHIFT :
3519 return "<<"; //$NON-NLS-1$
3520 case TokenNameRIGHT_SHIFT :
3521 return ">>"; //$NON-NLS-1$
3522 case TokenNamePLUS_EQUAL :
3523 return "+="; //$NON-NLS-1$
3524 case TokenNameMINUS_EQUAL :
3525 return "-="; //$NON-NLS-1$
3526 case TokenNameMULTIPLY_EQUAL :
3527 return "*="; //$NON-NLS-1$
3528 case TokenNameDIVIDE_EQUAL :
3529 return "/="; //$NON-NLS-1$
3530 case TokenNameAND_EQUAL :
3531 return "&="; //$NON-NLS-1$
3532 case TokenNameOR_EQUAL :
3533 return "|="; //$NON-NLS-1$
3534 case TokenNameXOR_EQUAL :
3535 return "^="; //$NON-NLS-1$
3536 case TokenNameREMAINDER_EQUAL :
3537 return "%="; //$NON-NLS-1$
3538 case TokenNameLEFT_SHIFT_EQUAL :
3539 return "<<="; //$NON-NLS-1$
3540 case TokenNameRIGHT_SHIFT_EQUAL :
3541 return ">>="; //$NON-NLS-1$
3542 case TokenNameOR_OR :
3543 return "||"; //$NON-NLS-1$
3544 case TokenNameAND_AND :
3545 return "&&"; //$NON-NLS-1$
3546 case TokenNamePLUS :
3547 return "+"; //$NON-NLS-1$
3548 case TokenNameMINUS :
3549 return "-"; //$NON-NLS-1$
3550 case TokenNameMINUS_GREATER :
3553 return "!"; //$NON-NLS-1$
3554 case TokenNameREMAINDER :
3555 return "%"; //$NON-NLS-1$
3557 return "^"; //$NON-NLS-1$
3559 return "&"; //$NON-NLS-1$
3560 case TokenNameMULTIPLY :
3561 return "*"; //$NON-NLS-1$
3563 return "|"; //$NON-NLS-1$
3564 case TokenNameTWIDDLE :
3565 return "~"; //$NON-NLS-1$
3566 case TokenNameTWIDDLE_EQUAL :
3567 return "~="; //$NON-NLS-1$
3568 case TokenNameDIVIDE :
3569 return "/"; //$NON-NLS-1$
3570 case TokenNameGREATER :
3571 return ">"; //$NON-NLS-1$
3572 case TokenNameLESS :
3573 return "<"; //$NON-NLS-1$
3574 case TokenNameLPAREN :
3575 return "("; //$NON-NLS-1$
3576 case TokenNameRPAREN :
3577 return ")"; //$NON-NLS-1$
3578 case TokenNameLBRACE :
3579 return "{"; //$NON-NLS-1$
3580 case TokenNameRBRACE :
3581 return "}"; //$NON-NLS-1$
3582 case TokenNameLBRACKET :
3583 return "["; //$NON-NLS-1$
3584 case TokenNameRBRACKET :
3585 return "]"; //$NON-NLS-1$
3586 case TokenNameSEMICOLON :
3587 return ";"; //$NON-NLS-1$
3588 case TokenNameQUESTION :
3589 return "?"; //$NON-NLS-1$
3590 case TokenNameCOLON :
3591 return ":"; //$NON-NLS-1$
3592 case TokenNameCOMMA :
3593 return ","; //$NON-NLS-1$
3595 return "."; //$NON-NLS-1$
3596 case TokenNameEQUAL :
3597 return "="; //$NON-NLS-1$
3600 case TokenNameDOLLAR_LBRACE :
3603 return "EOF"; //$NON-NLS-1$
// ---- tokens only produced when whitespace/comments are tokenized, plus inline HTML ----
3604 case TokenNameWHITESPACE :
3605 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3606 case TokenNameCOMMENT_LINE :
3607 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3608 case TokenNameCOMMENT_BLOCK :
3609 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3610 case TokenNameCOMMENT_PHPDOC :
3611 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3612 case TokenNameHTML :
3613 return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// fallback for any value without its own branch
3615 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner with assert mode switched off: the three flags are forwarded to
 * the four-argument constructor and <code>false</code> is passed as its last argument.
 *
 * @param tokenizeComments forwarded unchanged
 * @param tokenizeWhiteSpace forwarded unchanged
 * @param checkNonExternalizedStringLiterals forwarded unchanged
 */
3619 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
3620 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
3624 boolean tokenizeComments,
3625 boolean tokenizeWhiteSpace,
3626 boolean checkNonExternalizedStringLiterals,
3627 boolean assertMode) {
// Four-flag form: each argument is stored in the field of the same name.
// eofPosition starts at Integer.MAX_VALUE; the field's declaration comment says EOF is
// generated past that position, so this presumably means "no cut-off yet" - TODO confirm.
3628 this.eofPosition = Integer.MAX_VALUE;
3629 this.tokenizeComments = tokenizeComments;
3630 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3631 this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3632 this.assertMode = assertMode;
/**
 * Tests <code>currentLine</code> for <code>null</code> and hands it to
 * <code>parseTags</code>.
 *
 * @throws InvalidInputException propagated from parseTags, which declares it
 */
3635 private void checkNonExternalizeString() throws InvalidInputException {
// NOTE(review): presumably the null test guards an early exit so that parseTags only
// ever receives a non-null line - confirm against the guarded statement.
3636 if (currentLine == null)
3638 parseTags(currentLine);
/**
 * Reads the numbered tags (TAG_PREFIX ... TAG_POSTFIX) out of the current token's
 * source, checks each tag number against <code>line</code>, and then publishes the
 * non-null literals of <code>line</code> in <code>nonNLSStrings</code>.
 *
 * @param line the literals recorded for the line being checked; queried here through
 *            size(), exists(int) and iterator()
 * @throws InvalidInputException declared in the signature; the visible statements
 *             contain no explicit throw
 */
3641 private void parseTags(NLSLine line) throws InvalidInputException {
3642 String s = new String(getCurrentTokenSource());
3643 int pos = s.indexOf(TAG_PREFIX);
3644 int lineLength = line.size();
// The tag number is the text between the end of TAG_PREFIX and the next TAG_POSTFIX.
3646 int start = pos + TAG_PREFIX_LENGTH;
3647 int end = s.indexOf(TAG_POSTFIX, start);
// NOTE(review): indexOf yields -1 when no TAG_POSTFIX follows, and substring(start, -1)
// throws StringIndexOutOfBoundsException - confirm an unterminated tag cannot get here.
3648 String index = s.substring(start, end);
3651 i = Integer.parseInt(index) - 1;
3652 // Tags are one based not zero based.
3653 } catch (NumberFormatException e) {
3654 i = -1; // we don't want to consider this as a valid NLS tag
// NOTE(review): presumably a literal whose index is tagged gets cleared from the line,
// which is why only non-null entries are collected further down - confirm.
3656 if (line.exists(i)) {
// Look for the next TAG_PREFIX, searching from just after the one handled above.
3659 pos = s.indexOf(TAG_PREFIX, start);
// Pack the non-null literals of the line to the front of a lineLength-sized array.
3662 this.nonNLSStrings = new StringLiteral[lineLength];
3663 int nonNLSCounter = 0;
3664 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3665 StringLiteral literal = (StringLiteral) iterator.next();
3666 if (literal != null) {
3667 this.nonNLSStrings[nonNLSCounter++] = literal;
// Nothing was collected: publish null instead of an array without entries.
3670 if (nonNLSCounter == 0) {
3671 this.nonNLSStrings = null;
// NOTE(review): presumably only reached when at least one literal was collected - confirm.
3675 this.wasNonExternalizedStringLiteral = true;
// Shrink the array to the number of literals actually stored; the new array is created
// inside the argument list, after the old reference has been read as the copy source.
3676 if (nonNLSCounter != lineLength) {
3677 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
/**
 * Decodes one escape sequence and leaves the resulting character in
 * <code>currentCharacter</code>.
 * <p>
 * The next character is read from <code>source</code> and switched on. The visible
 * assignments produce backspace, tab, line feed, form feed, carriage return, double
 * quote, single quote and backslash; any other character is treated as the start of
 * an octal escape of up to three digits.
 *
 * @throws InvalidInputException with INVALID_ESCAPE when the character is not an
 *             octal digit (0-7); a second INVALID_ESCAPE throw precedes the final
 *             assignment, presumably for an out-of-range value - TODO confirm
 */
3682 public final void scanEscapeCharacter() throws InvalidInputException {
3683 // the string with "\\u" is a legal string of two chars \ and u
3684 //thus we use a direct access to the source (for regular cases).
3686 if (unicodeAsBackSlash) {
3687 // consume next character
3688 unicodeAsBackSlash = false;
3689 // if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
3690 // getNextUnicodeChar();
// Append the character to withoutUnicodeBuffer, but only when that buffer is in use
// (withoutUnicodePtr != 0).
3692 if (withoutUnicodePtr != 0) {
3693 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Read the character that selects the escape and advance past it.
3697 currentCharacter = source[currentPosition++];
3698 switch (currentCharacter) {
// NOTE(review): each assignment below is presumably selected by the usual escape
// letter (b, t, n, f, r, ", ', \) - confirm against the case labels.
3700 currentCharacter = '\b';
3703 currentCharacter = '\t';
3706 currentCharacter = '\n';
3709 currentCharacter = '\f';
3712 currentCharacter = '\r';
3715 currentCharacter = '\"';
3718 currentCharacter = '\'';
3721 currentCharacter = '\\';
3724 // -----------octal escape--------------
3726 // OctalDigit OctalDigit
3727 // ZeroToThree OctalDigit OctalDigit
// getNumericValue returns -1 for characters without a numeric value, so the range test
// below rejects everything that is not an octal digit.
3729 int number = Character.getNumericValue(currentCharacter);
3730 if (number >= 0 && number <= 7) {
// true when the first digit is 4-7; in that case a third digit is not folded into the value.
3731 boolean zeroToThreeNot = number > 3;
3732 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3733 int digit = Character.getNumericValue(currentCharacter);
3734 if (digit >= 0 && digit <= 7) {
3735 number = (number * 8) + digit;
3736 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3737 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
3740 digit = Character.getNumericValue(currentCharacter);
3741 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree OctalDigit OctalDigit
3742 number = (number * 8) + digit;
3743 } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
3747 } else { // has read \OctalDigit NonDigit--> ignore last character
3750 } else { // has read \OctalDigit NonOctalDigit--> ignore last character
3753 } else { // has read \OctalDigit --> ignore last character
3757 throw new InvalidInputException(INVALID_ESCAPE);
// The accumulated octal value becomes the decoded character.
3758 currentCharacter = (char) number;
3760 throw new InvalidInputException(INVALID_ESCAPE);
3764 // check presence of task: tags
/**
 * Searches a comment range of <code>source</code> for the configured task tags and
 * records every occurrence in the foundTask* arrays.
 * <p>
 * First loop: for each position from commentStart up to (excluding) the smaller of
 * commentEnd and eofPosition, the entries of taskTags are compared with the source
 * character by character; a hit is stored together with its priority, its start/end
 * positions and an empty placeholder message. Second loop: for the tasks added by this
 * call, the message bounds are worked out, the task's end position is updated and the
 * message text is copied out of the source.
 *
 * @param commentStart index in source where the search begins
 * @param commentEnd index in source that bounds the search (exclusive in the first loop)
 */
3765 public void checkTaskTag(int commentStart, int commentEnd) {
3767 // only look for newer task: tags
// True when the most recently recorded task starts at or after commentStart.
3768 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// Remember where this call's additions begin; the second loop starts from here.
3771 int foundTaskIndex = this.foundTaskCount;
3772 nextChar : for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
3775 char[] priority = null;
3777 // check for tag occurrence
3778 nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
3779 tag = this.taskTags[itag];
// A tag has a priority only when taskPriorities exists and is long enough to cover its index.
3780 priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
3781 int tagLength = tag.length;
// NOTE(review): i + t is not checked against the length of source in the visible code -
// confirm a partial tag at the very end of the buffer cannot overrun the array.
3782 for (int t = 0; t < tagLength; t++) {
3783 if (this.source[i + t] != tag[t])
// The four result arrays are created on first use with room for 5 tasks and are all
// doubled together once foundTaskCount reaches the current capacity.
3787 if (this.foundTaskTags == null) {
3788 this.foundTaskTags = new char[5][];
3789 this.foundTaskMessages = new char[5][];
3790 this.foundTaskPriorities = new char[5][];
3791 this.foundTaskPositions = new int[5][];
3792 } else if (this.foundTaskCount == this.foundTaskTags.length) {
3793 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
3795 this.foundTaskMessages,
3797 this.foundTaskMessages = new char[this.foundTaskCount * 2][],
3799 this.foundTaskCount);
3801 this.foundTaskPriorities,
3803 this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
3805 this.foundTaskCount);
3807 this.foundTaskPositions,
3809 this.foundTaskPositions = new int[this.foundTaskCount * 2][],
3811 this.foundTaskCount);
// Record the hit: positions hold { first tag char, last tag char } for now and the message
// is an empty placeholder; both are refined in the second loop.
3813 this.foundTaskTags[this.foundTaskCount] = tag;
3814 this.foundTaskPriorities[this.foundTaskCount] = priority;
3815 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
3816 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
3817 this.foundTaskCount++;
3819 i += tagLength - 1; // will be incremented when looping
3823 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
3824 // retrieve message start and end positions
// The message starts right after the tag text; it may extend at most to just before the
// next recorded task, or to commentEnd - 1 for the last task.
3825 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
3826 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
3827 // at most beginning of next task
3828 if (max_value < msgStart)
3829 max_value = msgStart; // would only occur if tag is before EOF.
// Forward scan of the message range for a line break ('\n' or '\r').
3833 for (int j = msgStart; j < max_value; j++) {
3834 if ((c = this.source[j]) == '\n' || c == '\r') {
// Backward scan from max_value for a '*' character.
3841 for (int j = max_value; j > msgStart; j--) {
3842 if ((c = this.source[j]) == '*') {
// NOTE(review): in both loops below source[...] is read before msgStart <= end is tested,
// so the bound does not protect the array access - confirm this cannot go out of range.
3851 if (msgStart == end)
3855 while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
3857 while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
3860 // update the end position of the task
3861 this.foundTaskPositions[i][1] = end;
3863 // get the message source
// msgStart and end are both inclusive, hence the + 1.
3864 final int messageLength = end - msgStart + 1;
3865 char[] message = new char[messageLength];
3867 System.arraycopy(source, msgStart, message, 0, messageLength);
3868 this.foundTaskMessages[i] = message;