1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12 import java.util.ArrayList;
13 import java.util.Iterator;
14 import java.util.List;
15 import net.sourceforge.phpdt.core.compiler.CharOperation;
16 import net.sourceforge.phpdt.core.compiler.IScanner;
17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
19 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
20 public class Scanner implements IScanner, ITerminalSymbols {
22 * APIs are - getNextToken() which returns the current type of the token
23 * (this value is not memorized by the scanner) - getCurrentTokenSource()
24 * which provides the token "REAL" source (aka all unicode escapes have been
25 * transformed into a correct char) - sourceStart gives the position into the
26 * stream - currentPosition-1 gives the sourceEnd position into the stream
29 private boolean assertMode;
30 public boolean useAssertAsAnIndentifier = false;
31 //flag indicating if processed source contains occurrences of keyword assert
32 public boolean containsAssertKeyword = false;
33 public boolean recordLineSeparator;
34 public boolean phpMode = false;
35 public char currentCharacter;
36 public int startPosition;
37 public int currentPosition;
38 public int initialPosition, eofPosition;
39 // after this position eof are generated instead of real token from the
41 public boolean tokenizeComments;
42 public boolean tokenizeWhiteSpace;
43 //source should be viewed as a window (aka a part)
44 //of an entire very large stream
47 public char[] withoutUnicodeBuffer;
48 public int withoutUnicodePtr;
49 //when == 0 ==> no unicode in the current token
50 public boolean unicodeAsBackSlash = false;
51 public boolean scanningFloatLiteral = false;
52 //support for /** comments
53 //public char[][] comments = new char[10][];
54 public int[] commentStops = new int[10];
55 public int[] commentStarts = new int[10];
56 public int commentPtr = -1; // no comment test with commentPtr value -1
57 //diet parsing support - jump over some method body when requested
58 public boolean diet = false;
59 //support for the poor-line-debuggers ....
60 //remember the position of the cr/lf
61 public int[] lineEnds = new int[250];
62 public int linePtr = -1;
63 public boolean wasAcr = false;
64 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
65 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
66 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
67 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
68 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
69 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
70 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
71 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
72 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
73 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
74 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
75 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
76 //----------------optimized identifier management------------------
77 static final char[] charArray_a = new char[]{'a'},
78 charArray_b = new char[]{'b'}, charArray_c = new char[]{'c'},
79 charArray_d = new char[]{'d'}, charArray_e = new char[]{'e'},
80 charArray_f = new char[]{'f'}, charArray_g = new char[]{'g'},
81 charArray_h = new char[]{'h'}, charArray_i = new char[]{'i'},
82 charArray_j = new char[]{'j'}, charArray_k = new char[]{'k'},
83 charArray_l = new char[]{'l'}, charArray_m = new char[]{'m'},
84 charArray_n = new char[]{'n'}, charArray_o = new char[]{'o'},
85 charArray_p = new char[]{'p'}, charArray_q = new char[]{'q'},
86 charArray_r = new char[]{'r'}, charArray_s = new char[]{'s'},
87 charArray_t = new char[]{'t'}, charArray_u = new char[]{'u'},
88 charArray_v = new char[]{'v'}, charArray_w = new char[]{'w'},
89 charArray_x = new char[]{'x'}, charArray_y = new char[]{'y'},
90 charArray_z = new char[]{'z'};
91 static final char[] initCharArray = new char[]{'\u0000', '\u0000', '\u0000',
92 '\u0000', '\u0000', '\u0000'};
93 static final int TableSize = 30, InternalTableSize = 6;
95 public static final int OptimizedLength = 6;
97 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
98 // support for detecting non-externalized string literals
99 int currentLineNr = -1;
100 int previousLineNr = -1;
101 NLSLine currentLine = null;
102 List lines = new ArrayList();
103 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
104 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
105 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
106 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
107 public StringLiteral[] nonNLSStrings = null;
108 public boolean checkNonExternalizedStringLiterals = true;
109 public boolean wasNonExternalizedStringLiteral = false;
111 for (int i = 0; i < 6; i++) {
112 for (int j = 0; j < TableSize; j++) {
113 for (int k = 0; k < InternalTableSize; k++) {
114 charArray_length[i][j][k] = initCharArray;
119 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
121 public static final int RoundBracket = 0;
122 public static final int SquareBracket = 1;
123 public static final int CurlyBracket = 2;
124 public static final int BracketKinds = 3;
126 public char[][] foundTaskTags = null;
127 public char[][] foundTaskMessages;
128 public char[][] foundTaskPriorities = null;
129 public int[][] foundTaskPositions;
130 public int foundTaskCount = 0;
131 public char[][] taskTags = null;
132 public char[][] taskPriorities = null;
133 public static final boolean DEBUG = false;
// Convenience constructor: forwards both flags to the three-argument
// constructor, passing false for its third argument.
137 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
138 this(tokenizeComments, tokenizeWhiteSpace, false);
141 * Determines if the specified character is permissible as the first
142 * character in a PHP identifier
144 public static boolean isPHPIdentifierStart(char ch) {
// Accepted: any character Character.isLetter() reports as a letter, the
// underscore, and the code points 0x7F through 0xFF inclusive.
145 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
148 * Determines if the specified character may be part of a PHP identifier as
149 * other than the first character
151 public static boolean isPHPIdentifierPart(char ch) {
// Accepted: any character Character.isLetterOrDigit() reports as a letter or
// digit, the underscore, and the code points 0x7F through 0xFF inclusive.
152 return Character.isLetterOrDigit(ch) || (ch == '_')
153 || (0x7F <= ch && ch <= 0xFF);
// Answers true when currentPosition equals the length of the source array.
155 public final boolean atEnd() {
156 // This check is not meaningful when source is
157 // only a part (a window) of the real input stream
158 return source.length == currentPosition;
// Answers the characters of the current token; its length is computed as
// currentPosition - startPosition.
// NOTE(review): the case labels of the switch are not visible in this
// listing; presumably lengths 1..6 (cf. OptimizedLength) dispatch to the
// matching optimizedCurrentTokenSourceN() helper and every other length
// reaches the plain System.arraycopy from source at the end - confirm.
160 public char[] getCurrentIdentifierSource() {
161 //return the token REAL source (aka unicodes are precomputed)
// The unicode-buffer variant below is commented out.
163 // if (withoutUnicodePtr != 0)
164 // //0 is used as a fast test flag so the real first char is in position 1
166 // withoutUnicodeBuffer,
168 // result = new char[withoutUnicodePtr],
170 // withoutUnicodePtr);
172 int length = currentPosition - startPosition;
173 switch (length) { // see OptimizedLength
175 return optimizedCurrentTokenSource1();
177 return optimizedCurrentTokenSource2();
179 return optimizedCurrentTokenSource3();
181 return optimizedCurrentTokenSource4();
183 return optimizedCurrentTokenSource5();
185 return optimizedCurrentTokenSource6();
// Copy of the token characters into a freshly allocated array.
188 System.arraycopy(source, startPosition, result = new char[length], 0,
// Answers currentPosition - 1, i.e. the index of the character just before
// the current scan position.
193 public int getCurrentTokenEndPosition() {
194 return this.currentPosition - 1;
// Copies the current token, i.e. the currentPosition - startPosition
// characters of source starting at startPosition, into a new array.
196 public final char[] getCurrentTokenSource() {
197 // Return the token REAL source (aka unicodes are precomputed)
// The unicode-buffer variant below is commented out.
199 // if (withoutUnicodePtr != 0)
200 // // 0 is used as a fast test flag so the real first char is in position 1
202 // withoutUnicodeBuffer,
204 // result = new char[withoutUnicodePtr],
206 // withoutUnicodePtr);
// length is assigned inside the allocation expression and reused as the
// number of characters to copy.
209 System.arraycopy(source, startPosition,
210 result = new char[length = currentPosition - startPosition], 0, length);
// Same as the no-argument variant, but the copied range starts at the given
// startPos instead of startPosition: currentPosition - startPos characters
// of source are copied into a new array.
214 public final char[] getCurrentTokenSource(int startPos) {
215 // Return the token REAL source (aka unicodes are precomputed)
// The unicode-buffer variant below is commented out.
217 // if (withoutUnicodePtr != 0)
218 // // 0 is used as a fast test flag so the real first char is in position 1
220 // withoutUnicodeBuffer,
222 // result = new char[withoutUnicodePtr],
224 // withoutUnicodePtr);
227 System.arraycopy(source, startPos,
228 result = new char[length = currentPosition - startPos], 0, length);
// Answers the current string token without its first and last character.
// Two sources are possible: withoutUnicodeBuffer when withoutUnicodePtr is
// non-zero, the raw source array otherwise.
232 public final char[] getCurrentTokenSourceString() {
233 //return the token REAL source (aka unicodes are precomputed).
234 //REMOVE the two " that are at the beginning and the end.
236 if (withoutUnicodePtr != 0)
237 //0 is used as a fast test flag so the real first char is in position 1
238 System.arraycopy(withoutUnicodeBuffer, 2,
239 //2 is 1 (real start) + 1 (to jump over the ")
240 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
// Raw-source path: skip the first character (+ 1) and drop the last one,
// hence the length of currentPosition - startPosition - 2.
243 System.arraycopy(source, startPosition + 1,
244 result = new char[length = currentPosition - startPosition - 2], 0,
// Answers startPosition, the index in source where the current token begins.
249 public int getCurrentTokenStartPosition() {
250 return this.startPosition;
// Copies the current token from source without its first and last character
// (presumably the string delimiters - confirm against callers).
252 public final char[] getCurrentStringLiteralSource() {
253 // Return the token REAL source (aka unicodes are precomputed)
// Start one character in (+ 1); the length excludes both end characters (- 2).
256 System.arraycopy(source, startPosition + 1,
257 result = new char[length = currentPosition - startPosition - 2], 0,
263 * Search the source position corresponding to the end of a given line number
265 * Line numbers are 1-based, and relative to the scanner initialPosition.
266 * Character positions are 0-based.
268 * In case the given line number is inconsistent, answers -1.
270 public final int getLineEnd(int lineNumber) {
// NOTE(review): the statements guarded by the three checks below are not
// visible in this listing; per the method's header comment an inconsistent
// line number answers -1 - confirm the exact values returned by each guard.
271 if (lineEnds == null)
273 if (lineNumber >= lineEnds.length)
277 if (lineNumber == lineEnds.length - 1)
// Line numbers are 1-based while lineEnds is indexed from 0, hence the - 1.
279 return lineEnds[lineNumber - 1];
280 // the next line starts one character after the lineEnd of the previous line
283 * Search the source position corresponding to the beginning of a given line
286 * Line numbers are 1-based, and relative to the scanner initialPosition.
287 * Character positions are 0-based.
289 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
291 * In case the given line number is inconsistent, answers -1.
293 public final int getLineStart(int lineNumber) {
// NOTE(review): the statements guarded by the first two checks, and the
// condition guarding "return initialPosition", are not visible in this
// listing; per the method's header comment an inconsistent line number
// answers -1 and line 1 starts at the initial position - confirm.
294 if (lineEnds == null)
296 if (lineNumber >= lineEnds.length)
301 return initialPosition;
// The end of the previous line is stored at lineEnds[lineNumber - 2]
// (1-based line numbers, 0-based array); the requested line starts one
// character after it.
302 return lineEnds[lineNumber - 2] + 1;
303 // the next line starts one character after the lineEnd of the previous line
// Reads the character at currentPosition and compares it with testedChar.
// On a mismatch, and when reading runs past the end of source
// (IndexOutOfBoundsException), currentPosition is reset to its value on entry.
// NOTE(review): the return statements are not visible in this listing;
// presumably true is answered on a match and false otherwise - confirm.
305 public final boolean getNextChar(char testedChar) {
307 //handle the case of unicode.
308 //when a unicode appears then we must use a buffer that holds char
310 //At the end of this method currentCharacter holds the new visited char
311 //and currentPosition points right next after it
312 //Both previous lines are true if the currentCharacter is == to the
314 //On false, currentPosition is restored (currentCharacter is still overwritten by the read below).
315 //ALL getNextChar.... ARE OPTIMIZED COPIES
316 int temp = currentPosition;
318 currentCharacter = source[currentPosition++];
// The unicode-escape decoding below is commented out.
319 // if (((currentCharacter = source[currentPosition++]) == '\\')
320 // && (source[currentPosition] == 'u')) {
321 // //-------------unicode handling ------------
322 // int c1, c2, c3, c4;
323 // int unicodeSize = 6;
324 // currentPosition++;
325 // while (source[currentPosition] == 'u') {
326 // currentPosition++;
330 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
332 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
334 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
336 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
338 // currentPosition = temp;
342 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
343 // if (currentCharacter != testedChar) {
344 // currentPosition = temp;
347 // unicodeAsBackSlash = currentCharacter == '\\';
349 // //need the unicode buffer
350 // if (withoutUnicodePtr == 0) {
351 // //buffer all the entries that have been left aside....
352 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
356 // withoutUnicodeBuffer,
358 // withoutUnicodePtr);
360 // //fill the buffer with the char
361 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
364 // } //-------------end unicode handling--------------
// Mismatch: un-consume the character by restoring the saved position.
366 if (currentCharacter != testedChar) {
367 currentPosition = temp;
370 unicodeAsBackSlash = false;
371 // if (withoutUnicodePtr != 0)
372 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source is handled like a mismatch.
375 } catch (IndexOutOfBoundsException e) {
376 unicodeAsBackSlash = false;
377 currentPosition = temp;
// Reads the character at currentPosition and compares it with testedChar1,
// then testedChar2. When neither matches, and when reading runs past the end
// of source (IndexOutOfBoundsException), currentPosition is reset to its
// value on entry.
// NOTE(review): the return statements are not visible in this listing; the
// result encoding is taken from the existing comment below - confirm.
381 public final int getNextChar(char testedChar1, char testedChar2) {
382 //Result: 0 when the char is testedChar1, 1 when it is testedChar2, -1 otherwise
383 //test can be done with (x==0) for the first and (x>0) for the second
384 //handle the case of unicode.
385 //when a unicode appears then we must use a buffer that holds char
387 //At the end of this method currentCharacter holds the new visited char
388 //and currentPosition points right next after it
389 //Both previous lines are true if the currentCharacter is == to the
391 //On no match, currentPosition is restored (currentCharacter is still overwritten by the read below).
392 //ALL getNextChar.... ARE OPTIMIZED COPIES
393 int temp = currentPosition;
396 currentCharacter = source[currentPosition++];
// The unicode-escape decoding below is commented out.
397 // if (((currentCharacter = source[currentPosition++]) == '\\')
398 // && (source[currentPosition] == 'u')) {
399 // //-------------unicode handling ------------
400 // int c1, c2, c3, c4;
401 // int unicodeSize = 6;
402 // currentPosition++;
403 // while (source[currentPosition] == 'u') {
404 // currentPosition++;
408 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
410 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
412 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
414 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
416 // currentPosition = temp;
420 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
421 // if (currentCharacter == testedChar1)
423 // else if (currentCharacter == testedChar2)
426 // currentPosition = temp;
430 // //need the unicode buffer
431 // if (withoutUnicodePtr == 0) {
432 // //buffer all the entries that have been left aside....
433 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
437 // withoutUnicodeBuffer,
439 // withoutUnicodePtr);
441 // //fill the buffer with the char
442 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
444 // } //-------------end unicode handling--------------
446 if (currentCharacter == testedChar1)
448 else if (currentCharacter == testedChar2)
// Neither character matched: un-consume by restoring the saved position.
451 currentPosition = temp;
454 // if (withoutUnicodePtr != 0)
455 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source also restores the saved position.
458 } catch (IndexOutOfBoundsException e) {
459 currentPosition = temp;
// Reads the character at currentPosition and tests it with
// Character.isDigit(). When it is not a digit, and when reading runs past
// the end of source (IndexOutOfBoundsException), currentPosition is reset to
// its value on entry.
// NOTE(review): the return statements are not visible in this listing;
// presumably true is answered for a digit and false otherwise - confirm.
463 public final boolean getNextCharAsDigit() {
465 //handle the case of unicode.
466 //when a unicode appears then we must use a buffer that holds char
468 //At the end of this method currentCharacter holds the new visited char
469 //and currentPosition points right next after it
470 //Both previous lines are true if the currentCharacter is a digit
471 //On false, currentPosition is restored (currentCharacter is still overwritten by the read below).
472 //ALL getNextChar.... ARE OPTIMIZED COPIES
473 int temp = currentPosition;
475 currentCharacter = source[currentPosition++];
// The unicode-escape decoding below is commented out.
476 // if (((currentCharacter = source[currentPosition++]) == '\\')
477 // && (source[currentPosition] == 'u')) {
478 // //-------------unicode handling ------------
479 // int c1, c2, c3, c4;
480 // int unicodeSize = 6;
481 // currentPosition++;
482 // while (source[currentPosition] == 'u') {
483 // currentPosition++;
487 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
489 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
491 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
493 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
495 // currentPosition = temp;
499 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
500 // if (!Character.isDigit(currentCharacter)) {
501 // currentPosition = temp;
505 // //need the unicode buffer
506 // if (withoutUnicodePtr == 0) {
507 // //buffer all the entries that have been left aside....
508 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
512 // withoutUnicodeBuffer,
514 // withoutUnicodePtr);
516 // //fill the buffer with the char
517 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
519 // } //-------------end unicode handling--------------
// Not a digit: un-consume the character by restoring the saved position.
521 if (!Character.isDigit(currentCharacter)) {
522 currentPosition = temp;
525 // if (withoutUnicodePtr != 0)
526 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source also restores the saved position.
529 } catch (IndexOutOfBoundsException e) {
530 currentPosition = temp;
// Reads the character at currentPosition and tests whether it is a digit in
// the given radix (Character.digit(...) != -1). When it is not, and when
// reading runs past the end of source (IndexOutOfBoundsException),
// currentPosition is reset to its value on entry.
// NOTE(review): the return statements are not visible in this listing;
// presumably true is answered for a valid digit and false otherwise - confirm.
534 public final boolean getNextCharAsDigit(int radix) {
536 //handle the case of unicode.
537 //when a unicode appears then we must use a buffer that holds char
539 //At the end of this method currentCharacter holds the new visited char
540 //and currentPosition points right next after it
541 //Both previous lines are true if the currentCharacter is a digit base on
543 //On false, currentPosition is restored (currentCharacter is still overwritten by the read below).
544 //ALL getNextChar.... ARE OPTIMIZED COPIES
545 int temp = currentPosition;
547 currentCharacter = source[currentPosition++];
// The unicode-escape decoding below is commented out.
548 // if (((currentCharacter = source[currentPosition++]) == '\\')
549 // && (source[currentPosition] == 'u')) {
550 // //-------------unicode handling ------------
551 // int c1, c2, c3, c4;
552 // int unicodeSize = 6;
553 // currentPosition++;
554 // while (source[currentPosition] == 'u') {
555 // currentPosition++;
559 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
561 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
563 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
565 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
567 // currentPosition = temp;
571 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
572 // if (Character.digit(currentCharacter, radix) == -1) {
573 // currentPosition = temp;
577 // //need the unicode buffer
578 // if (withoutUnicodePtr == 0) {
579 // //buffer all the entries that have been left aside....
580 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
584 // withoutUnicodeBuffer,
586 // withoutUnicodePtr);
588 // //fill the buffer with the char
589 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
591 // } //-------------end unicode handling--------------
// Character.digit() answers -1 when the character is not a valid digit in
// the radix: un-consume it by restoring the saved position.
593 if (Character.digit(currentCharacter, radix) == -1) {
594 currentPosition = temp;
597 // if (withoutUnicodePtr != 0)
598 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source also restores the saved position.
601 } catch (IndexOutOfBoundsException e) {
602 currentPosition = temp;
// Reads the character at currentPosition and tests it with
// isPHPIdentifierPart() - despite the method name, the PHP identifier rule is
// the one applied. When the test fails, and when reading runs past the end
// of source (IndexOutOfBoundsException), currentPosition is reset to its
// value on entry.
// NOTE(review): the return statements are not visible in this listing;
// presumably true is answered for an identifier part and false otherwise -
// confirm.
606 public boolean getNextCharAsJavaIdentifierPart() {
608 //handle the case of unicode.
609 //when a unicode appears then we must use a buffer that holds char
611 //At the end of this method currentCharacter holds the new visited char
612 //and currentPosition points right next after it
613 //Both previous lines are true if the currentCharacter is a
614 // PHP identifier part (see isPHPIdentifierPart)
615 //On false, currentPosition is restored (currentCharacter is still overwritten by the read below).
616 //ALL getNextChar.... ARE OPTIMIZED COPIES
617 int temp = currentPosition;
619 currentCharacter = source[currentPosition++];
// The unicode-escape decoding below is commented out.
620 // if (((currentCharacter = source[currentPosition++]) == '\\')
621 // && (source[currentPosition] == 'u')) {
622 // //-------------unicode handling ------------
623 // int c1, c2, c3, c4;
624 // int unicodeSize = 6;
625 // currentPosition++;
626 // while (source[currentPosition] == 'u') {
627 // currentPosition++;
631 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
633 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
635 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
637 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
639 // currentPosition = temp;
643 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
644 // if (!isPHPIdentifierPart(currentCharacter)) {
645 // currentPosition = temp;
649 // //need the unicode buffer
650 // if (withoutUnicodePtr == 0) {
651 // //buffer all the entries that have been left aside....
652 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
656 // withoutUnicodeBuffer,
658 // withoutUnicodePtr);
660 // //fill the buffer with the char
661 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
663 // } //-------------end unicode handling--------------
// Not an identifier part: un-consume the character by restoring the saved
// position.
665 if (!isPHPIdentifierPart(currentCharacter)) {
666 currentPosition = temp;
669 // if (withoutUnicodePtr != 0)
670 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source also restores the saved position.
673 } catch (IndexOutOfBoundsException e) {
674 currentPosition = temp;
678 public int getNextToken() throws InvalidInputException {
679 int htmlPosition = currentPosition;
682 currentCharacter = source[currentPosition++];
683 if (currentCharacter == '<') {
684 if (getNextChar('?')) {
685 currentCharacter = source[currentPosition++];
686 if ((currentCharacter == ' ')
687 || Character.isWhitespace(currentCharacter)) {
689 startPosition = currentPosition;
691 if (tokenizeWhiteSpace) {
692 // && (whiteStart != currentPosition - 1)) {
693 // reposition scanner in case we are interested by spaces as
695 startPosition = htmlPosition;
696 return TokenNameHTML;
699 boolean phpStart = (currentCharacter == 'P')
700 || (currentCharacter == 'p');
702 int test = getNextChar('H', 'h');
704 test = getNextChar('P', 'p');
707 startPosition = currentPosition;
709 if (tokenizeWhiteSpace) {
710 // && (whiteStart != currentPosition - 1)) {
711 // reposition scanner in case we are interested by spaces
713 startPosition = htmlPosition;
714 return TokenNameHTML;
722 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
723 if (recordLineSeparator) {
730 } //-----------------end switch while try--------------------
731 catch (IndexOutOfBoundsException e) {
732 if (tokenizeWhiteSpace) {
733 // && (whiteStart != currentPosition - 1)) {
734 // reposition scanner in case we are interested by spaces as tokens
735 startPosition = htmlPosition;
742 jumpOverMethodBody();
744 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
747 while (true) { //loop for jumping over comments
748 withoutUnicodePtr = 0;
749 //start with a new token (even comment written with unicode )
750 // ---------Consume white space and handles startPosition---------
751 int whiteStart = currentPosition;
752 boolean isWhiteSpace;
754 startPosition = currentPosition;
755 currentCharacter = source[currentPosition++];
756 // if (((currentCharacter = source[currentPosition++]) == '\\')
757 // && (source[currentPosition] == 'u')) {
758 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
760 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
761 checkNonExternalizeString();
762 if (recordLineSeparator) {
768 isWhiteSpace = (currentCharacter == ' ')
769 || Character.isWhitespace(currentCharacter);
771 } while (isWhiteSpace);
772 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
773 // reposition scanner in case we are interested by spaces as tokens
775 startPosition = whiteStart;
776 return TokenNameWHITESPACE;
778 //little trick to get out in the middle of a source computation
779 if (currentPosition > eofPosition)
781 // ---------Identify the next token-------------
782 switch (currentCharacter) {
784 return TokenNameLPAREN;
786 return TokenNameRPAREN;
788 return TokenNameLBRACE;
790 return TokenNameRBRACE;
792 return TokenNameLBRACKET;
794 return TokenNameRBRACKET;
796 return TokenNameSEMICOLON;
798 return TokenNameCOMMA;
800 if (getNextCharAsDigit())
801 return scanNumber(true);
806 if ((test = getNextChar('+', '=')) == 0)
807 return TokenNamePLUS_PLUS;
809 return TokenNamePLUS_EQUAL;
810 return TokenNamePLUS;
815 if ((test = getNextChar('-', '=')) == 0)
816 return TokenNameMINUS_MINUS;
818 return TokenNameMINUS_EQUAL;
819 if (getNextChar('>'))
820 return TokenNameMINUS_GREATER;
821 return TokenNameMINUS;
824 if (getNextChar('='))
825 return TokenNameTWIDDLE_EQUAL;
826 return TokenNameTWIDDLE;
828 if (getNextChar('=')) {
829 if (getNextChar('=')) {
830 return TokenNameNOT_EQUAL_EQUAL;
832 return TokenNameNOT_EQUAL;
836 if (getNextChar('='))
837 return TokenNameMULTIPLY_EQUAL;
838 return TokenNameMULTIPLY;
840 if (getNextChar('='))
841 return TokenNameREMAINDER_EQUAL;
842 return TokenNameREMAINDER;
846 if ((test = getNextChar('=', '<')) == 0)
847 return TokenNameLESS_EQUAL;
849 if (getNextChar('='))
850 return TokenNameLEFT_SHIFT_EQUAL;
851 if (getNextChar('<')) {
852 int heredocStart = currentPosition;
853 int heredocLength = 0;
854 currentCharacter = source[currentPosition++];
855 if (isPHPIdentifierStart(currentCharacter)) {
856 currentCharacter = source[currentPosition++];
858 return TokenNameERROR;
860 while (isPHPIdentifierPart(currentCharacter)) {
861 currentCharacter = source[currentPosition++];
863 heredocLength = currentPosition - heredocStart - 1;
864 // heredoc end-tag determination
865 boolean endTag = true;
868 ch = source[currentPosition++];
869 if (ch == '\r' || ch == '\n') {
870 if (recordLineSeparator) {
875 for (int i = 0; i < heredocLength; i++) {
876 if (source[currentPosition + i] != source[heredocStart
883 currentPosition += heredocLength - 1;
884 currentCharacter = source[currentPosition++];
885 break; // do...while loop
891 return TokenNameHEREDOC;
893 return TokenNameLEFT_SHIFT;
895 return TokenNameLESS;
900 if ((test = getNextChar('=', '>')) == 0)
901 return TokenNameGREATER_EQUAL;
903 if ((test = getNextChar('=', '>')) == 0)
904 return TokenNameRIGHT_SHIFT_EQUAL;
905 return TokenNameRIGHT_SHIFT;
907 return TokenNameGREATER;
910 if (getNextChar('=')) {
911 if (getNextChar('=')) {
912 return TokenNameEQUAL_EQUAL_EQUAL;
914 return TokenNameEQUAL_EQUAL;
916 if (getNextChar('>'))
917 return TokenNameEQUAL_GREATER;
918 return TokenNameEQUAL;
922 if ((test = getNextChar('&', '=')) == 0)
923 return TokenNameAND_AND;
925 return TokenNameAND_EQUAL;
931 if ((test = getNextChar('|', '=')) == 0)
932 return TokenNameOR_OR;
934 return TokenNameOR_EQUAL;
938 if (getNextChar('='))
939 return TokenNameXOR_EQUAL;
942 if (getNextChar('>')) {
944 return TokenNameStopPHP;
946 return TokenNameQUESTION;
948 if (getNextChar(':'))
949 return TokenNamePAAMAYIM_NEKUDOTAYIM;
950 return TokenNameCOLON;
956 // if ((test = getNextChar('\n', '\r')) == 0) {
957 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
960 // // relocate if finding another quote fairly close: thus unicode
961 // '/u000D' will be fully consumed
962 // for (int lookAhead = 0;
965 // if (currentPosition + lookAhead
968 // if (source[currentPosition + lookAhead]
971 // if (source[currentPosition + lookAhead]
973 // currentPosition += lookAhead + 1;
977 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
980 // if (getNextChar('\'')) {
981 // // relocate if finding another quote fairly close: thus unicode
982 // '/u000D' will be fully consumed
983 // for (int lookAhead = 0;
986 // if (currentPosition + lookAhead
989 // if (source[currentPosition + lookAhead]
992 // if (source[currentPosition + lookAhead]
994 // currentPosition += lookAhead + 1;
998 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1000 // if (getNextChar('\\'))
1001 // scanEscapeCharacter();
1002 // else { // consume next character
1003 // unicodeAsBackSlash = false;
1004 // if (((currentCharacter = source[currentPosition++])
1006 // && (source[currentPosition] == 'u')) {
1007 // getNextUnicodeChar();
1009 // if (withoutUnicodePtr != 0) {
1010 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1011 // currentCharacter;
1015 // // if (getNextChar('\''))
1016 // // return TokenNameCharacterLiteral;
1017 // // relocate if finding another quote fairly close: thus unicode
1018 // '/u000D' will be fully consumed
1019 // for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1020 // if (currentPosition + lookAhead == source.length)
1022 // if (source[currentPosition + lookAhead] == '\n')
1024 // if (source[currentPosition + lookAhead] == '\'') {
1025 // currentPosition += lookAhead + 1;
1029 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1032 // consume next character
1033 unicodeAsBackSlash = false;
1034 currentCharacter = source[currentPosition++];
1035 // if (((currentCharacter = source[currentPosition++]) == '\\')
1036 // && (source[currentPosition] == 'u')) {
1037 // getNextUnicodeChar();
1039 // if (withoutUnicodePtr != 0) {
1040 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1041 // currentCharacter;
1044 while (currentCharacter != '\'') {
1045 /** ** in PHP \r and \n are valid in string literals *** */
1046 // if ((currentCharacter == '\n')
1047 // || (currentCharacter == '\r')) {
1048 // // relocate if finding another quote fairly close: thus
1049 // unicode '/u000D' will be fully consumed
1050 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1051 // if (currentPosition + lookAhead == source.length)
1053 // if (source[currentPosition + lookAhead] == '\n')
1055 // if (source[currentPosition + lookAhead] == '\"') {
1056 // currentPosition += lookAhead + 1;
1060 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1062 if (currentCharacter == '\\') {
1063 int escapeSize = currentPosition;
1064 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1065 //scanEscapeCharacter make a side effect on this value and
1066 // we need the previous value few lines down this one
1067 scanSingleQuotedEscapeCharacter();
1068 escapeSize = currentPosition - escapeSize;
1069 if (withoutUnicodePtr == 0) {
1070 //buffer all the entries that have been left aside....
1071 withoutUnicodePtr = currentPosition - escapeSize - 1
1073 System.arraycopy(source, startPosition,
1074 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1075 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1076 } else { //overwrite the / in the buffer
1077 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1078 if (backSlashAsUnicodeInString) { //there are TWO \ in
1080 // only one is correct
1081 withoutUnicodePtr--;
1085 // consume next character
1086 unicodeAsBackSlash = false;
1087 currentCharacter = source[currentPosition++];
1088 // if (((currentCharacter = source[currentPosition++]) ==
1090 // && (source[currentPosition] == 'u')) {
1091 // getNextUnicodeChar();
1093 if (withoutUnicodePtr != 0) {
1094 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1098 } catch (IndexOutOfBoundsException e) {
1099 throw new InvalidInputException(UNTERMINATED_STRING);
1100 } catch (InvalidInputException e) {
1101 if (e.getMessage().equals(INVALID_ESCAPE)) {
1102 // relocate if finding another quote fairly close: thus
1103 // unicode '/u000D' will be fully consumed
1104 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1105 if (currentPosition + lookAhead == source.length)
1107 if (source[currentPosition + lookAhead] == '\n')
1109 if (source[currentPosition + lookAhead] == '\'') {
1110 currentPosition += lookAhead + 1;
1117 if (checkNonExternalizedStringLiterals) { // check for presence
1119 // //$NON-NLS-?$ where
1121 if (currentLine == null) {
1122 currentLine = new NLSLine();
1123 lines.add(currentLine);
1125 currentLine.add(new StringLiteral(
1126 getCurrentTokenSourceString(), startPosition,
1127 currentPosition - 1));
1129 return TokenNameStringConstant;
1132 // consume next character
1133 unicodeAsBackSlash = false;
1134 currentCharacter = source[currentPosition++];
1135 // if (((currentCharacter = source[currentPosition++]) == '\\')
1136 // && (source[currentPosition] == 'u')) {
1137 // getNextUnicodeChar();
1139 // if (withoutUnicodePtr != 0) {
1140 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1141 // currentCharacter;
1144 while (currentCharacter != '"') {
1145 /** ** in PHP \r and \n are valid in string literals *** */
1146 // if ((currentCharacter == '\n')
1147 // || (currentCharacter == '\r')) {
1148 // // relocate if finding another quote fairly close: thus
1149 // unicode '/u000D' will be fully consumed
1150 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1151 // if (currentPosition + lookAhead == source.length)
1153 // if (source[currentPosition + lookAhead] == '\n')
1155 // if (source[currentPosition + lookAhead] == '\"') {
1156 // currentPosition += lookAhead + 1;
1160 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1162 if (currentCharacter == '\\') {
1163 int escapeSize = currentPosition;
1164 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1165 //scanEscapeCharacter make a side effect on this value and
1166 // we need the previous value few lines down this one
1167 scanDoubleQuotedEscapeCharacter();
1168 escapeSize = currentPosition - escapeSize;
1169 if (withoutUnicodePtr == 0) {
1170 //buffer all the entries that have been left aside....
1171 withoutUnicodePtr = currentPosition - escapeSize - 1
1173 System.arraycopy(source, startPosition,
1174 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1175 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1176 } else { //overwrite the / in the buffer
1177 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1178 if (backSlashAsUnicodeInString) { //there are TWO \ in
1180 // only one is correct
1181 withoutUnicodePtr--;
1185 // consume next character
1186 unicodeAsBackSlash = false;
1187 currentCharacter = source[currentPosition++];
1188 // if (((currentCharacter = source[currentPosition++]) ==
1190 // && (source[currentPosition] == 'u')) {
1191 // getNextUnicodeChar();
1193 if (withoutUnicodePtr != 0) {
1194 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1198 } catch (IndexOutOfBoundsException e) {
1199 throw new InvalidInputException(UNTERMINATED_STRING);
1200 } catch (InvalidInputException e) {
1201 if (e.getMessage().equals(INVALID_ESCAPE)) {
1202 // relocate if finding another quote fairly close: thus
1203 // unicode '/u000D' will be fully consumed
1204 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1205 if (currentPosition + lookAhead == source.length)
1207 if (source[currentPosition + lookAhead] == '\n')
1209 if (source[currentPosition + lookAhead] == '\"') {
1210 currentPosition += lookAhead + 1;
1217 if (checkNonExternalizedStringLiterals) { // check for presence
1219 // //$NON-NLS-?$ where
1221 if (currentLine == null) {
1222 currentLine = new NLSLine();
1223 lines.add(currentLine);
1225 currentLine.add(new StringLiteral(
1226 getCurrentTokenSourceString(), startPosition,
1227 currentPosition - 1));
1229 return TokenNameStringLiteral;
1232 // consume next character
1233 unicodeAsBackSlash = false;
1234 currentCharacter = source[currentPosition++];
1235 // if (((currentCharacter = source[currentPosition++]) == '\\')
1236 // && (source[currentPosition] == 'u')) {
1237 // getNextUnicodeChar();
1239 // if (withoutUnicodePtr != 0) {
1240 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1241 // currentCharacter;
1244 while (currentCharacter != '`') {
1245 /** ** in PHP \r and \n are valid in string literals *** */
1246 // if ((currentCharacter == '\n')
1247 // || (currentCharacter == '\r')) {
1248 // // relocate if finding another quote fairly close: thus
1249 // unicode '/u000D' will be fully consumed
1250 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1251 // if (currentPosition + lookAhead == source.length)
1253 // if (source[currentPosition + lookAhead] == '\n')
1255 // if (source[currentPosition + lookAhead] == '\"') {
1256 // currentPosition += lookAhead + 1;
1260 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1262 if (currentCharacter == '\\') {
1263 int escapeSize = currentPosition;
1264 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1265 //scanEscapeCharacter make a side effect on this value and
1266 // we need the previous value few lines down this one
1267 scanDoubleQuotedEscapeCharacter();
1268 escapeSize = currentPosition - escapeSize;
1269 if (withoutUnicodePtr == 0) {
1270 //buffer all the entries that have been left aside....
1271 withoutUnicodePtr = currentPosition - escapeSize - 1
1273 System.arraycopy(source, startPosition,
1274 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1275 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1276 } else { //overwrite the / in the buffer
1277 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1278 if (backSlashAsUnicodeInString) { //there are TWO \ in
1280 // only one is correct
1281 withoutUnicodePtr--;
1285 // consume next character
1286 unicodeAsBackSlash = false;
1287 currentCharacter = source[currentPosition++];
1288 // if (((currentCharacter = source[currentPosition++]) ==
1290 // && (source[currentPosition] == 'u')) {
1291 // getNextUnicodeChar();
1293 if (withoutUnicodePtr != 0) {
1294 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1298 } catch (IndexOutOfBoundsException e) {
1299 throw new InvalidInputException(UNTERMINATED_STRING);
1300 } catch (InvalidInputException e) {
1301 if (e.getMessage().equals(INVALID_ESCAPE)) {
1302 // relocate if finding another quote fairly close: thus
1303 // unicode '/u000D' will be fully consumed
1304 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1305 if (currentPosition + lookAhead == source.length)
1307 if (source[currentPosition + lookAhead] == '\n')
1309 if (source[currentPosition + lookAhead] == '`') {
1310 currentPosition += lookAhead + 1;
1317 if (checkNonExternalizedStringLiterals) { // check for presence
1319 // //$NON-NLS-?$ where
1321 if (currentLine == null) {
1322 currentLine = new NLSLine();
1323 lines.add(currentLine);
1325 currentLine.add(new StringLiteral(
1326 getCurrentTokenSourceString(), startPosition,
1327 currentPosition - 1));
1329 return TokenNameStringInterpolated;
1334 if ((currentCharacter == '#')
1335 || (test = getNextChar('/', '*')) == 0) {
1337 int endPositionForLineComment = 0;
1338 try { //get the next char
1339 currentCharacter = source[currentPosition++];
1340 // if (((currentCharacter = source[currentPosition++])
1342 // && (source[currentPosition] == 'u')) {
1343 // //-------------unicode traitement ------------
1344 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1345 // currentPosition++;
1346 // while (source[currentPosition] == 'u') {
1347 // currentPosition++;
1350 // Character.getNumericValue(source[currentPosition++]))
1354 // Character.getNumericValue(source[currentPosition++]))
1358 // Character.getNumericValue(source[currentPosition++]))
1362 // Character.getNumericValue(source[currentPosition++]))
1365 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1367 // currentCharacter =
1368 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1371 //handle the \\u case manually into comment
1372 // if (currentCharacter == '\\') {
1373 // if (source[currentPosition] == '\\')
1374 // currentPosition++;
1375 // } //jump over the \\
1376 boolean isUnicode = false;
1377 while (currentCharacter != '\r' && currentCharacter != '\n') {
1378 if (currentCharacter == '?') {
1379 if (getNextChar('>')) {
1380 startPosition = currentPosition - 2;
1382 return TokenNameStopPHP;
1387 currentCharacter = source[currentPosition++];
1388 // if (((currentCharacter = source[currentPosition++])
1390 // && (source[currentPosition] == 'u')) {
1391 // isUnicode = true;
1392 // //-------------unicode traitement ------------
1393 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1394 // currentPosition++;
1395 // while (source[currentPosition] == 'u') {
1396 // currentPosition++;
1399 // Character.getNumericValue(source[currentPosition++]))
1403 // Character.getNumericValue(
1404 // source[currentPosition++]))
1408 // Character.getNumericValue(
1409 // source[currentPosition++]))
1413 // Character.getNumericValue(
1414 // source[currentPosition++]))
1418 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1420 // currentCharacter =
1421 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1424 //handle the \\u case manually into comment
1425 // if (currentCharacter == '\\') {
1426 // if (source[currentPosition] == '\\')
1427 // currentPosition++;
1428 // } //jump over the \\
1431 endPositionForLineComment = currentPosition - 6;
1433 endPositionForLineComment = currentPosition - 1;
1435 recordComment(false);
1436 if ((currentCharacter == '\r')
1437 || (currentCharacter == '\n')) {
1438 checkNonExternalizeString();
1439 if (recordLineSeparator) {
1441 pushUnicodeLineSeparator();
1443 pushLineSeparator();
1449 if (tokenizeComments) {
1451 currentPosition = endPositionForLineComment;
1452 // reset one character behind
1454 return TokenNameCOMMENT_LINE;
1456 } catch (IndexOutOfBoundsException e) { //an eof will them
1458 if (tokenizeComments) {
1460 // reset one character behind
1461 return TokenNameCOMMENT_LINE;
1467 //traditional and annotation comment
1468 boolean isJavadoc = false, star = false;
1469 // consume next character
1470 unicodeAsBackSlash = false;
1471 currentCharacter = source[currentPosition++];
1472 // if (((currentCharacter = source[currentPosition++]) ==
1474 // && (source[currentPosition] == 'u')) {
1475 // getNextUnicodeChar();
1477 // if (withoutUnicodePtr != 0) {
1478 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1479 // currentCharacter;
1482 if (currentCharacter == '*') {
1486 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1487 checkNonExternalizeString();
1488 if (recordLineSeparator) {
1489 pushLineSeparator();
1494 try { //get the next char
1495 currentCharacter = source[currentPosition++];
1496 // if (((currentCharacter = source[currentPosition++])
1498 // && (source[currentPosition] == 'u')) {
1499 // //-------------unicode traitement ------------
1500 // getNextUnicodeChar();
1502 //handle the \\u case manually into comment
1503 // if (currentCharacter == '\\') {
1504 // if (source[currentPosition] == '\\')
1505 // currentPosition++;
1506 // //jump over the \\
1508 // empty comment is not a javadoc /**/
1509 if (currentCharacter == '/') {
1512 //loop until end of comment */
1513 while ((currentCharacter != '/') || (!star)) {
1514 if ((currentCharacter == '\r')
1515 || (currentCharacter == '\n')) {
1516 checkNonExternalizeString();
1517 if (recordLineSeparator) {
1518 pushLineSeparator();
1523 star = currentCharacter == '*';
1525 currentCharacter = source[currentPosition++];
1526 // if (((currentCharacter = source[currentPosition++])
1528 // && (source[currentPosition] == 'u')) {
1529 // //-------------unicode traitement ------------
1530 // getNextUnicodeChar();
1532 //handle the \\u case manually into comment
1533 // if (currentCharacter == '\\') {
1534 // if (source[currentPosition] == '\\')
1535 // currentPosition++;
1536 // } //jump over the \\
1538 recordComment(isJavadoc);
1539 if (tokenizeComments) {
1541 return TokenNameCOMMENT_PHPDOC;
1542 return TokenNameCOMMENT_BLOCK;
1544 } catch (IndexOutOfBoundsException e) {
1545 throw new InvalidInputException(UNTERMINATED_COMMENT);
1549 if (getNextChar('='))
1550 return TokenNameDIVIDE_EQUAL;
1551 return TokenNameDIVIDE;
1555 return TokenNameEOF;
1556 //the atEnd may not be <currentPosition == source.length> if
1557 // source is only some part of a real (external) stream
1558 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1560 if (currentCharacter == '$') {
1561 while ((currentCharacter = source[currentPosition++]) == '$') {
1563 if (currentCharacter == '{')
1564 return TokenNameDOLLAR_LBRACE;
1565 if (isPHPIdentifierStart(currentCharacter))
1566 return scanIdentifierOrKeyword(true);
1567 return TokenNameERROR;
1569 if (isPHPIdentifierStart(currentCharacter))
1570 return scanIdentifierOrKeyword(false);
1571 if (Character.isDigit(currentCharacter))
1572 return scanNumber(false);
1573 return TokenNameERROR;
1576 } //-----------------end switch while try--------------------
1577 catch (IndexOutOfBoundsException e) {
1580 return TokenNameEOF;
1582 // public final void getNextUnicodeChar()
1583 // throws IndexOutOfBoundsException, InvalidInputException {
1585 // //handle the case of unicode.
1586 // //when a unicode appears then we must use a buffer that holds char
1588 // //At the end of this method currentCharacter holds the new visited char
1589 // //and currentPosition points right next after it
1591 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1593 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1594 // currentPosition++;
1595 // while (source[currentPosition] == 'u') {
1596 // currentPosition++;
1600 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1602 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1604 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1606 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1608 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1610 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1611 // //need the unicode buffer
1612 // if (withoutUnicodePtr == 0) {
1613 // //buffer all the entries that have been left aside....
1614 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1615 // System.arraycopy(
1618 // withoutUnicodeBuffer,
1620 // withoutUnicodePtr);
1622 // //fill the buffer with the char
1623 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1625 // unicodeAsBackSlash = currentCharacter == '\\';
1628 * Tokenize a method body, assuming that curly brackets are properly
1631 public final void jumpOverMethodBody() {
// Advances the scanner through the source without producing tokens: each
// round skips white space, then dispatches on the first non-blank character
// (escape sequences, double-quoted strings, comments, identifiers, numbers).
// InvalidInputException and IndexOutOfBoundsException raised while scanning
// are caught inside this method; the signature declares no checked exception.
// wasAcr is cleared first; pushLineSeparator() reads it when merging CR+LF.
1632 this.wasAcr = false;
1635 while (true) { //loop for jumping over comments
1636 // ---------Consume white space and handles startPosition---------
1637 boolean isWhiteSpace;
// Mark where the next candidate token begins, then read one character
// (currentPosition ends up one past the character just read).
1639 startPosition = currentPosition;
1640 currentCharacter = source[currentPosition++];
1641 // if (((currentCharacter = source[currentPosition++]) == '\\')
1642 // && (source[currentPosition] == 'u')) {
1643 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
// Line terminators met while skipping white space are still recorded.
1645 if (recordLineSeparator
1646 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1647 pushLineSeparator();
1648 isWhiteSpace = Character.isWhitespace(currentCharacter);
1650 } while (isWhiteSpace);
1651 // -------consume token until } is found---------
1652 switch (currentCharacter) {
// A backslash right after the current character introduces an escape
// sequence, which is consumed by the escape scanner.
1664 test = getNextChar('\\');
1667 scanDoubleQuotedEscapeCharacter();
1668 } catch (InvalidInputException ex) {
1671 // try { // consume next character
1672 unicodeAsBackSlash = false;
1673 currentCharacter = source[currentPosition++];
1674 // if (((currentCharacter = source[currentPosition++]) == '\\')
1675 // && (source[currentPosition] == 'u')) {
1676 // getNextUnicodeChar();
// A non-zero withoutUnicodePtr means the token is being mirrored into
// withoutUnicodeBuffer, so the character is appended there as well.
1678 if (withoutUnicodePtr != 0) {
1679 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1682 // } catch (InvalidInputException ex) {
1690 // try { // consume next character
1691 unicodeAsBackSlash = false;
1692 currentCharacter = source[currentPosition++];
1693 // if (((currentCharacter = source[currentPosition++]) == '\\')
1694 // && (source[currentPosition] == 'u')) {
1695 // getNextUnicodeChar();
1697 if (withoutUnicodePtr != 0) {
1698 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1701 // } catch (InvalidInputException ex) {
// Scan forward until the closing double quote of the string literal.
1703 while (currentCharacter != '"') {
1704 if (currentCharacter == '\r') {
1705 if (source[currentPosition] == '\n')
1708 // the string cannot go further than the line
1710 if (currentCharacter == '\n') {
1712 // the string cannot go further than the line
// Backslash inside the string: let the escape scanner consume the sequence.
1714 if (currentCharacter == '\\') {
1716 scanDoubleQuotedEscapeCharacter();
1717 } catch (InvalidInputException ex) {
1720 // try { // consume next character
1721 unicodeAsBackSlash = false;
1722 currentCharacter = source[currentPosition++];
1723 // if (((currentCharacter = source[currentPosition++]) == '\\')
1724 // && (source[currentPosition] == 'u')) {
1725 // getNextUnicodeChar();
1727 if (withoutUnicodePtr != 0) {
1728 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1731 // } catch (InvalidInputException ex) {
1734 } catch (IndexOutOfBoundsException e) {
// NOTE(review): a result of 0 presumably means the next character matched
// the first argument ('/'), i.e. a line comment starts here - confirm
// against getNextChar(char, char).
1741 if ((test = getNextChar('/', '*')) == 0) {
1745 currentCharacter = source[currentPosition++];
1746 // if (((currentCharacter = source[currentPosition++]) ==
1748 // && (source[currentPosition] == 'u')) {
1749 // //-------------unicode traitement ------------
1750 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1751 // currentPosition++;
1752 // while (source[currentPosition] == 'u') {
1753 // currentPosition++;
1756 // Character.getNumericValue(source[currentPosition++]))
1760 // Character.getNumericValue(source[currentPosition++]))
1764 // Character.getNumericValue(source[currentPosition++]))
1768 // Character.getNumericValue(source[currentPosition++]))
1771 // //error don't care of the value
1772 // currentCharacter = 'A';
1773 // } //something different from \n and \r
1775 // currentCharacter =
1776 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// Consume the remainder of the line comment up to the line terminator.
1779 while (currentCharacter != '\r' && currentCharacter != '\n') {
1781 currentCharacter = source[currentPosition++];
1782 // if (((currentCharacter = source[currentPosition++])
1784 // && (source[currentPosition] == 'u')) {
1785 // //-------------unicode traitement ------------
1786 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1787 // currentPosition++;
1788 // while (source[currentPosition] == 'u') {
1789 // currentPosition++;
1792 // Character.getNumericValue(source[currentPosition++]))
1796 // Character.getNumericValue(source[currentPosition++]))
1800 // Character.getNumericValue(source[currentPosition++]))
1804 // Character.getNumericValue(source[currentPosition++]))
1807 // //error don't care of the value
1808 // currentCharacter = 'A';
1809 // } //something different from \n and \r
1811 // currentCharacter =
1812 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// Record the terminator that ended the line comment when line ends are tracked.
1816 if (recordLineSeparator
1817 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1818 pushLineSeparator();
1819 } catch (IndexOutOfBoundsException e) {
1820 } //an eof will then be generated
1824 //traditional and annotation comment
// star remembers whether the previously read character was '*', so that a
// following '/' can be recognised as the end of the block comment.
1825 boolean star = false;
1826 // try { // consume next character
1827 unicodeAsBackSlash = false;
1828 currentCharacter = source[currentPosition++];
1829 // if (((currentCharacter = source[currentPosition++]) == '\\')
1830 // && (source[currentPosition] == 'u')) {
1831 // getNextUnicodeChar();
1833 if (withoutUnicodePtr != 0) {
1834 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1837 // } catch (InvalidInputException ex) {
1839 if (currentCharacter == '*') {
1842 if (recordLineSeparator
1843 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1844 pushLineSeparator();
1845 try { //get the next char
1846 currentCharacter = source[currentPosition++];
1847 // if (((currentCharacter = source[currentPosition++]) ==
1849 // && (source[currentPosition] == 'u')) {
1850 // //-------------unicode traitement ------------
1851 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1852 // currentPosition++;
1853 // while (source[currentPosition] == 'u') {
1854 // currentPosition++;
1857 // Character.getNumericValue(source[currentPosition++]))
1861 // Character.getNumericValue(source[currentPosition++]))
1865 // Character.getNumericValue(source[currentPosition++]))
1869 // Character.getNumericValue(source[currentPosition++]))
1872 // //error don't care of the value
1873 // currentCharacter = 'A';
1874 // } //something different from * and /
1876 // currentCharacter =
1877 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1880 //loop until end of comment */
1881 while ((currentCharacter != '/') || (!star)) {
1882 if (recordLineSeparator
1883 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1884 pushLineSeparator();
1885 star = currentCharacter == '*';
1887 currentCharacter = source[currentPosition++];
1888 // if (((currentCharacter = source[currentPosition++])
1890 // && (source[currentPosition] == 'u')) {
1891 // //-------------unicode traitement ------------
1892 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1893 // currentPosition++;
1894 // while (source[currentPosition] == 'u') {
1895 // currentPosition++;
1898 // Character.getNumericValue(source[currentPosition++]))
1902 // Character.getNumericValue(source[currentPosition++]))
1906 // Character.getNumericValue(source[currentPosition++]))
1910 // Character.getNumericValue(source[currentPosition++]))
1913 // //error don't care of the value
1914 // currentCharacter = 'A';
1915 // } //something different from * and /
1917 // currentCharacter =
1918 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1922 } catch (IndexOutOfBoundsException e) {
// Identifier, keyword or $variable: the argument tells the identifier
// scanner whether the token starts with '$'.
1930 if (isPHPIdentifierStart(currentCharacter)
1931 || currentCharacter == '$') {
1933 scanIdentifierOrKeyword((currentCharacter == '$'));
1934 } catch (InvalidInputException ex) {
// Token starting with a digit.
1938 if (Character.isDigit(currentCharacter)) {
1941 } catch (InvalidInputException ex) {
1947 //-----------------end switch while try--------------------
1948 } catch (IndexOutOfBoundsException e) {
1949 } catch (InvalidInputException e) {
1953 // public final boolean jumpOverUnicodeWhiteSpace()
1954 // throws InvalidInputException {
1956 // //handle the case of unicode. Jump over the next whiteSpace
1957 // //making startPosition pointing on the next available char
1958 // //On false, the currentCharacter is filled up with a potential
1962 // this.wasAcr = false;
1963 // int c1, c2, c3, c4;
1964 // int unicodeSize = 6;
1965 // currentPosition++;
1966 // while (source[currentPosition] == 'u') {
1967 // currentPosition++;
1971 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1973 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1975 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1977 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1979 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1982 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1983 // if (recordLineSeparator
1984 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1985 // pushLineSeparator();
1986 // if (Character.isWhitespace(currentCharacter))
1989 // //buffer the new char which is not a white space
1990 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1991 // //withoutUnicodePtr == 1 is true here
1993 // } catch (IndexOutOfBoundsException e) {
1994 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
/**
 * Provides the recorded line-end positions as a fresh array of exactly
 * linePtr + 1 entries, so callers never see the unused tail of the
 * internal lineEnds buffer.
 */
1997 public final int[] getLineEnds() {
1998 //return a bounded copy of this.lineEnds
// The destination array is allocated inside the arraycopy call itself.
2000 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
// NOTE(review): presumably returns the character buffer being scanned (the
// source field used throughout this class) - confirm against the method body.
2003 public char[] getSource() {
/**
 * Returns the source of a token that consists of the single character at
 * source[startPosition].
 */
2006 final char[] optimizedCurrentTokenSource1() {
2007 //return always the same char[] build only once
2008 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2009 char charOne = source[startPosition];
// Fallback: allocate a new one-element array for this character.
2064 return new char[]{charOne};
/**
 * Returns the two characters starting at source[startPosition] as a char[],
 * trying to hand back an array already cached in charArray_length[0]
 * instead of building a new one each time.
 */
2067 final char[] optimizedCurrentTokenSource2() {
2068 //try to return the same char[] build only once
// The hash of the two characters selects the bucket of cached arrays.
2070 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2072 char[][] table = charArray_length[0][hash];
// First scan: entries below InternalTableSize, compared character by character.
2074 while (++i < InternalTableSize) {
2075 char[] charArray = table[i];
2076 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2079 //---------other side---------
// Second scan: entries up to and including newEntry2.
2081 int max = newEntry2;
2082 while (++i <= max) {
2083 char[] charArray = table[i];
2084 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2087 //--------add the entry-------
// Cache miss: a new array is built and stored in the slot after newEntry2.
2088 if (++max >= InternalTableSize)
2091 table[max] = (r = new char[]{c0, c1});
/**
 * Returns the three characters starting at source[startPosition] as a
 * char[], trying to hand back an array already cached in
 * charArray_length[1] instead of building a new one each time.
 */
2095 final char[] optimizedCurrentTokenSource3() {
2096 //try to return the same char[] build only once
// The hash of the three characters selects the bucket of cached arrays.
2098 int hash = (((c0 = source[startPosition]) << 12)
2099 + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2101 char[][] table = charArray_length[1][hash];
// First scan: entries below InternalTableSize, compared character by character.
2103 while (++i < InternalTableSize) {
2104 char[] charArray = table[i];
2105 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2108 //---------other side---------
// Second scan: entries up to and including newEntry3.
2110 int max = newEntry3;
2111 while (++i <= max) {
2112 char[] charArray = table[i];
2113 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2116 //--------add the entry-------
// Cache miss: a new array is built and stored in the slot after newEntry3.
2117 if (++max >= InternalTableSize)
2120 table[max] = (r = new char[]{c0, c1, c2});
/**
 * Returns the four characters starting at source[startPosition] as a
 * char[], trying to hand back an array already cached in
 * charArray_length[2] instead of building a new one each time.
 */
2124 final char[] optimizedCurrentTokenSource4() {
2125 //try to return the same char[] build only once
2126 char c0, c1, c2, c3;
// The hash is accumulated as a long (the first character is widened before
// shifting) and narrowed to int when used as the bucket index.
2127 long hash = ((((long) (c0 = source[startPosition])) << 18)
2128 + ((c1 = source[startPosition + 1]) << 12)
2129 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2131 char[][] table = charArray_length[2][(int) hash];
// First scan: entries below InternalTableSize, compared character by character.
2133 while (++i < InternalTableSize) {
2134 char[] charArray = table[i];
2135 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2136 && (c3 == charArray[3]))
2139 //---------other side---------
// Second scan: entries up to and including newEntry4.
2141 int max = newEntry4;
2142 while (++i <= max) {
2143 char[] charArray = table[i];
2144 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2145 && (c3 == charArray[3]))
2148 //--------add the entry-------
// Cache miss: a new array is built and stored in the slot after newEntry4.
2149 if (++max >= InternalTableSize)
2152 table[max] = (r = new char[]{c0, c1, c2, c3});
/**
 * Returns the five characters starting at source[startPosition] as a
 * char[], trying to hand back an array already cached in
 * charArray_length[3] instead of building a new one each time.
 */
2156 final char[] optimizedCurrentTokenSource5() {
2157 //try to return the same char[] build only once
2158 char c0, c1, c2, c3, c4;
// The hash is accumulated as a long (the two leading characters are widened
// before shifting) and narrowed to int when used as the bucket index.
2159 long hash = ((((long) (c0 = source[startPosition])) << 24)
2160 + (((long) (c1 = source[startPosition + 1])) << 18)
2161 + ((c2 = source[startPosition + 2]) << 12)
2162 + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2164 char[][] table = charArray_length[3][(int) hash];
// First scan: entries below InternalTableSize, compared character by character.
2166 while (++i < InternalTableSize) {
2167 char[] charArray = table[i];
2168 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2169 && (c3 == charArray[3]) && (c4 == charArray[4]))
2172 //---------other side---------
// Second scan: entries up to and including newEntry5.
2174 int max = newEntry5;
2175 while (++i <= max) {
2176 char[] charArray = table[i];
2177 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2178 && (c3 == charArray[3]) && (c4 == charArray[4]))
2181 //--------add the entry-------
// Cache miss: a new array is built and stored in the slot after newEntry5.
2182 if (++max >= InternalTableSize)
2185 table[max] = (r = new char[]{c0, c1, c2, c3, c4});
/**
 * Returns the six characters starting at source[startPosition] as a
 * char[], trying to hand back an array already cached in
 * charArray_length[4] instead of building a new one each time.
 */
2189 final char[] optimizedCurrentTokenSource6() {
2190 //try to return the same char[] build only once
2191 char c0, c1, c2, c3, c4, c5;
// The hash is accumulated as a long and narrowed to int for the bucket index.
// NOTE(review): the shift widths are 32, 24, 18, 12, 6 - the first step is
// 8 bits while the others are 6; confirm this is intended.
2192 long hash = ((((long) (c0 = source[startPosition])) << 32)
2193 + (((long) (c1 = source[startPosition + 1])) << 24)
2194 + (((long) (c2 = source[startPosition + 2])) << 18)
2195 + ((c3 = source[startPosition + 3]) << 12)
2196 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2198 char[][] table = charArray_length[4][(int) hash];
// First scan: entries below InternalTableSize, compared character by character.
2200 while (++i < InternalTableSize) {
2201 char[] charArray = table[i];
2202 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2203 && (c3 == charArray[3]) && (c4 == charArray[4])
2204 && (c5 == charArray[5]))
2207 //---------other side---------
// Second scan: entries up to and including newEntry6.
2209 int max = newEntry6;
2210 while (++i <= max) {
2211 char[] charArray = table[i];
2212 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2213 && (c3 == charArray[3]) && (c4 == charArray[4])
2214 && (c5 == charArray[5]))
2217 //--------add the entry-------
// Cache miss: a new array is built and stored in the slot after newEntry6.
2218 if (++max >= InternalTableSize)
2221 table[max] = (r = new char[]{c0, c1, c2, c3, c4, c5});
/**
 * Records the position of the line terminator held in currentCharacter
 * (located at currentPosition - 1) in the lineEnds table, growing the table
 * by INCREMENT entries when it is full. A carriage return directly followed
 * by a line feed is stored as a single line end located on the line feed.
 *
 * @throws InvalidInputException declared by the signature; NOTE(review): no
 *         throwing statement is visible in this method - confirm the source
 */
2225 public final void pushLineSeparator() throws InvalidInputException {
2226 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2227 final int INCREMENT = 250;
2228 if (this.checkNonExternalizedStringLiterals) {
2229 // reinitialize the current line for non externalize strings purpose
2232 //currentCharacter is at position currentPosition-1
2234 if (currentCharacter == '\r') {
2235 int separatorPos = currentPosition - 1;
// Guard: the separator is not past the last line end already recorded.
2236 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2238 //System.out.println("CR-" + separatorPos);
2240 lineEnds[++linePtr] = separatorPos;
// lineEnds was full: linePtr has already been incremented by the failed
// store, so the table is enlarged and the entry written at that index.
2241 } catch (IndexOutOfBoundsException e) {
2242 //linePtr value is correct
2243 int oldLength = lineEnds.length;
2244 int[] old = lineEnds;
2245 lineEnds = new int[oldLength + INCREMENT];
2246 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2247 lineEnds[linePtr] = separatorPos;
2249 // look-ahead for merged cr+lf
2251 if (source[currentPosition] == '\n') {
2252 //System.out.println("look-ahead LF-" + currentPosition);
// The CR is immediately followed by LF: move the line end onto the LF.
2253 lineEnds[linePtr] = currentPosition;
2259 } catch (IndexOutOfBoundsException e) {
2264 if (currentCharacter == '\n') {
2265 //must merge eventual cr followed by lf
// The last recorded line end is the CR just before this LF: extend that
// entry to the LF instead of adding a second line end.
2266 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2267 //System.out.println("merge LF-" + (currentPosition - 1));
2268 lineEnds[linePtr] = currentPosition - 1;
2270 int separatorPos = currentPosition - 1;
// Guard: the separator is not past the last line end already recorded.
2271 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2273 // System.out.println("LF-" + separatorPos);
2275 lineEnds[++linePtr] = separatorPos;
// Same growth path as for CR: enlarge lineEnds and store at linePtr.
2276 } catch (IndexOutOfBoundsException e) {
2277 //linePtr value is correct
2278 int oldLength = lineEnds.length;
2279 int[] old = lineEnds;
2280 lineEnds = new int[oldLength + INCREMENT];
2281 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2282 lineEnds[linePtr] = separatorPos;
/**
 * Variant of pushLineSeparator() for a line terminator that was read from a
 * unicode escape: the recorded position is currentPosition - 6 instead of
 * currentPosition - 1 (6 presumably being the length of a backslash-u
 * escape with four hex digits - TODO confirm). The lineEnds table grows by
 * INCREMENT entries when it is full.
 */
2289 public final void pushUnicodeLineSeparator() {
2290 // isUnicode means that the \r or \n has been read as a unicode character
2291 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2292 final int INCREMENT = 250;
2293 //currentCharacter is at position currentPosition-1
2294 if (this.checkNonExternalizedStringLiterals) {
2295 // reinitialize the current line for non externalize strings purpose
2299 if (currentCharacter == '\r') {
2300 int separatorPos = currentPosition - 6;
// Guard: the separator is not past the last line end already recorded.
2301 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2303 //System.out.println("CR-" + separatorPos);
2305 lineEnds[++linePtr] = separatorPos;
// lineEnds was full: linePtr has already been incremented by the failed
// store, so the table is enlarged and the entry written at that index.
2306 } catch (IndexOutOfBoundsException e) {
2307 //linePtr value is correct
2308 int oldLength = lineEnds.length;
2309 int[] old = lineEnds;
2310 lineEnds = new int[oldLength + INCREMENT];
2311 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2312 lineEnds[linePtr] = separatorPos;
2314 // look-ahead for merged cr+lf
2315 if (source[currentPosition] == '\n') {
2316 //System.out.println("look-ahead LF-" + currentPosition);
// The escaped CR is directly followed by a literal LF: move the line end
// onto the LF.
2317 lineEnds[linePtr] = currentPosition;
2325 if (currentCharacter == '\n') {
2326 //must merge eventual cr followed by lf
// The last recorded line end sits right before this escaped LF: extend that
// entry instead of adding a second line end.
2327 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2328 //System.out.println("merge LF-" + (currentPosition - 1));
2329 lineEnds[linePtr] = currentPosition - 6;
2331 int separatorPos = currentPosition - 6;
// Guard: the separator is not past the last line end already recorded.
2332 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2334 // System.out.println("LF-" + separatorPos);
2336 lineEnds[++linePtr] = separatorPos;
// Same growth path as for CR: enlarge lineEnds and store at linePtr.
2337 } catch (IndexOutOfBoundsException e) {
2338 //linePtr value is correct
2339 int oldLength = lineEnds.length;
2340 int[] old = lineEnds;
2341 lineEnds = new int[oldLength + INCREMENT];
2342 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2343 lineEnds[linePtr] = separatorPos;
/**
 * Pushes the comment that has just been scanned onto the comment stack:
 * commentStarts receives startPosition and commentStops receives the
 * current position, whose sign encodes the comment kind.
 *
 * @param isJavadoc selects the sign of the stored stop position (see the
 *        growth path below: currentPosition when true, -currentPosition
 *        when false)
 */
2350 public final void recordComment(boolean isJavadoc) {
2351 // a new annotation comment is recorded
2353 commentStops[++commentPtr] = isJavadoc
// commentStops was full: commentPtr has already been incremented, so both
// stacks are enlarged by 30 entries and the stop is stored at that index.
2356 } catch (IndexOutOfBoundsException e) {
2357 int oldStackLength = commentStops.length;
2358 int[] oldStack = commentStops;
2359 commentStops = new int[oldStackLength + 30];
2360 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2361 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2362 //grows the positions buffers too
2363 int[] old = commentStarts;
2364 commentStarts = new int[oldStackLength + 30];
2365 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2367 //the buffer is of a correct size here
2368 commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning can start again at begin.
 *
 * @param begin position assigned to initialPosition, startPosition and
 *          currentPosition
 * @param end last position to scan; eofPosition is set to end + 1, except
 *          when end is Integer.MAX_VALUE, where it is used unchanged
 */
2370 public void resetTo(int begin, int end) {
2371 //reset the scanner to a given position where it may rescan again
2373 initialPosition = startPosition = currentPosition = begin;
// end + 1 would overflow for Integer.MAX_VALUE, hence the special case
2374 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2375 commentPtr = -1; // reset comment stack
/**
 * Decodes the character that follows a backslash in a single-quoted string.
 * <p>
 * Reads the next source character, advances currentPosition past it and
 * replaces currentCharacter with the decoded value. The branches below
 * produce either a single quote or a backslash.
 *
 * @throws InvalidInputException declared by the signature; no throw statement
 *           appears among the statements below
 */
2377 public final void scanSingleQuotedEscapeCharacter()
2378 throws InvalidInputException {
2379 // the string with "\\u" is a legal string of two chars \ and u
2380 //thus we use a direct access to the source (for regular cases).
2381 // if (unicodeAsBackSlash) {
2382 // // consume next character
2383 // unicodeAsBackSlash = false;
2384 // if (((currentCharacter = source[currentPosition++]) == '\\')
2385 // && (source[currentPosition] == 'u')) {
2386 // getNextUnicodeChar();
2388 // if (withoutUnicodePtr != 0) {
2389 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// direct read of the escaped character; unicode escapes are not expanded here
2393 currentCharacter = source[currentPosition++];
2394 switch (currentCharacter) {
2396 currentCharacter = '\'';
2399 currentCharacter = '\\';
// NOTE(review): second branch that yields a backslash - presumably the
// fallback for an unrecognised escape; confirm against the case labels
2402 currentCharacter = '\\';
/**
 * Decodes the character that follows a backslash in a double-quoted string.
 * <p>
 * Reads the next source character and replaces currentCharacter with the
 * decoded value: tab, line feed, carriage return, double quote, single quote,
 * backslash or dollar sign, or the character designated by an octal escape of
 * one to three digits. The backspace and form-feed assignments are commented
 * out.
 *
 * @throws InvalidInputException with INVALID_ESCAPE from the octal branch
 */
2406 public final void scanDoubleQuotedEscapeCharacter()
2407 throws InvalidInputException {
2408 // the string with "\\u" is a legal string of two chars \ and u
2409 //thus we use a direct access to the source (for regular cases).
2410 // if (unicodeAsBackSlash) {
2411 // // consume next character
2412 // unicodeAsBackSlash = false;
2413 // if (((currentCharacter = source[currentPosition++]) == '\\')
2414 // && (source[currentPosition] == 'u')) {
2415 // getNextUnicodeChar();
2417 // if (withoutUnicodePtr != 0) {
2418 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// direct read of the escaped character; unicode escapes are not expanded here
2422 currentCharacter = source[currentPosition++];
2423 switch (currentCharacter) {
2425 // currentCharacter = '\b';
2428 currentCharacter = '\t';
2431 currentCharacter = '\n';
2434 // currentCharacter = '\f';
2437 currentCharacter = '\r';
2440 currentCharacter = '\"';
2443 currentCharacter = '\'';
2446 currentCharacter = '\\';
2449 currentCharacter = '$';
2452 // -----------octal escape--------------
2454 // OctalDigit OctalDigit
2455 // ZeroToThree OctalDigit OctalDigit
// value of the first digit; anything outside 0..7 is not an octal escape
2456 int number = Character.getNumericValue(currentCharacter);
2457 if (number >= 0 && number <= 7) {
// a first digit above 3 allows at most one more octal digit
2458 boolean zeroToThreeNot = number > 3;
2459 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2460 int digit = Character.getNumericValue(currentCharacter);
2461 if (digit >= 0 && digit <= 7) {
// accumulate the escape value in base 8
2462 number = (number * 8) + digit;
2464 .isDigit(currentCharacter = source[currentPosition++])) {
2465 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2466 // Digit --> ignore last character
2469 digit = Character.getNumericValue(currentCharacter);
2470 if (digit >= 0 && digit <= 7) {
2471 // has read \ZeroToThree OctalDigit OctalDigit
2472 number = (number * 8) + digit;
2473 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2474 // --> ignore last character
2478 } else { // has read \OctalDigit NonDigit--> ignore last
2482 } else { // has read \OctalDigit NonOctalDigit--> ignore last
2486 } else { // has read \OctalDigit --> ignore last character
// NOTE(review): presumably guarded by a range check on number - confirm
2490 throw new InvalidInputException(INVALID_ESCAPE);
2491 currentCharacter = (char) number;
2494 // throw new InvalidInputException(INVALID_ESCAPE);
2497 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2498 // return scanIdentifierOrKeyword( false );
/**
 * Scans the remainder of an identifier and classifies it.
 * <p>
 * Characters are consumed while getNextCharAsJavaIdentifierPart() succeeds.
 * The token text is then copied, lower-cased, into a scratch array and
 * compared character by character against the keywords listed in the
 * branches below, so keyword recognition is case-insensitive. A token of
 * length 1 is always an identifier.
 *
 * @param isVariable NOTE(review): presumably true when the token is a
 *          variable, in which case TokenNamethis (for the exact text "$this")
 *          or TokenNameVariable is returned - confirm the guarding condition
 * @return the token of the recognised keyword, TokenNamethis or
 *         TokenNameVariable for variables, TokenNameIdentifier otherwise
 * @throws InvalidInputException declared by the signature; presumably raised
 *           by the character-scanning helper
 */
2500 public int scanIdentifierOrKeyword(boolean isVariable)
2501 throws InvalidInputException {
2503 //first dispatch on the first char.
2504 //then the length. If there are several
2505 //keywords with the same length AND the same first char, then do another
2506 //dispatch on the second char :-)...cool....but fast !
2507 useAssertAsAnIndentifier = false;
2508 while (getNextCharAsJavaIdentifierPart()) {
// "$this" is compared with String.equals, i.e. case-sensitively
2511 if (new String(getCurrentTokenSource()).equals("$this")) {
2512 return TokenNamethis;
2514 return TokenNameVariable;
2519 // if (withoutUnicodePtr == 0)
2520 //quick test on length == 1 but not on length > 12 while most identifier
2521 //have a length which is <= 12...but there are lots of identifier with
2524 if ((length = currentPosition - startPosition) == 1)
2525 return TokenNameIdentifier;
// lower-cased copy of the token: all comparisons below use lower-case letters
2527 data = new char[length];
2528 index = startPosition;
2529 for (int i = 0; i < length; i++) {
2530 data[i] = Character.toLowerCase(source[index + i]);
2534 // if ((length = withoutUnicodePtr) == 1)
2535 // return TokenNameIdentifier;
2536 // // data = withoutUnicodeBuffer;
2537 // data = new char[withoutUnicodeBuffer.length];
2538 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2539 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2543 firstLetter = data[index];
2544 switch (firstLetter) {
// each test advances index with ++index; && short-circuits, so index stops
// at the first character that does not match
2549 if ((data[++index] == '_') && (data[++index] == 'f')
2550 && (data[++index] == 'i') && (data[++index] == 'l')
2551 && (data[++index] == 'e') && (data[++index] == '_')
2552 && (data[++index] == '_'))
2553 return TokenNameFILE;
2554 index = 0; //__LINE__
2555 if ((data[++index] == '_') && (data[++index] == 'l')
2556 && (data[++index] == 'i') && (data[++index] == 'n')
2557 && (data[++index] == 'e') && (data[++index] == '_')
2558 && (data[++index] == '_'))
2559 return TokenNameLINE;
2563 if ((data[++index] == '_') && (data[++index] == 'c')
2564 && (data[++index] == 'l') && (data[++index] == 'a')
2565 && (data[++index] == 's') && (data[++index] == 's')
2566 && (data[++index] == '_') && (data[++index] == '_'))
2567 return TokenNameCLASS_C;
2571 if ((data[++index] == '_') && (data[++index] == 'm')
2572 && (data[++index] == 'e') && (data[++index] == 't')
2573 && (data[++index] == 'h') && (data[++index] == 'o')
2574 && (data[++index] == 'd') && (data[++index] == '_')
2575 && (data[++index] == '_'))
2576 return TokenNameMETHOD_C;
2580 if ((data[++index] == '_') && (data[++index] == 'f')
2581 && (data[++index] == 'u') && (data[++index] == 'n')
2582 && (data[++index] == 'c') && (data[++index] == 't')
2583 && (data[++index] == 'i') && (data[++index] == 'o')
2584 && (data[++index] == 'n') && (data[++index] == '_')
2585 && (data[++index] == '_'))
2586 return TokenNameFUNC_C;
2589 return TokenNameIdentifier;
2591 // as and array abstract
2595 if ((data[++index] == 's')) {
2598 return TokenNameIdentifier;
2602 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2603 return TokenNameAND;
2605 return TokenNameIdentifier;
2609 if ((data[++index] == 'r') && (data[++index] == 'r')
2610 && (data[++index] == 'a') && (data[++index] == 'y'))
2611 return TokenNamearray;
2613 return TokenNameIdentifier;
2615 if ((data[++index] == 'b') && (data[++index] == 's')
2616 && (data[++index] == 't') && (data[++index] == 'r')
2617 && (data[++index] == 'a') && (data[++index] == 'c')
2618 && (data[++index] == 't'))
2619 return TokenNameabstract;
2621 return TokenNameIdentifier;
2623 return TokenNameIdentifier;
2629 if ((data[++index] == 'r') && (data[++index] == 'e')
2630 && (data[++index] == 'a') && (data[++index] == 'k'))
2631 return TokenNamebreak;
2633 return TokenNameIdentifier;
2635 return TokenNameIdentifier;
2638 //case catch class const continue
2641 if ((data[++index] == 'a') && (data[++index] == 's')
2642 && (data[++index] == 'e'))
2643 return TokenNamecase;
2645 return TokenNameIdentifier;
2647 if ((data[++index] == 'a') && (data[++index] == 't')
2648 && (data[++index] == 'c') && (data[++index] == 'h'))
2649 return TokenNamecatch;
// the alternatives below re-test data[index] without advancing: they rely on
// the test above having failed on the second character
2650 if ((data[index] == 'l') && (data[++index] == 'a')
2651 && (data[++index] == 's') && (data[++index] == 's'))
2652 return TokenNameclass;
2653 if ((data[index] == 'o') && (data[++index] == 'n')
2654 && (data[++index] == 's') && (data[++index] == 't'))
2655 return TokenNameconst;
2657 return TokenNameIdentifier;
2659 if ((data[++index] == 'o') && (data[++index] == 'n')
2660 && (data[++index] == 't') && (data[++index] == 'i')
2661 && (data[++index] == 'n') && (data[++index] == 'u')
2662 && (data[++index] == 'e'))
2663 return TokenNamecontinue;
2665 return TokenNameIdentifier;
2667 return TokenNameIdentifier;
2670 // declare default do die
2671 // TODO delete define ==> no keyword !
2674 if ((data[++index] == 'o'))
2677 return TokenNameIdentifier;
2679 // if ((data[++index] == 'e')
2680 // && (data[++index] == 'f')
2681 // && (data[++index] == 'i')
2682 // && (data[++index] == 'n')
2683 // && (data[++index] == 'e'))
2684 // return TokenNamedefine;
2686 // return TokenNameIdentifier;
2688 if ((data[++index] == 'e') && (data[++index] == 'c')
2689 && (data[++index] == 'l') && (data[++index] == 'a')
2690 && (data[++index] == 'r') && (data[++index] == 'e'))
2691 return TokenNamedeclare;
2693 if ((data[++index] == 'e') && (data[++index] == 'f')
2694 && (data[++index] == 'a') && (data[++index] == 'u')
2695 && (data[++index] == 'l') && (data[++index] == 't'))
2696 return TokenNamedefault;
2698 return TokenNameIdentifier;
2700 return TokenNameIdentifier;
2703 //echo else exit elseif extends eval
2706 if ((data[++index] == 'c') && (data[++index] == 'h')
2707 && (data[++index] == 'o'))
2708 return TokenNameecho;
2709 else if ((data[index] == 'l') && (data[++index] == 's')
2710 && (data[++index] == 'e'))
2711 return TokenNameelse;
2712 else if ((data[index] == 'x') && (data[++index] == 'i')
2713 && (data[++index] == 't'))
2714 return TokenNameexit;
2715 else if ((data[index] == 'v') && (data[++index] == 'a')
2716 && (data[++index] == 'l'))
2717 return TokenNameeval;
2719 return TokenNameIdentifier;
2722 if ((data[++index] == 'n') && (data[++index] == 'd')
2723 && (data[++index] == 'i') && (data[++index] == 'f'))
2724 return TokenNameendif;
2725 if ((data[index] == 'm') && (data[++index] == 'p')
2726 && (data[++index] == 't') && (data[++index] == 'y'))
2727 return TokenNameempty;
2729 return TokenNameIdentifier;
2732 if ((data[++index] == 'n') && (data[++index] == 'd')
2733 && (data[++index] == 'f') && (data[++index] == 'o')
2734 && (data[++index] == 'r'))
2735 return TokenNameendfor;
2736 else if ((data[index] == 'l') && (data[++index] == 's')
2737 && (data[++index] == 'e') && (data[++index] == 'i')
2738 && (data[++index] == 'f'))
2739 return TokenNameelseif;
2741 return TokenNameIdentifier;
2743 if ((data[++index] == 'x') && (data[++index] == 't')
2744 && (data[++index] == 'e') && (data[++index] == 'n')
2745 && (data[++index] == 'd') && (data[++index] == 's'))
2746 return TokenNameextends;
2748 return TokenNameIdentifier;
2751 if ((data[++index] == 'n') && (data[++index] == 'd')
2752 && (data[++index] == 'w') && (data[++index] == 'h')
2753 && (data[++index] == 'i') && (data[++index] == 'l')
2754 && (data[++index] == 'e'))
2755 return TokenNameendwhile;
2757 return TokenNameIdentifier;
2760 if ((data[++index] == 'n') && (data[++index] == 'd')
2761 && (data[++index] == 's') && (data[++index] == 'w')
2762 && (data[++index] == 'i') && (data[++index] == 't')
2763 && (data[++index] == 'c') && (data[++index] == 'h'))
2764 return TokenNameendswitch;
2766 return TokenNameIdentifier;
2769 if ((data[++index] == 'n') && (data[++index] == 'd')
2770 && (data[++index] == 'd') && (data[++index] == 'e')
2771 && (data[++index] == 'c') && (data[++index] == 'l')
2772 && (data[++index] == 'a') && (data[++index] == 'r')
2773 && (data[++index] == 'e'))
// NOTE(review): the letters tested above spell "enddeclare", yet the token
// returned is the one for "endforeach" - confirm whether a dedicated
// enddeclare token was intended
2774 return TokenNameendforeach;
2776 if ((data[++index] == 'n') // endforeach
2777 && (data[++index] == 'd') && (data[++index] == 'f')
2778 && (data[++index] == 'o') && (data[++index] == 'r')
2779 && (data[++index] == 'e') && (data[++index] == 'a')
2780 && (data[++index] == 'c') && (data[++index] == 'h'))
2781 return TokenNameendforeach;
2783 return TokenNameIdentifier;
2785 return TokenNameIdentifier;
2788 //for false final function
2791 if ((data[++index] == 'o') && (data[++index] == 'r'))
2792 return TokenNamefor;
2794 return TokenNameIdentifier;
2796 // if ((data[++index] == 'a') && (data[++index] == 'l')
2797 // && (data[++index] == 's') && (data[++index] == 'e'))
2798 // return TokenNamefalse;
2799 if ((data[++index] == 'i') && (data[++index] == 'n')
2800 && (data[++index] == 'a') && (data[++index] == 'l'))
2801 return TokenNamefinal;
2803 return TokenNameIdentifier;
2806 if ((data[++index] == 'o') && (data[++index] == 'r')
2807 && (data[++index] == 'e') && (data[++index] == 'a')
2808 && (data[++index] == 'c') && (data[++index] == 'h'))
2809 return TokenNameforeach;
2811 return TokenNameIdentifier;
2814 if ((data[++index] == 'u') && (data[++index] == 'n')
2815 && (data[++index] == 'c') && (data[++index] == 't')
2816 && (data[++index] == 'i') && (data[++index] == 'o')
2817 && (data[++index] == 'n'))
2818 return TokenNamefunction;
2820 return TokenNameIdentifier;
2822 return TokenNameIdentifier;
2827 if ((data[++index] == 'l') && (data[++index] == 'o')
2828 && (data[++index] == 'b') && (data[++index] == 'a')
2829 && (data[++index] == 'l')) {
2830 return TokenNameglobal;
2833 return TokenNameIdentifier;
2835 //if int isset include include_once instanceof interface implements
2838 if (data[++index] == 'f')
2841 return TokenNameIdentifier;
2843 // if ((data[++index] == 'n') && (data[++index] == 't'))
2844 // return TokenNameint;
2846 // return TokenNameIdentifier;
2848 if ((data[++index] == 's') && (data[++index] == 's')
2849 && (data[++index] == 'e') && (data[++index] == 't'))
2850 return TokenNameisset;
2852 return TokenNameIdentifier;
2854 if ((data[++index] == 'n') && (data[++index] == 'c')
2855 && (data[++index] == 'l') && (data[++index] == 'u')
2856 && (data[++index] == 'd') && (data[++index] == 'e'))
2857 return TokenNameinclude;
2859 return TokenNameIdentifier;
2862 if ((data[++index] == 'n') && (data[++index] == 't')
2863 && (data[++index] == 'e') && (data[++index] == 'r')
2864 && (data[++index] == 'f') && (data[++index] == 'a')
2865 && (data[++index] == 'c') && (data[++index] == 'e'))
2866 return TokenNameinterface;
2868 return TokenNameIdentifier;
2871 if ((data[++index] == 'n') && (data[++index] == 's')
2872 && (data[++index] == 't') && (data[++index] == 'a')
2873 && (data[++index] == 'n') && (data[++index] == 'c')
2874 && (data[++index] == 'e') && (data[++index] == 'o')
2875 && (data[++index] == 'f'))
2876 return TokenNameinstanceof;
2877 if ((data[index] == 'm') && (data[++index] == 'p')
2878 && (data[++index] == 'l') && (data[++index] == 'e')
2879 && (data[++index] == 'm') && (data[++index] == 'e')
2880 && (data[++index] == 'n') && (data[++index] == 't')
2881 && (data[++index] == 's'))
2882 return TokenNameimplements;
2884 return TokenNameIdentifier;
2886 if ((data[++index] == 'n') && (data[++index] == 'c')
2887 && (data[++index] == 'l') && (data[++index] == 'u')
2888 && (data[++index] == 'd') && (data[++index] == 'e')
2889 && (data[++index] == '_') && (data[++index] == 'o')
2890 && (data[++index] == 'n') && (data[++index] == 'c')
2891 && (data[++index] == 'e'))
2892 return TokenNameinclude_once;
2894 return TokenNameIdentifier;
2896 return TokenNameIdentifier;
2901 if ((data[++index] == 'i') && (data[++index] == 's')
2902 && (data[++index] == 't')) {
2903 return TokenNamelist;
2906 return TokenNameIdentifier;
2911 if ((data[++index] == 'e') && (data[++index] == 'w'))
2912 return TokenNamenew;
2914 return TokenNameIdentifier;
2916 // if ((data[++index] == 'u') && (data[++index] == 'l')
2917 // && (data[++index] == 'l'))
2918 // return TokenNamenull;
2920 // return TokenNameIdentifier;
2922 return TokenNameIdentifier;
2927 if (data[++index] == 'r') {
2931 // if (length == 12) {
2932 // if ((data[++index] == 'l')
2933 // && (data[++index] == 'd')
2934 // && (data[++index] == '_')
2935 // && (data[++index] == 'f')
2936 // && (data[++index] == 'u')
2937 // && (data[++index] == 'n')
2938 // && (data[++index] == 'c')
2939 // && (data[++index] == 't')
2940 // && (data[++index] == 'i')
2941 // && (data[++index] == 'o')
2942 // && (data[++index] == 'n')) {
2943 // return TokenNameold_function;
2946 return TokenNameIdentifier;
2948 // print public private protected
2951 if ((data[++index] == 'r') && (data[++index] == 'i')
2952 && (data[++index] == 'n') && (data[++index] == 't')) {
2953 return TokenNameprint;
2955 return TokenNameIdentifier;
2957 if ((data[++index] == 'u') && (data[++index] == 'b')
2958 && (data[++index] == 'l') && (data[++index] == 'i')
2959 && (data[++index] == 'c')) {
2960 return TokenNamepublic;
2962 return TokenNameIdentifier;
2964 if ((data[++index] == 'r') && (data[++index] == 'i')
2965 && (data[++index] == 'v') && (data[++index] == 'a')
2966 && (data[++index] == 't') && (data[++index] == 'e')) {
2967 return TokenNameprivate;
2969 return TokenNameIdentifier;
2971 if ((data[++index] == 'r') && (data[++index] == 'o')
2972 && (data[++index] == 't') && (data[++index] == 'e')
2973 && (data[++index] == 'c') && (data[++index] == 't')
2974 && (data[++index] == 'e') && (data[++index] == 'd')) {
2975 return TokenNameprotected;
2977 return TokenNameIdentifier;
2979 return TokenNameIdentifier;
2981 //return require require_once
2983 if ((data[++index] == 'e') && (data[++index] == 't')
2984 && (data[++index] == 'u') && (data[++index] == 'r')
2985 && (data[++index] == 'n')) {
2986 return TokenNamereturn;
2988 } else if (length == 7) {
2989 if ((data[++index] == 'e') && (data[++index] == 'q')
2990 && (data[++index] == 'u') && (data[++index] == 'i')
2991 && (data[++index] == 'r') && (data[++index] == 'e')) {
2992 return TokenNamerequire;
2994 } else if (length == 12) {
2995 if ((data[++index] == 'e') && (data[++index] == 'q')
2996 && (data[++index] == 'u') && (data[++index] == 'i')
2997 && (data[++index] == 'r') && (data[++index] == 'e')
2998 && (data[++index] == '_') && (data[++index] == 'o')
2999 && (data[++index] == 'n') && (data[++index] == 'c')
3000 && (data[++index] == 'e')) {
3001 return TokenNamerequire_once;
3004 return TokenNameIdentifier;
3009 if (data[++index] == 't')
3010 if ((data[++index] == 'a') && (data[++index] == 't')
3011 && (data[++index] == 'i') && (data[++index] == 'c')) {
3012 return TokenNamestatic;
3014 return TokenNameIdentifier;
3015 else if ((data[index] == 'w') && (data[++index] == 'i')
3016 && (data[++index] == 't') && (data[++index] == 'c')
3017 && (data[++index] == 'h'))
3018 return TokenNameswitch;
3020 return TokenNameIdentifier;
3022 return TokenNameIdentifier;
3028 if ((data[++index] == 'r') && (data[++index] == 'y'))
3029 return TokenNametry;
3031 return TokenNameIdentifier;
3033 // if ((data[++index] == 'r') && (data[++index] == 'u')
3034 // && (data[++index] == 'e'))
3035 // return TokenNametrue;
3037 // return TokenNameIdentifier;
3039 if ((data[++index] == 'h') && (data[++index] == 'r')
3040 && (data[++index] == 'o') && (data[++index] == 'w'))
3041 return TokenNamethrow;
3043 return TokenNameIdentifier;
3045 return TokenNameIdentifier;
3051 if ((data[++index] == 's') && (data[++index] == 'e'))
3052 return TokenNameuse;
3054 return TokenNameIdentifier;
3056 if ((data[++index] == 'n') && (data[++index] == 's')
3057 && (data[++index] == 'e') && (data[++index] == 't'))
3058 return TokenNameunset;
3060 return TokenNameIdentifier;
3062 return TokenNameIdentifier;
3068 if ((data[++index] == 'a') && (data[++index] == 'r'))
3069 return TokenNamevar;
3071 return TokenNameIdentifier;
3073 return TokenNameIdentifier;
3079 if ((data[++index] == 'h') && (data[++index] == 'i')
3080 && (data[++index] == 'l') && (data[++index] == 'e'))
3081 return TokenNamewhile;
3083 return TokenNameIdentifier;
3084 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3085 // (data[++index]=='e') && (data[++index]=='f')&&
3086 // (data[++index]=='p'))
3087 //return TokenNamewidefp ;
3089 //return TokenNameIdentifier;
3091 return TokenNameIdentifier;
3097 if ((data[++index] == 'o') && (data[++index] == 'r'))
3098 return TokenNameXOR;
3100 return TokenNameIdentifier;
3102 return TokenNameIdentifier;
3105 return TokenNameIdentifier;
/**
 * Scans a numeric literal whose first digit is already in currentCharacter.
 * <p>
 * A leading '0' followed by 'x' or 'X' starts a hexadecimal literal. A
 * leading '0' followed by further digits is a potential octal literal, which
 * still becomes a double when a 'd'/'D' suffix or a '.' follows. Any other
 * input is a decimal literal with an optional fraction, an optional exponent
 * introduced by 'e' or 'E', and an optional 'd'/'D' suffix.
 *
 * @param dotPrefix true when the digits were preceded by a '.'; the literal
 *          is then treated as floating and no further '.' is accepted
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException with INVALID_HEXA when no hexadecimal digit
 *           follows the 0x prefix, or INVALID_FLOAT when an exponent has no
 *           digit
 */
3108 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3109 //when entering this method the currentCharacter is the first
3110 //digit of the number , i.e. it may be preceded by a . when
3112 boolean floating = dotPrefix;
3113 if ((!dotPrefix) && (currentCharacter == '0')) {
3114 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3115 //force the first char of the hexa number do exist...
3116 // consume next character
3117 unicodeAsBackSlash = false;
3118 currentCharacter = source[currentPosition++];
3119 // if (((currentCharacter = source[currentPosition++]) == '\\')
3120 // && (source[currentPosition] == 'u')) {
3121 // getNextUnicodeChar();
3123 // if (withoutUnicodePtr != 0) {
3124 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Character.digit returns -1 for a character that is not a base-16 digit
3127 if (Character.digit(currentCharacter, 16) == -1)
3128 throw new InvalidInputException(INVALID_HEXA);
// consume the remaining hexadecimal digits
3130 while (getNextCharAsDigit(16)) {
3132 // if (getNextChar('l', 'L') >= 0)
3133 // return TokenNameLongLiteral;
3135 return TokenNameIntegerLiteral;
3137 //there is x or X in the number
3138 //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3139 // 00078.0 is true !!!!! crazy language
3140 if (getNextCharAsDigit()) {
3141 //-------------potential octal-----------------
3142 while (getNextCharAsDigit()) {
3144 // if (getNextChar('l', 'L') >= 0) {
3145 // return TokenNameLongLiteral;
3148 // if (getNextChar('f', 'F') >= 0) {
3149 // return TokenNameFloatingPointLiteral;
3151 if (getNextChar('d', 'D') >= 0) {
3152 return TokenNameDoubleLiteral;
3153 } else { //make the distinction between octal and float ....
3154 if (getNextChar('.')) { //bingo ! ....
// fraction digits
3155 while (getNextCharAsDigit()) {
3157 if (getNextChar('e', 'E') >= 0) {
3158 // consume next character
3159 unicodeAsBackSlash = false;
3160 currentCharacter = source[currentPosition++];
3161 // if (((currentCharacter = source[currentPosition++]) == '\\')
3162 // && (source[currentPosition] == 'u')) {
3163 // getNextUnicodeChar();
3165 // if (withoutUnicodePtr != 0) {
3166 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// optional sign of the exponent
3169 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3170 // consume next character
3171 unicodeAsBackSlash = false;
3172 currentCharacter = source[currentPosition++];
3173 // if (((currentCharacter = source[currentPosition++]) == '\\')
3174 // && (source[currentPosition] == 'u')) {
3175 // getNextUnicodeChar();
3177 // if (withoutUnicodePtr != 0) {
3178 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3179 // currentCharacter;
// an exponent needs at least one digit
3183 if (!Character.isDigit(currentCharacter))
3184 throw new InvalidInputException(INVALID_FLOAT);
3185 while (getNextCharAsDigit()) {
3188 // if (getNextChar('f', 'F') >= 0)
3189 // return TokenNameFloatingPointLiteral;
3190 getNextChar('d', 'D'); //jump over potential d or D
3191 return TokenNameDoubleLiteral;
3193 return TokenNameIntegerLiteral;
// integer part of a decimal literal
3200 while (getNextCharAsDigit()) {
3202 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3203 // return TokenNameLongLiteral;
3204 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3205 while (getNextCharAsDigit()) {
3209 //if floating is true both exponent and suffix may be optional
3210 if (getNextChar('e', 'E') >= 0) {
3212 // consume next character
3213 unicodeAsBackSlash = false;
3214 currentCharacter = source[currentPosition++];
3215 // if (((currentCharacter = source[currentPosition++]) == '\\')
3216 // && (source[currentPosition] == 'u')) {
3217 // getNextUnicodeChar();
3219 // if (withoutUnicodePtr != 0) {
3220 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3223 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3226 unicodeAsBackSlash = false;
3227 currentCharacter = source[currentPosition++];
3228 // if (((currentCharacter = source[currentPosition++]) == '\\')
3229 // && (source[currentPosition] == 'u')) {
3230 // getNextUnicodeChar();
3232 // if (withoutUnicodePtr != 0) {
3233 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// an exponent needs at least one digit
3237 if (!Character.isDigit(currentCharacter))
3238 throw new InvalidInputException(INVALID_FLOAT);
3239 while (getNextCharAsDigit()) {
3242 if (getNextChar('d', 'D') >= 0)
3243 return TokenNameDoubleLiteral;
3244 // if (getNextChar('f', 'F') >= 0)
3245 // return TokenNameFloatingPointLiteral;
3246 //the long flag has been tested before
3247 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3250 * Search the line number corresponding to a specific position
3253 public final int getLineNumber(int position) {
// position is compared against the recorded line ends lineEnds[0..linePtr]
// no line-end table available: handled before any lookup
3254 if (lineEnds == null)
// number of line ends recorded so far
3256 int length = linePtr + 1;
// g and d are the lower and upper bounds of the range still to be searched
// NOTE(review): looks like a binary search with m as the probed index -
// confirm against the loop and the return statements
3259 int g = 0, d = length - 1;
3263 if (position < lineEnds[m]) {
3265 } else if (position > lineEnds[m]) {
// final comparison against the last probed line end
3271 if (position < lineEnds[m]) {
/**
 * Sets the PHP mode of the scanner.
 *
 * @param mode the new mode; NOTE(review): presumably stored in the public
 *          phpMode field - confirm against the method body
 */
3276 public void setPHPMode(boolean mode) {
/**
 * Installs the character buffer to scan and rewinds the scanner.
 * <p>
 * The array is used as is, without copying. initialPosition and
 * currentPosition are reset to 0, containsAssertKeyword is cleared and
 * withoutUnicodeBuffer is reallocated with the length of the new source.
 *
 * @param source the characters to scan; null is replaced by an empty array
 */
3279 public final void setSource(char[] source) {
3280 //the source-buffer is set to sourceString
3281 if (source == null) {
// never keep a null buffer: the length is read below
3282 this.source = new char[0];
3284 this.source = source;
3287 initialPosition = currentPosition = 0;
3288 containsAssertKeyword = false;
// sized from the field, which is non-null even when the argument was null
3289 withoutUnicodeBuffer = new char[this.source.length];
3291 public String toString() {
3292 if (startPosition == source.length)
3293 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3294 if (currentPosition > source.length)
3295 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3296 char front[] = new char[startPosition];
3297 System.arraycopy(source, 0, front, 0, startPosition);
3298 int middleLength = (currentPosition - 1) - startPosition + 1;
3300 if (middleLength > -1) {
3301 middle = new char[middleLength];
3302 System.arraycopy(source, startPosition, middle, 0, middleLength);
3304 middle = new char[0];
3306 char end[] = new char[source.length - (currentPosition - 1)];
3307 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length
3308 - (currentPosition - 1) - 1);
3309 return new String(front)
3310 + "\n===============================\nStarts here -->" //$NON-NLS-1$
3311 + new String(middle)
3312 + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3315 public final String toStringAction(int act) {
3317 case TokenNameERROR :
3318 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3320 case TokenNameStopPHP :
3321 return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3322 case TokenNameIdentifier :
3323 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3324 case TokenNameVariable :
3325 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3326 case TokenNameabstract :
3327 return "abstract"; //$NON-NLS-1$
3328 case TokenNamearray :
3329 return "array"; //$NON-NLS-1$
3331 return "as"; //$NON-NLS-1$
3332 case TokenNamebreak :
3333 return "break"; //$NON-NLS-1$
3334 case TokenNamecase :
3335 return "case"; //$NON-NLS-1$
3336 case TokenNameclass :
3337 return "class"; //$NON-NLS-1$
3338 case TokenNamecontinue :
3339 return "continue"; //$NON-NLS-1$
3340 case TokenNamedefault :
3341 return "default"; //$NON-NLS-1$
3342 // case TokenNamedefine :
3343 // return "define"; //$NON-NLS-1$
3345 return "do"; //$NON-NLS-1$
3346 case TokenNameecho :
3347 return "echo"; //$NON-NLS-1$
3348 case TokenNameelse :
3349 return "else"; //$NON-NLS-1$
3350 case TokenNameelseif :
3351 return "elseif"; //$NON-NLS-1$
3352 case TokenNameendfor :
3353 return "endfor"; //$NON-NLS-1$
3354 case TokenNameendforeach :
3355 return "endforeach"; //$NON-NLS-1$
3356 case TokenNameendif :
3357 return "endif"; //$NON-NLS-1$
3358 case TokenNameendswitch :
3359 return "endswitch"; //$NON-NLS-1$
3360 case TokenNameendwhile :
3361 return "endwhile"; //$NON-NLS-1$
3362 case TokenNameextends :
3363 return "extends"; //$NON-NLS-1$
3364 // case TokenNamefalse :
3365 // return "false"; //$NON-NLS-1$
3366 case TokenNamefinal :
3367 return "final"; //$NON-NLS-1$
3369 return "for"; //$NON-NLS-1$
3370 case TokenNameforeach :
3371 return "foreach"; //$NON-NLS-1$
3372 case TokenNamefunction :
3373 return "function"; //$NON-NLS-1$
3374 case TokenNameglobal :
3375 return "global"; //$NON-NLS-1$
3377 return "if"; //$NON-NLS-1$
3378 case TokenNameimplements :
3379 return "implements"; //$NON-NLS-1$
3380 case TokenNameinclude :
3381 return "include"; //$NON-NLS-1$
3382 case TokenNameinclude_once :
3383 return "include_once"; //$NON-NLS-1$
3384 case TokenNameinterface :
3385 return "interface"; //$NON-NLS-1$
3386 case TokenNamelist :
3387 return "list"; //$NON-NLS-1$
3389 return "new"; //$NON-NLS-1$
3390 // case TokenNamenull :
3391 // return "null"; //$NON-NLS-1$
3392 case TokenNameprint :
3393 return "print"; //$NON-NLS-1$
3394 case TokenNameprivate :
3395 return "private"; //$NON-NLS-1$
3396 case TokenNameprotected :
3397 return "protected"; //$NON-NLS-1$
3398 case TokenNamepublic :
3399 return "public"; //$NON-NLS-1$
3400 case TokenNamerequire :
3401 return "require"; //$NON-NLS-1$
3402 case TokenNamerequire_once :
3403 return "require_once"; //$NON-NLS-1$
3404 case TokenNamereturn :
3405 return "return"; //$NON-NLS-1$
3406 case TokenNamestatic :
3407 return "static"; //$NON-NLS-1$
3408 case TokenNameswitch :
3409 return "switch"; //$NON-NLS-1$
3410 // case TokenNametrue :
3411 // return "true"; //$NON-NLS-1$
3412 case TokenNameunset :
3413 return "unset"; //$NON-NLS-1$
3415 return "var"; //$NON-NLS-1$
3416 case TokenNamewhile :
3417 return "while"; //$NON-NLS-1$
3418 case TokenNamethis :
3419 return "$this"; //$NON-NLS-1$
3420 case TokenNameIntegerLiteral :
3421 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3422 case TokenNameDoubleLiteral :
3423 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3424 case TokenNameStringLiteral :
3425 return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3426 case TokenNameStringConstant :
3427 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3428 case TokenNameStringInterpolated :
3429 return "StringInterpolated(" + new String(getCurrentTokenSource())
3430 + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3431 case TokenNameHEREDOC :
3432 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3433 case TokenNamePLUS_PLUS :
3434 return "++"; //$NON-NLS-1$
3435 case TokenNameMINUS_MINUS :
3436 return "--"; //$NON-NLS-1$
3437 case TokenNameEQUAL_EQUAL :
3438 return "=="; //$NON-NLS-1$
3439 case TokenNameEQUAL_EQUAL_EQUAL :
3440 return "==="; //$NON-NLS-1$
3441 case TokenNameEQUAL_GREATER :
3442 return "=>"; //$NON-NLS-1$
3443 case TokenNameLESS_EQUAL :
3444 return "<="; //$NON-NLS-1$
3445 case TokenNameGREATER_EQUAL :
3446 return ">="; //$NON-NLS-1$
3447 case TokenNameNOT_EQUAL :
3448 return "!="; //$NON-NLS-1$
3449 case TokenNameNOT_EQUAL_EQUAL :
3450 return "!=="; //$NON-NLS-1$
3451 case TokenNameLEFT_SHIFT :
3452 return "<<"; //$NON-NLS-1$
3453 case TokenNameRIGHT_SHIFT :
3454 return ">>"; //$NON-NLS-1$
3455 case TokenNamePLUS_EQUAL :
3456 return "+="; //$NON-NLS-1$
3457 case TokenNameMINUS_EQUAL :
3458 return "-="; //$NON-NLS-1$
3459 case TokenNameMULTIPLY_EQUAL :
3460 return "*="; //$NON-NLS-1$
3461 case TokenNameDIVIDE_EQUAL :
3462 return "/="; //$NON-NLS-1$
3463 case TokenNameAND_EQUAL :
3464 return "&="; //$NON-NLS-1$
3465 case TokenNameOR_EQUAL :
3466 return "|="; //$NON-NLS-1$
3467 case TokenNameXOR_EQUAL :
3468 return "^="; //$NON-NLS-1$
3469 case TokenNameREMAINDER_EQUAL :
3470 return "%="; //$NON-NLS-1$
3471 case TokenNameLEFT_SHIFT_EQUAL :
3472 return "<<="; //$NON-NLS-1$
3473 case TokenNameRIGHT_SHIFT_EQUAL :
3474 return ">>="; //$NON-NLS-1$
3475 case TokenNameOR_OR :
3476 return "||"; //$NON-NLS-1$
3477 case TokenNameAND_AND :
3478 return "&&"; //$NON-NLS-1$
3479 case TokenNamePLUS :
3480 return "+"; //$NON-NLS-1$
3481 case TokenNameMINUS :
3482 return "-"; //$NON-NLS-1$
3483 case TokenNameMINUS_GREATER :
3486 return "!"; //$NON-NLS-1$
3487 case TokenNameREMAINDER :
3488 return "%"; //$NON-NLS-1$
3490 return "^"; //$NON-NLS-1$
3492 return "&"; //$NON-NLS-1$
3493 case TokenNameMULTIPLY :
3494 return "*"; //$NON-NLS-1$
3496 return "|"; //$NON-NLS-1$
3497 case TokenNameTWIDDLE :
3498 return "~"; //$NON-NLS-1$
3499 case TokenNameTWIDDLE_EQUAL :
3500 return "~="; //$NON-NLS-1$
3501 case TokenNameDIVIDE :
3502 return "/"; //$NON-NLS-1$
3503 case TokenNameGREATER :
3504 return ">"; //$NON-NLS-1$
3505 case TokenNameLESS :
3506 return "<"; //$NON-NLS-1$
3507 case TokenNameLPAREN :
3508 return "("; //$NON-NLS-1$
3509 case TokenNameRPAREN :
3510 return ")"; //$NON-NLS-1$
3511 case TokenNameLBRACE :
3512 return "{"; //$NON-NLS-1$
3513 case TokenNameRBRACE :
3514 return "}"; //$NON-NLS-1$
3515 case TokenNameLBRACKET :
3516 return "["; //$NON-NLS-1$
3517 case TokenNameRBRACKET :
3518 return "]"; //$NON-NLS-1$
3519 case TokenNameSEMICOLON :
3520 return ";"; //$NON-NLS-1$
3521 case TokenNameQUESTION :
3522 return "?"; //$NON-NLS-1$
3523 case TokenNameCOLON :
3524 return ":"; //$NON-NLS-1$
3525 case TokenNameCOMMA :
3526 return ","; //$NON-NLS-1$
3528 return "."; //$NON-NLS-1$
3529 case TokenNameEQUAL :
3530 return "="; //$NON-NLS-1$
3533 case TokenNameDOLLAR_LBRACE :
3536 return "EOF"; //$NON-NLS-1$
3537 case TokenNameWHITESPACE :
3538 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3539 case TokenNameCOMMENT_LINE :
3540 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3541 case TokenNameCOMMENT_BLOCK :
3542 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3543 case TokenNameCOMMENT_PHPDOC :
3544 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3545 case TokenNameHTML :
3546 return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3547 case TokenNameFILE :
3548 return "__FILE__"; //$NON-NLS-1$
3549 case TokenNameLINE :
3550 return "__LINE__"; //$NON-NLS-1$
3551 case TokenNameCLASS_C :
3552 return "__CLASS__"; //$NON-NLS-1$
3553 case TokenNameMETHOD_C :
3554 return "__METHOD__"; //$NON-NLS-1$
3555 case TokenNameFUNC_C :
3556 return "__FUNCTION__"; //$NON-NLS-1$
3558 return "not-a-token(" + (new Integer(act)) + ") "
3559 + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner with assert mode switched off.
 * <p>
 * Delegates to the four-argument constructor, passing {@code false} as its
 * last ({@code assertMode}) argument.
 *
 * @param tokenizeComments
 *          forwarded unchanged to the four-argument constructor
 * @param tokenizeWhiteSpace
 *          forwarded unchanged to the four-argument constructor
 * @param checkNonExternalizedStringLiterals
 *          forwarded unchanged to the four-argument constructor
 */
3562 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3563 boolean checkNonExternalizedStringLiterals) {
3564 this(tokenizeComments, tokenizeWhiteSpace,
3565 checkNonExternalizedStringLiterals, false);
/**
 * Creates a scanner and records the given option flags in the fields of the
 * same names.
 * <p>
 * {@code eofPosition} starts at {@code Integer.MAX_VALUE}.
 *
 * @param tokenizeComments
 *          stored in {@code this.tokenizeComments}; presumably makes the
 *          scanner hand comments back as tokens -- confirm against
 *          getNextToken()
 * @param tokenizeWhiteSpace
 *          stored in {@code this.tokenizeWhiteSpace}; presumably makes the
 *          scanner hand whitespace back as tokens -- confirm against
 *          getNextToken()
 * @param checkNonExternalizedStringLiterals
 *          stored in {@code this.checkNonExternalizedStringLiterals};
 *          presumably enables the NLS tag check -- confirm against the users
 *          of that field
 * @param assertMode
 *          stored in {@code this.assertMode}
 */
3567 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3568 boolean checkNonExternalizedStringLiterals, boolean assertMode) {
// Start with the largest possible end-of-file position.
3569 this.eofPosition = Integer.MAX_VALUE;
3570 this.tokenizeComments = tokenizeComments;
3571 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3572 this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3573 this.assertMode = assertMode;
/**
 * Hands {@code currentLine} to {@code parseTags(NLSLine)} so that the NLS
 * tags of the current token are matched against it.
 * <p>
 * NOTE(review): the statement controlled by the {@code currentLine == null}
 * test is not visible in this listing (embedded line 3577 is absent);
 * presumably it is an early {@code return} -- confirm against the full file.
 *
 * @throws InvalidInputException
 *           declared by {@code parseTags}, the only call made here
 */
3575 private void checkNonExternalizeString() throws InvalidInputException {
3576 if (currentLine == null)
3578 parseTags(currentLine);
/**
 * Scans the text of the current token for NLS tags and then rebuilds
 * {@code nonNLSStrings} from the literals held by {@code line}.
 * <p>
 * Visible steps:
 * <ol>
 * <li>The current token source is searched for {@code TAG_PREFIX}. For each
 * occurrence the text up to {@code TAG_POSTFIX} is parsed as a one-based
 * number and converted to a zero-based index {@code i}; text that is not a
 * number yields {@code -1}. {@code line.exists(i)} is then tested.</li>
 * <li>{@code nonNLSStrings} is allocated with {@code line.size()} slots and
 * filled with every non-null literal delivered by {@code line.iterator()}.</li>
 * <li>If no literal was copied, {@code nonNLSStrings} is reset to
 * {@code null}; the array is re-allocated with exactly
 * {@code nonNLSCounter} slots when fewer literals than {@code lineLength}
 * were copied.</li>
 * </ol>
 * NOTE(review): several original lines are absent from this listing (the
 * loop header around the tag search, the declaration of {@code i}, the body
 * of the {@code line.exists(i)} test, and the end of the final
 * {@code arraycopy} call). Presumably the {@code exists} branch clears the
 * matched entry so that only untagged literals survive step 2 -- confirm
 * against the full file.
 *
 * @param line
 *          the literals of the line being checked; read through
 *          {@code size()}, {@code exists(int)} and {@code iterator()}
 * @throws InvalidInputException
 *           declared by this method; no visible statement throws it
 */
3580 private void parseTags(NLSLine line) throws InvalidInputException {
// Text of the current token, searched below for NLS tags.
3581 String s = new String(getCurrentTokenSource());
3582 int pos = s.indexOf(TAG_PREFIX);
3583 int lineLength = line.size();
// start: first character after the tag prefix; end: position of the postfix.
3585 int start = pos + TAG_PREFIX_LENGTH;
3586 int end = s.indexOf(TAG_POSTFIX, start);
// NOTE(review): String.indexOf returns -1 when TAG_POSTFIX is absent, and
// substring(start, -1) would then throw StringIndexOutOfBoundsException --
// confirm whether a prefix without a postfix can reach this point.
3587 String index = s.substring(start, end);
3590 i = Integer.parseInt(index) - 1;
3591 // Tags are one based not zero based.
3592 } catch (NumberFormatException e) {
3593 i = -1; // we don't want to consider this as a valid NLS tag
// NOTE(review): the body of this test is not visible in this listing.
3595 if (line.exists(i)) {
// Look for the next tag, starting after the prefix just handled.
3598 pos = s.indexOf(TAG_PREFIX, start);
// Collect the literals that are still present (non-null) in the line.
3600 this.nonNLSStrings = new StringLiteral[lineLength];
3601 int nonNLSCounter = 0;
3602 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3603 StringLiteral literal = (StringLiteral) iterator.next();
3604 if (literal != null) {
3605 this.nonNLSStrings[nonNLSCounter++] = literal;
// Nothing collected: publish null rather than an empty array.
3608 if (nonNLSCounter == 0) {
3609 this.nonNLSStrings = null;
// NOTE(review): the branch structure between the null assignment above and
// this flag is not visible; presumably the flag is only set when at least
// one literal was collected -- confirm.
3613 this.wasNonExternalizedStringLiteral = true;
// Shrink the array so that it holds exactly the collected literals.
3614 if (nonNLSCounter != lineLength) {
3615 System.arraycopy(this.nonNLSStrings, 0,
3616 (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
/**
 * Decodes an escape sequence and leaves the resulting character in
 * {@code currentCharacter}.
 * <p>
 * The next character is read straight from {@code source} (unless the
 * {@code unicodeAsBackSlash} branch is taken) and switched on. The visible
 * assignments map it to backspace, tab, line feed, form feed, carriage
 * return, double quote, single quote or backslash. Otherwise it is treated
 * as the first digit of an octal escape of up to three digits, whose value
 * is stored as {@code (char) number}.
 * <p>
 * NOTE(review): the {@code case} labels, the {@code break} statements and
 * the statements of the "ignore last character" branches are absent from
 * this listing. Presumably the labels are the usual escape letters and the
 * "ignore" branches step {@code currentPosition} back by one -- confirm
 * against the full file. This method presumably expects the introducing
 * backslash to have been consumed by the caller -- confirm.
 *
 * @throws InvalidInputException
 *           with {@code INVALID_ESCAPE} when the sequence is rejected
 */
3621 public final void scanEscapeCharacter() throws InvalidInputException {
3622 // the string with "\\u" is a legal string of two chars \ and u
3623 //thus we use a direct access to the source (for regular cases).
3624 if (unicodeAsBackSlash) {
3625 // consume next character
3626 unicodeAsBackSlash = false;
3627 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
3628 // (source[currentPosition] == 'u')) {
3629 // getNextUnicodeChar();
// A non-zero pointer means the unicode-free buffer is in use, so the
// character is appended to it as well.
3631 if (withoutUnicodePtr != 0) {
3632 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Regular case: take the character that follows directly from the source.
3636 currentCharacter = source[currentPosition++];
3637 switch (currentCharacter) {
// Each assignment below replaces the escape letter by the character it
// denotes (case labels not visible in this listing).
3639 currentCharacter = '\b';
3642 currentCharacter = '\t';
3645 currentCharacter = '\n';
3648 currentCharacter = '\f';
3651 currentCharacter = '\r';
3654 currentCharacter = '\"';
3657 currentCharacter = '\'';
3660 currentCharacter = '\\';
3663 // -----------octal escape--------------
3665 // OctalDigit OctalDigit
3666 // ZeroToThree OctalDigit OctalDigit
3667 int number = Character.getNumericValue(currentCharacter);
3668 if (number >= 0 && number <= 7) {
// True when the first digit is 4..7; a third digit is then not part of
// the escape (see the "NotZeroToThree" branch below).
3669 boolean zeroToThreeNot = number > 3;
3670 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3671 int digit = Character.getNumericValue(currentCharacter);
3672 if (digit >= 0 && digit <= 7) {
3673 number = (number * 8) + digit;
3675 .isDigit(currentCharacter = source[currentPosition++])) {
3676 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3677 // Digit --> ignore last character
3680 digit = Character.getNumericValue(currentCharacter);
3681 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
3682 // OctalDigit OctalDigit
3683 number = (number * 8) + digit;
3684 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3685 // --> ignore last character
3689 } else { // has read \OctalDigit NonDigit--> ignore last
3693 } else { // has read \OctalDigit NonOctalDigit--> ignore last
3697 } else { // has read \OctalDigit --> ignore last character
// NOTE(review): the condition guarding this throw is not visible;
// presumably a range check on number -- confirm.
3701 throw new InvalidInputException(INVALID_ESCAPE);
3702 currentCharacter = (char) number;
// NOTE(review): presumably the else of the octal-digit test above, i.e. the
// character after the backslash starts no known escape -- confirm.
3704 throw new InvalidInputException(INVALID_ESCAPE);
3707 // check presence of task: tags
3708 public void checkTaskTag(int commentStart, int commentEnd) {
3709 // only look for newer task: tags
3710 if (this.foundTaskCount > 0
3711 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
3714 int foundTaskIndex = this.foundTaskCount;
3715 nextChar : for (int i = commentStart; i < commentEnd
3716 && i < this.eofPosition; i++) {
3718 char[] priority = null;
3719 // check for tag occurrence
3720 nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
3721 tag = this.taskTags[itag];
3722 priority = this.taskPriorities != null
3723 && itag < this.taskPriorities.length
3724 ? this.taskPriorities[itag]
3726 int tagLength = tag.length;
3727 for (int t = 0; t < tagLength; t++) {
3728 if (this.source[i + t] != tag[t])
3731 if (this.foundTaskTags == null) {
3732 this.foundTaskTags = new char[5][];
3733 this.foundTaskMessages = new char[5][];
3734 this.foundTaskPriorities = new char[5][];
3735 this.foundTaskPositions = new int[5][];
3736 } else if (this.foundTaskCount == this.foundTaskTags.length) {
3737 System.arraycopy(this.foundTaskTags, 0,
3738 this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
3739 this.foundTaskCount);
3740 System.arraycopy(this.foundTaskMessages, 0,
3741 this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
3742 this.foundTaskCount);
3743 System.arraycopy(this.foundTaskPriorities, 0,
3744 this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
3745 0, this.foundTaskCount);
3746 System.arraycopy(this.foundTaskPositions, 0,
3747 this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
3748 this.foundTaskCount);
3750 this.foundTaskTags[this.foundTaskCount] = tag;
3751 this.foundTaskPriorities[this.foundTaskCount] = priority;
3752 this.foundTaskPositions[this.foundTaskCount] = new int[]{i,
3754 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
3755 this.foundTaskCount++;
3756 i += tagLength - 1; // will be incremented when looping
3759 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
3760 // retrieve message start and end positions
3761 int msgStart = this.foundTaskPositions[i][0]
3762 + this.foundTaskTags[i].length;
3763 int max_value = i + 1 < this.foundTaskCount
3764 ? this.foundTaskPositions[i + 1][0] - 1
3766 // at most beginning of next task
3767 if (max_value < msgStart)
3768 max_value = msgStart; // would only occur if tag is before EOF.
3771 for (int j = msgStart; j < max_value; j++) {
3772 if ((c = this.source[j]) == '\n' || c == '\r') {
3778 for (int j = max_value; j > msgStart; j--) {
3779 if ((c = this.source[j]) == '*') {
3787 if (msgStart == end)
3790 while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
3792 while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
3794 // update the end position of the task
3795 this.foundTaskPositions[i][1] = end;
3796 // get the message source
3797 final int messageLength = end - msgStart + 1;
3798 char[] message = new char[messageLength];
3799 System.arraycopy(source, msgStart, message, 0, messageLength);
3800 this.foundTaskMessages[i] = message;