1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12 import java.util.ArrayList;
13 import java.util.Iterator;
14 import java.util.List;
15 import java.util.Stack;
16 import net.sourceforge.phpdt.core.compiler.CharOperation;
17 import net.sourceforge.phpdt.core.compiler.IScanner;
18 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
19 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
20 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
21 public class Scanner implements IScanner, ITerminalSymbols {
/*
 * APIs are:
 * - getNextToken(), which returns the current type of the token (this value
 *   is not memorized by the scanner);
 * - getCurrentTokenSource(), which provides the token's "REAL" source (i.e.
 *   all unicode escapes have been transformed into the correct char);
 * - sourceStart, which gives the position into the stream;
 * - currentPosition - 1, which gives the sourceEnd position into the stream.
 */
// ---- Scanner state ----
// NOTE(review): the leading numbers on each line, and the gaps in their
// sequence, are extraction residue; some declarations (e.g. the `source`
// buffer read by the methods below) are not visible in this extract.
30 private boolean assertMode;
31 public boolean useAssertAsAnIndentifier = false;
32 // flag indicating whether the processed source contains occurrences of the keyword assert
33 public boolean containsAssertKeyword = false;
34 public boolean recordLineSeparator;
35 public boolean phpMode = false;
// stack of Character markers consulted by getNextToken() while it scans encapsed strings
36 public Stack encapsedStringStack = null;
// last character read from source, and the bounds of the current token:
// startPosition is its first index, currentPosition is one past its last index
37 public char currentCharacter;
38 public int startPosition;
39 public int currentPosition;
40 public int initialPosition, eofPosition;
41 // after this position, EOF is generated instead of real tokens (rest of this comment is missing from the extract)
43 public boolean tokenizeComments;
44 public boolean tokenizeWhiteSpace;
45 // source should be viewed as a window (i.e. a part)
46 // of an entire, very large stream
49 public char[] withoutUnicodeBuffer;
50 public int withoutUnicodePtr;
51 // when == 0 ==> no unicode in the current token
52 public boolean unicodeAsBackSlash = false;
53 public boolean scanningFloatLiteral = false;
54 //support for /** comments
55 //public char[][] comments = new char[10][];
56 public int[] commentStops = new int[10];
57 public int[] commentStarts = new int[10];
58 public int commentPtr = -1; // -1 means that no comment has been recorded
59 // diet parsing support - jump over some method bodies when requested
60 public boolean diet = false;
61 // support for line-based debuggers:
62 // remember the positions of the cr/lf line terminators
63 public int[] lineEnds = new int[250];
64 public int linePtr = -1;
65 public boolean wasAcr = false;
// Names of the lexical error conditions known to this scanner.
// NOTE(review): presumably used as InvalidInputException messages - confirm
// against the throw sites, which are not visible in this extract.
66 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
67 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
68 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
69 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
70 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
71 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
72 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
73 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
74 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
75 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
76 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
77 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
78 //----------------optimized identifier management------------------
// one shared single-character array per lowercase ASCII letter
79 static final char[] charArray_a = new char[]{'a'},
80 charArray_b = new char[]{'b'}, charArray_c = new char[]{'c'},
81 charArray_d = new char[]{'d'}, charArray_e = new char[]{'e'},
82 charArray_f = new char[]{'f'}, charArray_g = new char[]{'g'},
83 charArray_h = new char[]{'h'}, charArray_i = new char[]{'i'},
84 charArray_j = new char[]{'j'}, charArray_k = new char[]{'k'},
85 charArray_l = new char[]{'l'}, charArray_m = new char[]{'m'},
86 charArray_n = new char[]{'n'}, charArray_o = new char[]{'o'},
87 charArray_p = new char[]{'p'}, charArray_q = new char[]{'q'},
88 charArray_r = new char[]{'r'}, charArray_s = new char[]{'s'},
89 charArray_t = new char[]{'t'}, charArray_u = new char[]{'u'},
90 charArray_v = new char[]{'v'}, charArray_w = new char[]{'w'},
91 charArray_x = new char[]{'x'}, charArray_y = new char[]{'y'},
92 charArray_z = new char[]{'z'};
// six NUL characters; the initializer loop further down stores this array in
// every slot of charArray_length
93 static final char[] initCharArray = new char[]{'\u0000', '\u0000', '\u0000',
94 '\u0000', '\u0000', '\u0000'};
95 static final int TableSize = 30, InternalTableSize = 6;
// getCurrentIdentifierSource() switches on the token length with a reference to this constant
97 public static final int OptimizedLength = 6;
// per-instance table of char arrays, dimensioned [OptimizedLength][TableSize][InternalTableSize]
// NOTE(review): presumably a cache of short identifier sources - confirm against
// the optimizedCurrentTokenSourceN() methods, which are not visible in this extract
99 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
100 // support for detecting non-externalized string literals
// line bookkeeping for that detection; -1 is the initial "no line yet" value
101 int currentLineNr = -1;
102 int previousLineNr = -1;
103 NLSLine currentLine = null;
104 List lines = new ArrayList();
// textual delimiters of a NON-NLS tag comment, and their lengths
105 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
106 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
107 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
108 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
109 public StringLiteral[] nonNLSStrings = null;
// the check is enabled by default
110 public boolean checkNonExternalizedStringLiterals = true;
111 public boolean wasNonExternalizedStringLiteral = false;
// Stores initCharArray in every slot of charArray_length.
// NOTE(review): the braces that open and close this initializer and its three
// loops (original lines 112 and 117-120) are missing from this extract.
// NOTE(review): the literal 6 equals OptimizedLength, the first dimension of
// charArray_length - consider using the constant.
113 for (int i = 0; i < 6; i++) {
114 for (int j = 0; j < TableSize; j++) {
115 for (int k = 0; k < InternalTableSize; k++) {
116 charArray_length[i][j][k] = initCharArray;
// NOTE(review): this declaration is cut off after the trailing comma; its
// continuation (original line 122) is missing from this extract.
121 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
// bracket kind codes; BracketKinds is the number of kinds declared here
123 public static final int RoundBracket = 0;
124 public static final int SquareBracket = 1;
125 public static final int CurlyBracket = 2;
126 public static final int BracketKinds = 3;
// task tag bookkeeping: tags, messages, priorities and positions found so far,
// plus how many have been found
128 public char[][] foundTaskTags = null;
129 public char[][] foundTaskMessages;
130 public char[][] foundTaskPriorities = null;
131 public int[][] foundTaskPositions;
132 public int foundTaskCount = 0;
// NOTE(review): presumably the configured tags/priorities to search for - confirm against the code that fills them
133 public char[][] taskTags = null;
134 public char[][] taskPriorities = null;
// compile-time switches, both off
135 public static final boolean DEBUG = false;
136 public static final boolean TRACE = false;
/**
 * Creates a scanner with the given comment and whitespace tokenization
 * settings, delegating to the three-argument constructor with
 * {@code false} as its third argument.
 *
 * @param tokenizeComments   forwarded unchanged to the three-argument constructor
 * @param tokenizeWhiteSpace forwarded unchanged to the three-argument constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(tokenizeComments, tokenizeWhiteSpace, false);
}
144 * Determines if the specified character is permissible as the first
145 * character in a PHP identifier
147 public static boolean isPHPIdentifierStart(char ch) {
148 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
151 * Determines if the specified character may be part of a PHP identifier as
152 * other than the first character
154 public static boolean isPHPIdentifierPart(char ch) {
155 return Character.isLetterOrDigit(ch) || (ch == '_')
156 || (0x7F <= ch && ch <= 0xFF);
158 public final boolean atEnd() {
159 // This code is not relevant if source is
160 // Only a part of the real stream input
161 return source.length == currentPosition;
// Returns the characters of the current token, i.e. the source range from
// startPosition up to (excluding) currentPosition. Short tokens are answered
// by the optimizedCurrentTokenSourceN() helpers; otherwise the range is
// copied into a new array.
// NOTE(review): this block is truncated in the extract - the `case` labels of
// the switch, the declaration of `result`, the tail of the arraycopy call and
// the final return are all missing.
163 public char[] getCurrentIdentifierSource() {
164 // return the token REAL source (i.e. unicodes are precomputed)
166 // if (withoutUnicodePtr != 0)
167 // //0 is used as a fast test flag so the real first char is in position 1
169 // withoutUnicodeBuffer,
171 // result = new char[withoutUnicodePtr],
173 // withoutUnicodePtr);
175 int length = currentPosition - startPosition;
176 switch (length) { // see OptimizedLength
// NOTE(review): presumably `case 1:` through `case 6:` precede the six returns
// below, one label per helper - confirm against the pristine file
178 return optimizedCurrentTokenSource1();
180 return optimizedCurrentTokenSource2();
182 return optimizedCurrentTokenSource3();
184 return optimizedCurrentTokenSource4();
186 return optimizedCurrentTokenSource5();
188 return optimizedCurrentTokenSource6();
// no optimization applies: copy `length` characters out of source
191 System.arraycopy(source, startPosition, result = new char[length], 0,
196 public int getCurrentTokenEndPosition() {
197 return this.currentPosition - 1;
199 public final char[] getCurrentTokenSource() {
200 // Return the token REAL source (aka unicodes are precomputed)
202 // if (withoutUnicodePtr != 0)
203 // // 0 is used as a fast test flag so the real first char is in position 1
205 // withoutUnicodeBuffer,
207 // result = new char[withoutUnicodePtr],
209 // withoutUnicodePtr);
212 System.arraycopy(source, startPosition,
213 result = new char[length = currentPosition - startPosition], 0, length);
217 public final char[] getCurrentTokenSource(int startPos) {
218 // Return the token REAL source (aka unicodes are precomputed)
220 // if (withoutUnicodePtr != 0)
221 // // 0 is used as a fast test flag so the real first char is in position 1
223 // withoutUnicodeBuffer,
225 // result = new char[withoutUnicodePtr],
227 // withoutUnicodePtr);
230 System.arraycopy(source, startPos,
231 result = new char[length = currentPosition - startPos], 0, length);
235 public final char[] getCurrentTokenSourceString() {
236 //return the token REAL source (aka unicodes are precomputed).
237 //REMOVE the two " that are at the beginning and the end.
239 if (withoutUnicodePtr != 0)
240 //0 is used as a fast test flag so the real first char is in position 1
241 System.arraycopy(withoutUnicodeBuffer, 2,
242 //2 is 1 (real start) + 1 (to jump over the ")
243 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
246 System.arraycopy(source, startPosition + 1,
247 result = new char[length = currentPosition - startPosition - 2], 0,
252 public int getCurrentTokenStartPosition() {
253 return this.startPosition;
255 public final char[] getCurrentStringLiteralSource() {
256 // Return the token REAL source (aka unicodes are precomputed)
259 System.arraycopy(source, startPosition + 1,
260 result = new char[length = currentPosition - startPosition - 2], 0,
/**
 * Searches for the source position corresponding to the end of a given line
 * number.
 *
 * Line numbers are 1-based, and relative to the scanner initialPosition.
 * Character positions are 0-based.
 *
 * In case the given line number is inconsistent, answers -1.
 */
// Answers the source position of the end of line `lineNumber`.
// NOTE(review): several statements of this method (original lines 275,
// 277-279 and 281) are missing from this extract, so the `if` tests below
// have lost the statements they guard; restore them from the pristine file.
273 public final int getLineEnd(int lineNumber) {
274 if (lineEnds == null)
276 if (lineNumber >= lineEnds.length)
280 if (lineNumber == lineEnds.length - 1)
// general case: line numbers are 1-based while lineEnds is indexed from 0
282 return lineEnds[lineNumber - 1];
283 // the next line starts one character after the lineEnd of the previous line
/**
 * Searches for the source position corresponding to the beginning of a given
 * line number.
 *
 * Line numbers are 1-based, and relative to the scanner initialPosition.
 * Character positions are 0-based.
 *
 * e.g. getLineStart(1) --> 0, i.e. the first line starts at character 0.
 *
 * In case the given line number is inconsistent, answers -1.
 */
// Answers the source position of the first character of line `lineNumber`.
// NOTE(review): several statements of this method (original lines 298 and
// 300-303) are missing from this extract, so the `if` tests below have lost
// the statements they guard; restore them from the pristine file.
296 public final int getLineStart(int lineNumber) {
297 if (lineEnds == null)
299 if (lineNumber >= lineEnds.length)
// NOTE(review): presumably guarded by a first-line test in the missing lines - confirm
304 return initialPosition;
// a line starts right after the recorded end of the line before it
305 return lineEnds[lineNumber - 2] + 1;
306 // the next line starts one character after the lineEnd of the previous line
308 public final boolean getNextChar(char testedChar) {
310 //handle the case of unicode.
311 //when a unicode appears then we must use a buffer that holds char
313 //At the end of this method currentCharacter holds the new visited char
314 //and currentPosition points right next after it
315 //Both previous lines are true if the currentCharacter is == to the
317 //On false, no side effect has occured.
318 //ALL getNextChar.... ARE OPTIMIZED COPIES
319 int temp = currentPosition;
321 currentCharacter = source[currentPosition++];
322 // if (((currentCharacter = source[currentPosition++]) == '\\')
323 // && (source[currentPosition] == 'u')) {
324 // //-------------unicode traitement ------------
325 // int c1, c2, c3, c4;
326 // int unicodeSize = 6;
327 // currentPosition++;
328 // while (source[currentPosition] == 'u') {
329 // currentPosition++;
333 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
335 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
337 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
339 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
341 // currentPosition = temp;
345 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
346 // if (currentCharacter != testedChar) {
347 // currentPosition = temp;
350 // unicodeAsBackSlash = currentCharacter == '\\';
352 // //need the unicode buffer
353 // if (withoutUnicodePtr == 0) {
354 // //buffer all the entries that have been left aside....
355 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
359 // withoutUnicodeBuffer,
361 // withoutUnicodePtr);
363 // //fill the buffer with the char
364 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
367 // } //-------------end unicode traitement--------------
369 if (currentCharacter != testedChar) {
370 currentPosition = temp;
373 unicodeAsBackSlash = false;
374 // if (withoutUnicodePtr != 0)
375 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
378 } catch (IndexOutOfBoundsException e) {
379 unicodeAsBackSlash = false;
380 currentPosition = temp;
384 public final int getNextChar(char testedChar1, char testedChar2) {
385 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
386 //test can be done with (x==0) for the first and (x>0) for the second
387 //handle the case of unicode.
388 //when a unicode appears then we must use a buffer that holds char
390 //At the end of this method currentCharacter holds the new visited char
391 //and currentPosition points right next after it
392 //Both previous lines are true if the currentCharacter is == to the
394 //On false, no side effect has occured.
395 //ALL getNextChar.... ARE OPTIMIZED COPIES
396 int temp = currentPosition;
399 currentCharacter = source[currentPosition++];
400 // if (((currentCharacter = source[currentPosition++]) == '\\')
401 // && (source[currentPosition] == 'u')) {
402 // //-------------unicode traitement ------------
403 // int c1, c2, c3, c4;
404 // int unicodeSize = 6;
405 // currentPosition++;
406 // while (source[currentPosition] == 'u') {
407 // currentPosition++;
411 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
413 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
415 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
417 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
419 // currentPosition = temp;
423 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
424 // if (currentCharacter == testedChar1)
426 // else if (currentCharacter == testedChar2)
429 // currentPosition = temp;
433 // //need the unicode buffer
434 // if (withoutUnicodePtr == 0) {
435 // //buffer all the entries that have been left aside....
436 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
440 // withoutUnicodeBuffer,
442 // withoutUnicodePtr);
444 // //fill the buffer with the char
445 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
447 // } //-------------end unicode traitement--------------
449 if (currentCharacter == testedChar1)
451 else if (currentCharacter == testedChar2)
454 currentPosition = temp;
457 // if (withoutUnicodePtr != 0)
458 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
461 } catch (IndexOutOfBoundsException e) {
462 currentPosition = temp;
466 public final boolean getNextCharAsDigit() {
468 //handle the case of unicode.
469 //when a unicode appears then we must use a buffer that holds char
471 //At the end of this method currentCharacter holds the new visited char
472 //and currentPosition points right next after it
473 //Both previous lines are true if the currentCharacter is a digit
474 //On false, no side effect has occured.
475 //ALL getNextChar.... ARE OPTIMIZED COPIES
476 int temp = currentPosition;
478 currentCharacter = source[currentPosition++];
479 // if (((currentCharacter = source[currentPosition++]) == '\\')
480 // && (source[currentPosition] == 'u')) {
481 // //-------------unicode traitement ------------
482 // int c1, c2, c3, c4;
483 // int unicodeSize = 6;
484 // currentPosition++;
485 // while (source[currentPosition] == 'u') {
486 // currentPosition++;
490 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
492 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
494 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
496 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
498 // currentPosition = temp;
502 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
503 // if (!Character.isDigit(currentCharacter)) {
504 // currentPosition = temp;
508 // //need the unicode buffer
509 // if (withoutUnicodePtr == 0) {
510 // //buffer all the entries that have been left aside....
511 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
515 // withoutUnicodeBuffer,
517 // withoutUnicodePtr);
519 // //fill the buffer with the char
520 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
522 // } //-------------end unicode traitement--------------
524 if (!Character.isDigit(currentCharacter)) {
525 currentPosition = temp;
528 // if (withoutUnicodePtr != 0)
529 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
532 } catch (IndexOutOfBoundsException e) {
533 currentPosition = temp;
537 public final boolean getNextCharAsDigit(int radix) {
539 //handle the case of unicode.
540 //when a unicode appears then we must use a buffer that holds char
542 //At the end of this method currentCharacter holds the new visited char
543 //and currentPosition points right next after it
544 //Both previous lines are true if the currentCharacter is a digit base on
546 //On false, no side effect has occured.
547 //ALL getNextChar.... ARE OPTIMIZED COPIES
548 int temp = currentPosition;
550 currentCharacter = source[currentPosition++];
551 // if (((currentCharacter = source[currentPosition++]) == '\\')
552 // && (source[currentPosition] == 'u')) {
553 // //-------------unicode traitement ------------
554 // int c1, c2, c3, c4;
555 // int unicodeSize = 6;
556 // currentPosition++;
557 // while (source[currentPosition] == 'u') {
558 // currentPosition++;
562 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
564 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
566 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
568 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
570 // currentPosition = temp;
574 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
575 // if (Character.digit(currentCharacter, radix) == -1) {
576 // currentPosition = temp;
580 // //need the unicode buffer
581 // if (withoutUnicodePtr == 0) {
582 // //buffer all the entries that have been left aside....
583 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
587 // withoutUnicodeBuffer,
589 // withoutUnicodePtr);
591 // //fill the buffer with the char
592 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
594 // } //-------------end unicode traitement--------------
596 if (Character.digit(currentCharacter, radix) == -1) {
597 currentPosition = temp;
600 // if (withoutUnicodePtr != 0)
601 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
604 } catch (IndexOutOfBoundsException e) {
605 currentPosition = temp;
609 public boolean getNextCharAsJavaIdentifierPart() {
611 //handle the case of unicode.
612 //when a unicode appears then we must use a buffer that holds char
614 //At the end of this method currentCharacter holds the new visited char
615 //and currentPosition points right next after it
616 //Both previous lines are true if the currentCharacter is a
617 // JavaIdentifierPart
618 //On false, no side effect has occured.
619 //ALL getNextChar.... ARE OPTIMIZED COPIES
620 int temp = currentPosition;
622 currentCharacter = source[currentPosition++];
623 // if (((currentCharacter = source[currentPosition++]) == '\\')
624 // && (source[currentPosition] == 'u')) {
625 // //-------------unicode traitement ------------
626 // int c1, c2, c3, c4;
627 // int unicodeSize = 6;
628 // currentPosition++;
629 // while (source[currentPosition] == 'u') {
630 // currentPosition++;
634 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
636 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
638 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
640 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
642 // currentPosition = temp;
646 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
647 // if (!isPHPIdentifierPart(currentCharacter)) {
648 // currentPosition = temp;
652 // //need the unicode buffer
653 // if (withoutUnicodePtr == 0) {
654 // //buffer all the entries that have been left aside....
655 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
659 // withoutUnicodeBuffer,
661 // withoutUnicodePtr);
663 // //fill the buffer with the char
664 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
666 // } //-------------end unicode traitement--------------
668 if (!isPHPIdentifierPart(currentCharacter)) {
669 currentPosition = temp;
672 // if (withoutUnicodePtr != 0)
673 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
676 } catch (IndexOutOfBoundsException e) {
677 currentPosition = temp;
// Looks ahead from the current position for a type name followed by ')', to
// tell a cast such as "(int)" apart from a plain opening parenthesis.
// The recognised names visible below are: int, bool, real, array, unset,
// float, object, string, double, boolean and integer.
// NOTE(review): many control-flow lines of this method (try/do openers, the
// `case` labels, the `found`/`index` updates, several closing braces and the
// successful return) are missing from this extract.
681 public int getCastOrParen() {
// remember the scanner state so it can be restored when no cast is recognised
682 int tempPosition = currentPosition;
683 char tempCharacter = currentCharacter;
684 int tempToken = TokenNameLPAREN;
685 boolean found = false;
686 StringBuffer buf = new StringBuffer();
// skip blanks and tabs in front of the candidate type name
689 currentCharacter = source[currentPosition++];
690 } while (currentCharacter == ' ' || currentCharacter == '\t');
// collect the run of ASCII letters that follows
691 while ((currentCharacter >= 'a' && currentCharacter <= 'z')
692 || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
693 buf.append(currentCharacter);
694 currentCharacter = source[currentPosition++];
// only names of 3 to 7 letters can be one of the cast types tested below
696 if (buf.length() >= 3 && buf.length() <= 7) {
697 char[] data = buf.toString().toCharArray();
// NOTE(review): the comparisons below use lowercase literals only and no
// lowercasing of `data` is visible, so "(INT)" would presumably not match - confirm
699 switch (data.length) {
// 3 letters: int
702 if ((data[index] == 'i') && (data[++index] == 'n')
703 && (data[++index] == 't')) {
705 tempToken = TokenNameintCAST;
// 4 letters: bool, real
710 if ((data[index] == 'b') && (data[++index] == 'o')
711 && (data[++index] == 'o') && (data[++index] == 'l')) {
713 tempToken = TokenNameboolCAST;
716 if ((data[index] == 'r') && (data[++index] == 'e')
717 && (data[++index] == 'a') && (data[++index] == 'l')) {
719 tempToken = TokenNamedoubleCAST;
// 5 letters: array, unset, float
725 if ((data[index] == 'a') && (data[++index] == 'r')
726 && (data[++index] == 'r') && (data[++index] == 'a')
727 && (data[++index] == 'y')) {
729 tempToken = TokenNamearrayCAST;
732 if ((data[index] == 'u') && (data[++index] == 'n')
733 && (data[++index] == 's') && (data[++index] == 'e')
734 && (data[++index] == 't')) {
736 tempToken = TokenNameunsetCAST;
739 if ((data[index] == 'f') && (data[++index] == 'l')
740 && (data[++index] == 'o') && (data[++index] == 'a')
741 && (data[++index] == 't')) {
743 tempToken = TokenNamedoubleCAST;
749 // 6 letters: object, string, double
750 if ((data[index] == 'o') && (data[++index] == 'b')
751 && (data[++index] == 'j') && (data[++index] == 'e')
752 && (data[++index] == 'c') && (data[++index] == 't')) {
754 tempToken = TokenNameobjectCAST;
757 if ((data[index] == 's') && (data[++index] == 't')
758 && (data[++index] == 'r') && (data[++index] == 'i')
759 && (data[++index] == 'n') && (data[++index] == 'g')) {
761 tempToken = TokenNamestringCAST;
764 if ((data[index] == 'd') && (data[++index] == 'o')
765 && (data[++index] == 'u') && (data[++index] == 'b')
766 && (data[++index] == 'l') && (data[++index] == 'e')) {
768 tempToken = TokenNamedoubleCAST;
// 7 letters: boolean, integer
775 if ((data[index] == 'b') && (data[++index] == 'o')
776 && (data[++index] == 'o') && (data[++index] == 'l')
777 && (data[++index] == 'e') && (data[++index] == 'a')
778 && (data[++index] == 'n')) {
780 tempToken = TokenNameboolCAST;
783 if ((data[index] == 'i') && (data[++index] == 'n')
784 && (data[++index] == 't') && (data[++index] == 'e')
785 && (data[++index] == 'g') && (data[++index] == 'e')
786 && (data[++index] == 'r')) {
788 tempToken = TokenNameintCAST;
// skip blanks and tabs between the type name and the closing parenthesis
794 while (currentCharacter == ' ' || currentCharacter == '\t') {
795 currentCharacter = source[currentPosition++];
// NOTE(review): presumably tempToken is returned here when a type name was found - confirm
797 if (currentCharacter == ')') {
// running past the end of source while looking ahead lands here
802 } catch (IndexOutOfBoundsException e) {
// no cast recognised: restore the saved state and report a plain '('
804 currentCharacter = tempCharacter;
805 currentPosition = tempPosition;
806 return TokenNameLPAREN;
808 public int getNextToken() throws InvalidInputException {
810 return getInlinedHTML(currentPosition);
815 jumpOverMethodBody();
817 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
821 withoutUnicodePtr = 0;
822 //start with a new token
823 char encapsedChar = ' ';
824 if (!encapsedStringStack.isEmpty()) {
825 encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
827 if (encapsedChar != '$' && encapsedChar != ' ') {
828 currentCharacter = source[currentPosition++];
829 if (currentCharacter == encapsedChar) {
830 switch (currentCharacter) {
832 return TokenNameEncapsedString0;
834 return TokenNameEncapsedString1;
836 return TokenNameEncapsedString2;
839 while (currentCharacter != encapsedChar) {
840 /** ** in PHP \r and \n are valid in string literals *** */
841 switch (currentCharacter) {
843 int escapeSize = currentPosition;
844 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
845 //scanEscapeCharacter make a side effect on this value and
846 // we need the previous value few lines down this one
847 scanDoubleQuotedEscapeCharacter();
848 escapeSize = currentPosition - escapeSize;
849 if (withoutUnicodePtr == 0) {
850 //buffer all the entries that have been left aside....
851 withoutUnicodePtr = currentPosition - escapeSize - 1
853 System.arraycopy(source, startPosition,
854 withoutUnicodeBuffer, 1, withoutUnicodePtr);
855 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
856 } else { //overwrite the / in the buffer
857 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
858 if (backSlashAsUnicodeInString) { //there are TWO \ in
864 if (isPHPIdentifierStart(source[currentPosition])
865 || source[currentPosition] == '{') {
867 encapsedStringStack.push(new Character('$'));
868 return TokenNameSTRING;
872 if (source[currentPosition] == '$') { // CURLY_OPEN
874 encapsedStringStack.push(new Character('$'));
875 return TokenNameSTRING;
878 // consume next character
879 unicodeAsBackSlash = false;
880 currentCharacter = source[currentPosition++];
881 if (withoutUnicodePtr != 0) {
882 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
887 return TokenNameSTRING;
889 // ---------Consume white space and handles startPosition---------
890 int whiteStart = currentPosition;
891 startPosition = currentPosition;
892 currentCharacter = source[currentPosition++];
893 if (encapsedChar == '$') {
894 switch (currentCharacter) {
896 currentCharacter = source[currentPosition++];
897 return TokenNameSTRING;
899 if (encapsedChar == '$') {
900 if (getNextChar('$'))
901 return TokenNameCURLY_OPEN;
903 return TokenNameLBRACE;
905 return TokenNameRBRACE;
907 return TokenNameLBRACKET;
909 return TokenNameRBRACKET;
911 return TokenNameEncapsedString1;
913 return TokenNameEncapsedString2;
915 return TokenNameEncapsedString0;
917 if (getNextChar('>'))
918 return TokenNameMINUS_GREATER;
919 return TokenNameSTRING;
921 if (currentCharacter == '$') {
922 int oldPosition = currentPosition;
924 currentCharacter = source[currentPosition++];
925 if (currentCharacter == '{') {
926 return TokenNameDOLLAR_LBRACE;
928 if (isPHPIdentifierStart(currentCharacter)) {
929 return scanIdentifierOrKeyword(true);
931 currentPosition = oldPosition;
932 return TokenNameSTRING;
934 } catch (IndexOutOfBoundsException e) {
935 currentPosition = oldPosition;
936 return TokenNameSTRING;
939 if (isPHPIdentifierStart(currentCharacter))
940 return scanIdentifierOrKeyword(false);
941 if (Character.isDigit(currentCharacter))
942 return scanNumber(false);
943 return TokenNameERROR;
946 // boolean isWhiteSpace;
947 while ((currentCharacter == ' ')
948 || Character.isWhitespace(currentCharacter)) {
949 startPosition = currentPosition;
950 currentCharacter = source[currentPosition++];
951 // if (((currentCharacter = source[currentPosition++]) == '\\')
952 // && (source[currentPosition] == 'u')) {
953 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
955 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
956 checkNonExternalizeString();
957 if (recordLineSeparator) {
963 // isWhiteSpace = (currentCharacter == ' ')
964 // || Character.isWhitespace(currentCharacter);
967 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
968 // reposition scanner in case we are interested by spaces as tokens
970 startPosition = whiteStart;
971 return TokenNameWHITESPACE;
973 //little trick to get out in the middle of a source compuation
974 if (currentPosition > eofPosition)
977 // ---------Identify the next token-------------
978 switch (currentCharacter) {
980 return getCastOrParen();
982 return TokenNameRPAREN;
984 return TokenNameLBRACE;
986 return TokenNameRBRACE;
988 return TokenNameLBRACKET;
990 return TokenNameRBRACKET;
992 return TokenNameSEMICOLON;
994 return TokenNameCOMMA;
996 if (getNextChar('='))
997 return TokenNameDOT_EQUAL;
998 if (getNextCharAsDigit())
999 return scanNumber(true);
1000 return TokenNameDOT;
1004 if ((test = getNextChar('+', '=')) == 0)
1005 return TokenNamePLUS_PLUS;
1007 return TokenNamePLUS_EQUAL;
1008 return TokenNamePLUS;
1013 if ((test = getNextChar('-', '=')) == 0)
1014 return TokenNameMINUS_MINUS;
1016 return TokenNameMINUS_EQUAL;
1017 if (getNextChar('>'))
1018 return TokenNameMINUS_GREATER;
1019 return TokenNameMINUS;
1022 if (getNextChar('='))
1023 return TokenNameTWIDDLE_EQUAL;
1024 return TokenNameTWIDDLE;
1026 if (getNextChar('=')) {
1027 if (getNextChar('=')) {
1028 return TokenNameNOT_EQUAL_EQUAL;
1030 return TokenNameNOT_EQUAL;
1032 return TokenNameNOT;
1034 if (getNextChar('='))
1035 return TokenNameMULTIPLY_EQUAL;
1036 return TokenNameMULTIPLY;
1038 if (getNextChar('='))
1039 return TokenNameREMAINDER_EQUAL;
1040 return TokenNameREMAINDER;
1043 int oldPosition = currentPosition;
1045 currentCharacter = source[currentPosition++];
1046 } catch (IndexOutOfBoundsException e) {
1047 currentPosition = oldPosition;
1048 return TokenNameLESS;
1050 switch (currentCharacter) {
1052 return TokenNameLESS_EQUAL;
1054 return TokenNameNOT_EQUAL;
1056 if (getNextChar('='))
1057 return TokenNameLEFT_SHIFT_EQUAL;
1058 if (getNextChar('<')) {
1059 currentCharacter = source[currentPosition++];
1060 while (Character.isWhitespace(currentCharacter)) {
1061 currentCharacter = source[currentPosition++];
1063 int heredocStart = currentPosition - 1;
1064 int heredocLength = 0;
1065 if (isPHPIdentifierStart(currentCharacter)) {
1066 currentCharacter = source[currentPosition++];
1068 return TokenNameERROR;
1070 while (isPHPIdentifierPart(currentCharacter)) {
1071 currentCharacter = source[currentPosition++];
1073 heredocLength = currentPosition - heredocStart - 1;
1074 // heredoc end-tag determination
1075 boolean endTag = true;
1078 ch = source[currentPosition++];
1079 if (ch == '\r' || ch == '\n') {
1080 if (recordLineSeparator) {
1081 pushLineSeparator();
1085 for (int i = 0; i < heredocLength; i++) {
1086 if (source[currentPosition + i] != source[heredocStart
1093 currentPosition += heredocLength - 1;
1094 currentCharacter = source[currentPosition++];
1095 break; // do...while loop
1101 return TokenNameHEREDOC;
1103 return TokenNameLEFT_SHIFT;
1105 currentPosition = oldPosition;
1106 return TokenNameLESS;
1111 if ((test = getNextChar('=', '>')) == 0)
1112 return TokenNameGREATER_EQUAL;
1114 if ((test = getNextChar('=', '>')) == 0)
1115 return TokenNameRIGHT_SHIFT_EQUAL;
1116 return TokenNameRIGHT_SHIFT;
1118 return TokenNameGREATER;
1121 if (getNextChar('=')) {
1122 if (getNextChar('=')) {
1123 return TokenNameEQUAL_EQUAL_EQUAL;
1125 return TokenNameEQUAL_EQUAL;
1127 if (getNextChar('>'))
1128 return TokenNameEQUAL_GREATER;
1129 return TokenNameEQUAL;
1133 if ((test = getNextChar('&', '=')) == 0)
1134 return TokenNameAND_AND;
1136 return TokenNameAND_EQUAL;
1137 return TokenNameAND;
1142 if ((test = getNextChar('|', '=')) == 0)
1143 return TokenNameOR_OR;
1145 return TokenNameOR_EQUAL;
1149 if (getNextChar('='))
1150 return TokenNameXOR_EQUAL;
1151 return TokenNameXOR;
1153 if (getNextChar('>')) {
1155 if (currentPosition == source.length) {
1157 return TokenNameINLINE_HTML;
1159 return getInlinedHTML(currentPosition - 2);
1161 return TokenNameQUESTION;
1163 if (getNextChar(':'))
1164 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1165 return TokenNameCOLON;
1169 // return TokenNameEncapsedString1;
1171 // consume next character
1172 unicodeAsBackSlash = false;
1173 currentCharacter = source[currentPosition++];
1174 // if (((currentCharacter = source[currentPosition++]) == '\\')
1175 // && (source[currentPosition] == 'u')) {
1176 // getNextUnicodeChar();
1178 // if (withoutUnicodePtr != 0) {
1179 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1180 // currentCharacter;
1183 while (currentCharacter != '\'') {
1184 /** ** in PHP \r and \n are valid in string literals *** */
1185 // if ((currentCharacter == '\n')
1186 // || (currentCharacter == '\r')) {
1187 // // relocate if finding another quote fairly close: thus
1188 // unicode '/u000D' will be fully consumed
1189 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1190 // if (currentPosition + lookAhead == source.length)
1192 // if (source[currentPosition + lookAhead] == '\n')
1194 // if (source[currentPosition + lookAhead] == '\"') {
1195 // currentPosition += lookAhead + 1;
1199 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1201 if (currentCharacter == '\\') {
1202 int escapeSize = currentPosition;
1203 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1204 //scanEscapeCharacter make a side effect on this value and
1205 // we need the previous value few lines down this one
1206 scanSingleQuotedEscapeCharacter();
1207 escapeSize = currentPosition - escapeSize;
1208 if (withoutUnicodePtr == 0) {
1209 //buffer all the entries that have been left aside....
1210 withoutUnicodePtr = currentPosition - escapeSize - 1
1212 System.arraycopy(source, startPosition,
1213 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1214 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1215 } else { //overwrite the / in the buffer
1216 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1217 if (backSlashAsUnicodeInString) { //there are TWO \ in
1219 // only one is correct
1220 withoutUnicodePtr--;
1224 // consume next character
1225 unicodeAsBackSlash = false;
1226 currentCharacter = source[currentPosition++];
1227 // if (((currentCharacter = source[currentPosition++]) ==
1229 // && (source[currentPosition] == 'u')) {
1230 // getNextUnicodeChar();
1232 if (withoutUnicodePtr != 0) {
1233 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1237 } catch (IndexOutOfBoundsException e) {
1238 throw new InvalidInputException(UNTERMINATED_STRING);
1239 } catch (InvalidInputException e) {
1240 if (e.getMessage().equals(INVALID_ESCAPE)) {
1241 // relocate if finding another quote fairly close: thus
1242 // unicode '/u000D' will be fully consumed
1243 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1244 if (currentPosition + lookAhead == source.length)
1246 if (source[currentPosition + lookAhead] == '\n')
1248 if (source[currentPosition + lookAhead] == '\'') {
1249 currentPosition += lookAhead + 1;
1256 if (checkNonExternalizedStringLiterals) { // check for presence
1258 // //$NON-NLS-?$ where
1260 if (currentLine == null) {
1261 currentLine = new NLSLine();
1262 lines.add(currentLine);
1264 currentLine.add(new StringLiteral(
1265 getCurrentTokenSourceString(), startPosition,
1266 currentPosition - 1));
1268 return TokenNameStringConstant;
1270 return TokenNameEncapsedString2;
1272 return TokenNameEncapsedString0;
1276 char startChar = currentCharacter;
1277 if (getNextChar('=')) {
1278 return TokenNameDIVIDE_EQUAL;
1281 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1283 int endPositionForLineComment = 0;
1284 try { //get the next char
1285 currentCharacter = source[currentPosition++];
1286 // if (((currentCharacter = source[currentPosition++])
1288 // && (source[currentPosition] == 'u')) {
1289 // //-------------unicode traitement ------------
1290 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1291 // currentPosition++;
1292 // while (source[currentPosition] == 'u') {
1293 // currentPosition++;
1296 // Character.getNumericValue(source[currentPosition++]))
1300 // Character.getNumericValue(source[currentPosition++]))
1304 // Character.getNumericValue(source[currentPosition++]))
1308 // Character.getNumericValue(source[currentPosition++]))
1312 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1314 // currentCharacter =
1315 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1318 //handle the \\u case manually into comment
1319 // if (currentCharacter == '\\') {
1320 // if (source[currentPosition] == '\\')
1321 // currentPosition++;
1322 // } //jump over the \\
1323 boolean isUnicode = false;
1324 while (currentCharacter != '\r'
1325 && currentCharacter != '\n') {
1326 if (currentCharacter == '?') {
1327 if (getNextChar('>')) {
1328 startPosition = currentPosition - 2;
1330 return TokenNameINLINE_HTML;
1335 currentCharacter = source[currentPosition++];
1336 // if (((currentCharacter = source[currentPosition++])
1338 // && (source[currentPosition] == 'u')) {
1339 // isUnicode = true;
1340 // //-------------unicode traitement ------------
1341 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1342 // currentPosition++;
1343 // while (source[currentPosition] == 'u') {
1344 // currentPosition++;
1347 // Character.getNumericValue(source[currentPosition++]))
1351 // Character.getNumericValue(
1352 // source[currentPosition++]))
1356 // Character.getNumericValue(
1357 // source[currentPosition++]))
1361 // Character.getNumericValue(
1362 // source[currentPosition++]))
1366 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1368 // currentCharacter =
1369 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1372 //handle the \\u case manually into comment
1373 // if (currentCharacter == '\\') {
1374 // if (source[currentPosition] == '\\')
1375 // currentPosition++;
1376 // } //jump over the \\
1379 endPositionForLineComment = currentPosition - 6;
1381 endPositionForLineComment = currentPosition - 1;
1383 recordComment(false);
1384 if ((currentCharacter == '\r')
1385 || (currentCharacter == '\n')) {
1386 checkNonExternalizeString();
1387 if (recordLineSeparator) {
1389 pushUnicodeLineSeparator();
1391 pushLineSeparator();
1397 if (tokenizeComments) {
1399 currentPosition = endPositionForLineComment;
1400 // reset one character behind
1402 return TokenNameCOMMENT_LINE;
1404 } catch (IndexOutOfBoundsException e) { //an eof will them
1406 if (tokenizeComments) {
1408 // reset one character behind
1409 return TokenNameCOMMENT_LINE;
1415 //traditional and annotation comment
1416 boolean isJavadoc = false, star = false;
1417 // consume next character
1418 unicodeAsBackSlash = false;
1419 currentCharacter = source[currentPosition++];
1420 // if (((currentCharacter = source[currentPosition++]) ==
1422 // && (source[currentPosition] == 'u')) {
1423 // getNextUnicodeChar();
1425 // if (withoutUnicodePtr != 0) {
1426 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1427 // currentCharacter;
1430 if (currentCharacter == '*') {
1434 if ((currentCharacter == '\r')
1435 || (currentCharacter == '\n')) {
1436 checkNonExternalizeString();
1437 if (recordLineSeparator) {
1438 pushLineSeparator();
1443 try { //get the next char
1444 currentCharacter = source[currentPosition++];
1445 // if (((currentCharacter = source[currentPosition++])
1447 // && (source[currentPosition] == 'u')) {
1448 // //-------------unicode traitement ------------
1449 // getNextUnicodeChar();
1451 //handle the \\u case manually into comment
1452 // if (currentCharacter == '\\') {
1453 // if (source[currentPosition] == '\\')
1454 // currentPosition++;
1455 // //jump over the \\
1457 // empty comment is not a javadoc /**/
1458 if (currentCharacter == '/') {
1461 //loop until end of comment */
1462 while ((currentCharacter != '/') || (!star)) {
1463 if ((currentCharacter == '\r')
1464 || (currentCharacter == '\n')) {
1465 checkNonExternalizeString();
1466 if (recordLineSeparator) {
1467 pushLineSeparator();
1472 star = currentCharacter == '*';
1474 currentCharacter = source[currentPosition++];
1475 // if (((currentCharacter = source[currentPosition++])
1477 // && (source[currentPosition] == 'u')) {
1478 // //-------------unicode traitement ------------
1479 // getNextUnicodeChar();
1481 //handle the \\u case manually into comment
1482 // if (currentCharacter == '\\') {
1483 // if (source[currentPosition] == '\\')
1484 // currentPosition++;
1485 // } //jump over the \\
1487 recordComment(isJavadoc);
1488 if (tokenizeComments) {
1490 return TokenNameCOMMENT_PHPDOC;
1491 return TokenNameCOMMENT_BLOCK;
1493 } catch (IndexOutOfBoundsException e) {
1494 throw new InvalidInputException(UNTERMINATED_COMMENT);
1498 return TokenNameDIVIDE;
1502 return TokenNameEOF;
1503 //the atEnd may not be <currentPosition == source.length> if
1504 // source is only some part of a real (external) stream
1505 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1507 if (currentCharacter == '$') {
1508 int oldPosition = currentPosition;
1510 currentCharacter = source[currentPosition++];
1511 if (isPHPIdentifierStart(currentCharacter)) {
1512 return scanIdentifierOrKeyword(true);
1514 currentPosition = oldPosition;
1515 return TokenNameDOLLAR;
1517 } catch (IndexOutOfBoundsException e) {
1518 currentPosition = oldPosition;
1519 return TokenNameDOLLAR;
1522 if (isPHPIdentifierStart(currentCharacter))
1523 return scanIdentifierOrKeyword(false);
1524 if (Character.isDigit(currentCharacter))
1525 return scanNumber(false);
1526 return TokenNameERROR;
1530 } //-----------------end switch while try--------------------
1531 catch (IndexOutOfBoundsException e) {
1534 return TokenNameEOF;
1538 * InvalidInputException
// Scans raw (non-PHP) text and reports it as TokenNameINLINE_HTML; the token start is set to `start`.
// NOTE(review): this listing omits several short lines of the method (loop header, braces,
// else branches), so the notes below cover only the statements that are visible here.
1540 private int getInlinedHTML(int start) throws InvalidInputException {
1541 // int htmlPosition = start;
// The cursor is already beyond the buffer: clamp it to the end and report end of file.
1542 if (currentPosition > source.length) {
1543 currentPosition = source.length;
1544 return TokenNameEOF;
1546 startPosition = start;
1549 currentCharacter = source[currentPosition++];
// A '<' directly followed by '?' is examined as a possible PHP open tag.
1550 if (currentCharacter == '<') {
1551 if (getNextChar('?')) {
1552 currentCharacter = source[currentPosition++];
// "<?" followed by a blank or other whitespace ends the inline HTML token.
1553 if ((currentCharacter == ' ')
1554 || Character.isWhitespace(currentCharacter)) {
1557 return TokenNameINLINE_HTML;
// Otherwise the letters P, H, P are looked for in either case (presumably the "<?php" tag;
// the checks made on `test` between these calls are not visible in this listing).
1559 boolean phpStart = (currentCharacter == 'P')
1560 || (currentCharacter == 'p');
1562 int test = getNextChar('H', 'h');
1564 test = getNextChar('P', 'p');
1568 return TokenNameINLINE_HTML;
// Line ends inside the HTML text are recorded only when the scanner was asked to track them.
1575 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1576 if (recordLineSeparator) {
1577 pushLineSeparator();
1582 } //-----------------while--------------------
1584 return TokenNameINLINE_HTML;
1585 } //-----------------try--------------------
// Reading past the end of the source is caught here; the token start is set back to `start`.
1586 catch (IndexOutOfBoundsException e) {
1587 startPosition = start;
1591 return TokenNameINLINE_HTML;
1593 // public final void getNextUnicodeChar()
1594 // throws IndexOutOfBoundsException, InvalidInputException {
1596 // //handle the case of unicode.
1597 // //when a unicode appears then we must use a buffer that holds char
1599 // //At the end of this method currentCharacter holds the new visited char
1600 // //and currentPosition points right next after it
1602 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1604 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1605 // currentPosition++;
1606 // while (source[currentPosition] == 'u') {
1607 // currentPosition++;
1611 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1613 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1615 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1617 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1619 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1621 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1622 // //need the unicode buffer
1623 // if (withoutUnicodePtr == 0) {
1624 // //buffer all the entries that have been left aside....
1625 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1626 // System.arraycopy(
1629 // withoutUnicodeBuffer,
1631 // withoutUnicodePtr);
1633 // //fill the buffer with the char
1634 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1636 // unicodeAsBackSlash = currentCharacter == '\\';
1639 * Tokenize a method body, assuming that curly brackets are properly
// Advances the scanner through a method body without returning tokens. The visible code skips
// whitespace, handles escape sequences, double-quoted strings, line and block comments,
// identifiers (including a leading '$') and digits, and records line separators on the way.
// NOTE(review): this listing omits many short lines (case labels, braces, try/do headers,
// breaks), so the exact branch structure cannot be confirmed from here.
1642 public final void jumpOverMethodBody() {
1643 this.wasAcr = false;
1646 while (true) { //loop for jumping over comments
1647 // ---------Consume white space and handles startPosition---------
1648 boolean isWhiteSpace;
// startPosition is moved along with every character read, so after the whitespace loop it
// refers to the first non-whitespace character.
1650 startPosition = currentPosition;
1651 currentCharacter = source[currentPosition++];
1652 // if (((currentCharacter = source[currentPosition++]) == '\\')
1653 // && (source[currentPosition] == 'u')) {
1654 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1656 if (recordLineSeparator
1657 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1658 pushLineSeparator();
1659 isWhiteSpace = Character.isWhitespace(currentCharacter);
1661 } while (isWhiteSpace);
1662 // -------consume token until } is found---------
1663 switch (currentCharacter) {
// NOTE(review): the case label for the next statements is not visible; they test for a
// backslash and, in a branch that is also not visible, scan an escape sequence.
1675 test = getNextChar('\\');
1678 scanDoubleQuotedEscapeCharacter();
1679 } catch (InvalidInputException ex) {
1682 // try { // consume next character
1683 unicodeAsBackSlash = false;
1684 currentCharacter = source[currentPosition++];
1685 // if (((currentCharacter = source[currentPosition++]) == '\\')
1686 // && (source[currentPosition] == 'u')) {
1687 // getNextUnicodeChar();
// A non-zero withoutUnicodePtr means the token is being mirrored into withoutUnicodeBuffer,
// so the character just read is appended there as well.
1689 if (withoutUnicodePtr != 0) {
1690 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1693 // } catch (InvalidInputException ex) {
1701 // try { // consume next character
1702 unicodeAsBackSlash = false;
1703 currentCharacter = source[currentPosition++];
1704 // if (((currentCharacter = source[currentPosition++]) == '\\')
1705 // && (source[currentPosition] == 'u')) {
1706 // getNextUnicodeChar();
1708 if (withoutUnicodePtr != 0) {
1709 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1712 // } catch (InvalidInputException ex) {
// Consume the string body up to the closing double quote.
1714 while (currentCharacter != '"') {
1715 if (currentCharacter == '\r') {
1716 if (source[currentPosition] == '\n')
1719 // the string cannot go further than the line
1721 if (currentCharacter == '\n') {
1723 // the string cannot go further than the line
// A backslash inside the string starts an escape sequence.
1725 if (currentCharacter == '\\') {
1727 scanDoubleQuotedEscapeCharacter();
1728 } catch (InvalidInputException ex) {
1731 // try { // consume next character
1732 unicodeAsBackSlash = false;
1733 currentCharacter = source[currentPosition++];
1734 // if (((currentCharacter = source[currentPosition++]) == '\\')
1735 // && (source[currentPosition] == 'u')) {
1736 // getNextUnicodeChar();
1738 if (withoutUnicodePtr != 0) {
1739 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1742 // } catch (InvalidInputException ex) {
1745 } catch (IndexOutOfBoundsException e) {
// getNextChar('/', '*') returning 0 means the first alternative matched, i.e. a second '/':
// the rest of the line is a line comment.
1752 if ((test = getNextChar('/', '*')) == 0) {
1756 currentCharacter = source[currentPosition++];
1757 // if (((currentCharacter = source[currentPosition++]) ==
1759 // && (source[currentPosition] == 'u')) {
1760 // //-------------unicode traitement ------------
1761 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1762 // currentPosition++;
1763 // while (source[currentPosition] == 'u') {
1764 // currentPosition++;
1767 // Character.getNumericValue(source[currentPosition++]))
1771 // Character.getNumericValue(source[currentPosition++]))
1775 // Character.getNumericValue(source[currentPosition++]))
1779 // Character.getNumericValue(source[currentPosition++]))
1782 // //error don't care of the value
1783 // currentCharacter = 'A';
1784 // } //something different from \n and \r
1786 // currentCharacter =
1787 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// Skip everything up to the end of the current line.
1790 while (currentCharacter != '\r' && currentCharacter != '\n') {
1792 currentCharacter = source[currentPosition++];
1793 // if (((currentCharacter = source[currentPosition++])
1795 // && (source[currentPosition] == 'u')) {
1796 // //-------------unicode traitement ------------
1797 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1798 // currentPosition++;
1799 // while (source[currentPosition] == 'u') {
1800 // currentPosition++;
1803 // Character.getNumericValue(source[currentPosition++]))
1807 // Character.getNumericValue(source[currentPosition++]))
1811 // Character.getNumericValue(source[currentPosition++]))
1815 // Character.getNumericValue(source[currentPosition++]))
1818 // //error don't care of the value
1819 // currentCharacter = 'A';
1820 // } //something different from \n and \r
1822 // currentCharacter =
1823 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1827 if (recordLineSeparator
1828 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1829 pushLineSeparator();
1830 } catch (IndexOutOfBoundsException e) {
1831 } //an eof will then be generated
1835 //traditional and annotation comment
// `star` remembers whether the previously read character was '*', so that a following '/'
// can be recognised as the end of the block comment.
1836 boolean star = false;
1837 // try { // consume next character
1838 unicodeAsBackSlash = false;
1839 currentCharacter = source[currentPosition++];
1840 // if (((currentCharacter = source[currentPosition++]) == '\\')
1841 // && (source[currentPosition] == 'u')) {
1842 // getNextUnicodeChar();
1844 if (withoutUnicodePtr != 0) {
1845 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1848 // } catch (InvalidInputException ex) {
1850 if (currentCharacter == '*') {
1853 if (recordLineSeparator
1854 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1855 pushLineSeparator();
1856 try { //get the next char
1857 currentCharacter = source[currentPosition++];
1858 // if (((currentCharacter = source[currentPosition++]) ==
1860 // && (source[currentPosition] == 'u')) {
1861 // //-------------unicode traitement ------------
1862 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1863 // currentPosition++;
1864 // while (source[currentPosition] == 'u') {
1865 // currentPosition++;
1868 // Character.getNumericValue(source[currentPosition++]))
1872 // Character.getNumericValue(source[currentPosition++]))
1876 // Character.getNumericValue(source[currentPosition++]))
1880 // Character.getNumericValue(source[currentPosition++]))
1883 // //error don't care of the value
1884 // currentCharacter = 'A';
1885 // } //something different from * and /
1887 // currentCharacter =
1888 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1891 //loop until end of comment */
1892 while ((currentCharacter != '/') || (!star)) {
1893 if (recordLineSeparator
1894 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1895 pushLineSeparator();
1896 star = currentCharacter == '*';
1898 currentCharacter = source[currentPosition++];
1899 // if (((currentCharacter = source[currentPosition++])
1901 // && (source[currentPosition] == 'u')) {
1902 // //-------------unicode traitement ------------
1903 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1904 // currentPosition++;
1905 // while (source[currentPosition] == 'u') {
1906 // currentPosition++;
1909 // Character.getNumericValue(source[currentPosition++]))
1913 // Character.getNumericValue(source[currentPosition++]))
1917 // Character.getNumericValue(source[currentPosition++]))
1921 // Character.getNumericValue(source[currentPosition++]))
1924 // //error don't care of the value
1925 // currentCharacter = 'A';
1926 // } //something different from * and /
1928 // currentCharacter =
1929 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1933 } catch (IndexOutOfBoundsException e) {
// Identifiers and '$'-prefixed names are consumed by scanIdentifierOrKeyword; the argument
// tells it whether the token started with '$'.
1941 if (isPHPIdentifierStart(currentCharacter)
1942 || currentCharacter == '$') {
1944 scanIdentifierOrKeyword((currentCharacter == '$'));
1945 } catch (InvalidInputException ex) {
// NOTE(review): the call made for a leading digit is not visible in this listing.
1949 if (Character.isDigit(currentCharacter)) {
1952 } catch (InvalidInputException ex) {
1958 //-----------------end switch while try--------------------
1959 } catch (IndexOutOfBoundsException e) {
1960 } catch (InvalidInputException e) {
1964 // public final boolean jumpOverUnicodeWhiteSpace()
1965 // throws InvalidInputException {
1967 // //handle the case of unicode. Jump over the next whiteSpace
1968 // //making startPosition pointing on the next available char
1969 // //On false, the currentCharacter is filled up with a potential
1973 // this.wasAcr = false;
1974 // int c1, c2, c3, c4;
1975 // int unicodeSize = 6;
1976 // currentPosition++;
1977 // while (source[currentPosition] == 'u') {
1978 // currentPosition++;
1982 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1984 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1986 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1988 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1990 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1993 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1994 // if (recordLineSeparator
1995 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1996 // pushLineSeparator();
1997 // if (Character.isWhitespace(currentCharacter))
2000 // //buffer the new char which is not a white space
2001 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2002 // //withoutUnicodePtr == 1 is true here
2004 // } catch (IndexOutOfBoundsException e) {
2005 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// Copies the recorded line-end positions, limited to the linePtr + 1 entries currently in use.
// NOTE(review): the declaration of `copy` and the return statement are not visible in this listing.
2008 public final int[] getLineEnds() {
2009 //return a bounded copy of this.lineEnds
// The destination array is allocated inline, sized to exactly the filled part of lineEnds.
2011 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
// Accessor declared to return a char[].
// NOTE(review): the body is not visible in this listing; presumably it returns the scanner's
// `source` array - confirm against the full file.
2014 public char[] getSource() {
// Returns the one-character token located at startPosition as a char[].
// NOTE(review): the lines between reading the character and the final return are not visible
// in this listing; the existing comments indicate that shared arrays are returned for common
// characters. The visible fallback allocates a new single-element array.
2017 final char[] optimizedCurrentTokenSource1() {
2018 //return always the same char[] build only once
2019 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2020 char charOne = source[startPosition];
2075 return new char[]{charOne};
// Returns the two-character token at startPosition, looking first for an array with the same
// two characters among the entries cached in charArray_length[0] for the token's hash.
// NOTE(review): the local declarations, the tail of the hash expression, the initialisation of
// `i` and the return statements are not visible in this listing.
2078 final char[] optimizedCurrentTokenSource2() {
2079 //try to return the same char[] build only once
// The hash combines both characters; c0 and c1 are captured while it is computed.
2081 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2083 char[][] table = charArray_length[0][hash];
// First scan: entries up to InternalTableSize.
2085 while (++i < InternalTableSize) {
2086 char[] charArray = table[i];
2087 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2090 //---------other side---------
// Second scan: entries up to the slot index held in newEntry2.
2092 int max = newEntry2;
2093 while (++i <= max) {
2094 char[] charArray = table[i];
2095 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2098 //--------add the entry-------
// No match: a fresh array is stored in the next slot (the statement executed when the slot
// index reaches InternalTableSize is not visible - presumably a wrap-around).
2099 if (++max >= InternalTableSize)
2102 table[max] = (r = new char[]{c0, c1});
// Returns the three-character token at startPosition, looking first for an array with the same
// three characters among the entries cached in charArray_length[1] for the token's hash.
// NOTE(review): the local declarations, the tail of the hash expression, the initialisation of
// `i` and the return statements are not visible in this listing.
2106 final char[] optimizedCurrentTokenSource3() {
2107 //try to return the same char[] build only once
// The hash combines all three characters; c0..c2 are captured while it is computed.
2109 int hash = (((c0 = source[startPosition]) << 12)
2110 + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2112 char[][] table = charArray_length[1][hash];
// First scan: entries up to InternalTableSize.
2114 while (++i < InternalTableSize) {
2115 char[] charArray = table[i];
2116 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2119 //---------other side---------
// Second scan: entries up to the slot index held in newEntry3.
2121 int max = newEntry3;
2122 while (++i <= max) {
2123 char[] charArray = table[i];
2124 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2127 //--------add the entry-------
// No match: a fresh array is stored in the next slot (the statement executed when the slot
// index reaches InternalTableSize is not visible - presumably a wrap-around).
2128 if (++max >= InternalTableSize)
2131 table[max] = (r = new char[]{c0, c1, c2});
// Returns the four-character token at startPosition, looking first for an array with the same
// four characters among the entries cached in charArray_length[2] for the token's hash.
// NOTE(review): the tail of the hash expression, the declaration and initialisation of `i`
// and the return statements are not visible in this listing.
2135 final char[] optimizedCurrentTokenSource4() {
2136 //try to return the same char[] build only once
2137 char c0, c1, c2, c3;
// The first character is widened to long before shifting; the hash is narrowed back to int
// when it is used as an index below.
2138 long hash = ((((long) (c0 = source[startPosition])) << 18)
2139 + ((c1 = source[startPosition + 1]) << 12)
2140 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2142 char[][] table = charArray_length[2][(int) hash];
// First scan: entries up to InternalTableSize.
2144 while (++i < InternalTableSize) {
2145 char[] charArray = table[i];
2146 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2147 && (c3 == charArray[3]))
2150 //---------other side---------
// Second scan: entries up to the slot index held in newEntry4.
2152 int max = newEntry4;
2153 while (++i <= max) {
2154 char[] charArray = table[i];
2155 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2156 && (c3 == charArray[3]))
2159 //--------add the entry-------
// No match: a fresh array is stored in the next slot (the statement executed when the slot
// index reaches InternalTableSize is not visible - presumably a wrap-around).
2160 if (++max >= InternalTableSize)
2163 table[max] = (r = new char[]{c0, c1, c2, c3});
// Returns the five-character token at startPosition, looking first for an array with the same
// five characters among the entries cached in charArray_length[3] for the token's hash.
// NOTE(review): the tail of the hash expression, the declaration and initialisation of `i`
// and the return statements are not visible in this listing.
2167 final char[] optimizedCurrentTokenSource5() {
2168 //try to return the same char[] build only once
2169 char c0, c1, c2, c3, c4;
// The two leading characters are widened to long before shifting; the hash is narrowed back
// to int when it is used as an index below.
2170 long hash = ((((long) (c0 = source[startPosition])) << 24)
2171 + (((long) (c1 = source[startPosition + 1])) << 18)
2172 + ((c2 = source[startPosition + 2]) << 12)
2173 + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2175 char[][] table = charArray_length[3][(int) hash];
// First scan: entries up to InternalTableSize.
2177 while (++i < InternalTableSize) {
2178 char[] charArray = table[i];
2179 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2180 && (c3 == charArray[3]) && (c4 == charArray[4]))
2183 //---------other side---------
// Second scan: entries up to the slot index held in newEntry5.
2185 int max = newEntry5;
2186 while (++i <= max) {
2187 char[] charArray = table[i];
2188 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2189 && (c3 == charArray[3]) && (c4 == charArray[4]))
2192 //--------add the entry-------
// No match: a fresh array is stored in the next slot (the statement executed when the slot
// index reaches InternalTableSize is not visible - presumably a wrap-around).
2193 if (++max >= InternalTableSize)
2196 table[max] = (r = new char[]{c0, c1, c2, c3, c4});
// Returns the six-character token at startPosition, looking first for an array with the same
// six characters among the entries cached in charArray_length[4] for the token's hash.
// NOTE(review): the tail of the hash expression, the declaration and initialisation of `i`
// and the return statements are not visible in this listing.
2200 final char[] optimizedCurrentTokenSource6() {
2201 //try to return the same char[] build only once
2202 char c0, c1, c2, c3, c4, c5;
// The three leading characters are widened to long before shifting; the hash is narrowed
// back to int when it is used as an index below.
2203 long hash = ((((long) (c0 = source[startPosition])) << 32)
2204 + (((long) (c1 = source[startPosition + 1])) << 24)
2205 + (((long) (c2 = source[startPosition + 2])) << 18)
2206 + ((c3 = source[startPosition + 3]) << 12)
2207 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2209 char[][] table = charArray_length[4][(int) hash];
// First scan: entries up to InternalTableSize.
2211 while (++i < InternalTableSize) {
2212 char[] charArray = table[i];
2213 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2214 && (c3 == charArray[3]) && (c4 == charArray[4])
2215 && (c5 == charArray[5]))
2218 //---------other side---------
// Second scan: entries up to the slot index held in newEntry6.
2220 int max = newEntry6;
2221 while (++i <= max) {
2222 char[] charArray = table[i];
2223 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2224 && (c3 == charArray[3]) && (c4 == charArray[4])
2225 && (c5 == charArray[5]))
2228 //--------add the entry-------
// No match: a fresh array is stored in the next slot (the statement executed when the slot
// index reaches InternalTableSize is not visible - presumably a wrap-around).
2229 if (++max >= InternalTableSize)
2232 table[max] = (r = new char[]{c0, c1, c2, c3, c4, c5});
// Records the position of the line separator held in currentCharacter (located at
// currentPosition - 1) in lineEnds, growing that array by INCREMENT slots when it is full.
// A carriage return directly followed by a line feed ends up as a single entry positioned
// on the line feed.
// NOTE(review): several short lines (try headers, else branches, early exits, updates of
// wasAcr) are not visible in this listing.
2236 public final void pushLineSeparator() throws InvalidInputException {
2237 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2238 final int INCREMENT = 250;
2239 if (this.checkNonExternalizedStringLiterals) {
2240 // reinitialize the current line for non externalize strings purpose
2243 //currentCharacter is at position currentPosition-1
2245 if (currentCharacter == '\r') {
2246 int separatorPos = currentPosition - 1;
// Guard against a position at or before the last recorded line end (the statement it
// controls is not visible - presumably an early return).
2247 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2249 //System.out.println("CR-" + separatorPos);
2251 lineEnds[++linePtr] = separatorPos;
2252 } catch (IndexOutOfBoundsException e) {
// linePtr was already incremented before the store failed, so after growing the array
// the entry is written at linePtr without incrementing again.
2253 //linePtr value is correct
2254 int oldLength = lineEnds.length;
2255 int[] old = lineEnds;
2256 lineEnds = new int[oldLength + INCREMENT];
2257 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2258 lineEnds[linePtr] = separatorPos;
2260 // look-ahead for merged cr+lf
// When a line feed follows immediately, the entry just written is moved onto it.
2262 if (source[currentPosition] == '\n') {
2263 //System.out.println("look-ahead LF-" + currentPosition);
2264 lineEnds[linePtr] = currentPosition;
// The look-ahead may read past the end of the source.
2270 } catch (IndexOutOfBoundsException e) {
2275 if (currentCharacter == '\n') {
2276 //must merge eventual cr followed by lf
// The previous entry is the carriage return right before this line feed: overwrite it
// instead of adding a second entry.
2277 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2278 //System.out.println("merge LF-" + (currentPosition - 1));
2279 lineEnds[linePtr] = currentPosition - 1;
2281 int separatorPos = currentPosition - 1;
2282 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2284 // System.out.println("LF-" + separatorPos);
2286 lineEnds[++linePtr] = separatorPos;
2287 } catch (IndexOutOfBoundsException e) {
2288 //linePtr value is correct
2289 int oldLength = lineEnds.length;
2290 int[] old = lineEnds;
2291 lineEnds = new int[oldLength + INCREMENT];
2292 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2293 lineEnds[linePtr] = separatorPos;
// Variant of the line-separator bookkeeping for a separator that was read as a unicode
// character: positions are computed from currentPosition - 6 rather than currentPosition - 1
// (presumably the length of a six-character unicode escape - confirm). lineEnds grows by
// INCREMENT slots when it is full.
// NOTE(review): several short lines (try headers, else branches, early exits, updates of
// wasAcr) are not visible in this listing.
2300 public final void pushUnicodeLineSeparator() {
2301 // isUnicode means that the \r or \n has been read as a unicode character
2302 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2303 final int INCREMENT = 250;
2304 //currentCharacter is at position currentPosition-1
2305 if (this.checkNonExternalizedStringLiterals) {
2306 // reinitialize the current line for non externalize strings purpose
2310 if (currentCharacter == '\r') {
2311 int separatorPos = currentPosition - 6;
// Guard against a position at or before the last recorded line end (the statement it
// controls is not visible - presumably an early return).
2312 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2314 //System.out.println("CR-" + separatorPos);
2316 lineEnds[++linePtr] = separatorPos;
2317 } catch (IndexOutOfBoundsException e) {
// linePtr was already incremented before the store failed, so after growing the array
// the entry is written at linePtr without incrementing again.
2318 //linePtr value is correct
2319 int oldLength = lineEnds.length;
2320 int[] old = lineEnds;
2321 lineEnds = new int[oldLength + INCREMENT];
2322 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2323 lineEnds[linePtr] = separatorPos;
2325 // look-ahead for merged cr+lf
// When a literal line feed follows immediately, the entry just written is moved onto it.
2326 if (source[currentPosition] == '\n') {
2327 //System.out.println("look-ahead LF-" + currentPosition);
2328 lineEnds[linePtr] = currentPosition;
2336 if (currentCharacter == '\n') {
2337 //must merge eventual cr followed by lf
// The previous entry sits directly before this separator: overwrite it instead of adding
// a second entry.
2338 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2339 //System.out.println("merge LF-" + (currentPosition - 1));
2340 lineEnds[linePtr] = currentPosition - 6;
2342 int separatorPos = currentPosition - 6;
2343 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2345 // System.out.println("LF-" + separatorPos);
2347 lineEnds[++linePtr] = separatorPos;
2348 } catch (IndexOutOfBoundsException e) {
2349 //linePtr value is correct
2350 int oldLength = lineEnds.length;
2351 int[] old = lineEnds;
2352 lineEnds = new int[oldLength + INCREMENT];
2353 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2354 lineEnds[linePtr] = separatorPos;
// Pushes the comment that was just scanned onto the comment stacks: its start (startPosition)
// goes into commentStarts and its end (currentPosition) into commentStops. The growth path
// below stores the end negated when isJavadoc is false. Both arrays grow by 30 slots when full.
// NOTE(review): the try header and the tail of the first assignment are not visible in this listing.
2361 public final void recordComment(boolean isJavadoc) {
2362 // a new annotation comment is recorded
2364 commentStops[++commentPtr] = isJavadoc
2367 } catch (IndexOutOfBoundsException e) {
// commentPtr was already incremented before the store failed, so the retry below writes at
// commentPtr without incrementing again.
2368 int oldStackLength = commentStops.length;
2369 int[] oldStack = commentStops;
2370 commentStops = new int[oldStackLength + 30];
2371 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2372 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2373 //grows the positions buffers too
// commentStarts is resized to the same new length as commentStops.
2374 int[] old = commentStarts;
2375 commentStarts = new int[oldStackLength + 30];
2376 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2378 //the buffer is of a correct size here
2379 commentStarts[commentPtr] = startPosition;
// Repositions the scanner so that scanning can restart at `begin`; `end` determines eofPosition.
// NOTE(review): one line between the first comment and the first assignment is not visible in this listing.
2381 public void resetTo(int begin, int end) {
2382 //reset the scanner to a given position where it may rescan again
2384 initialPosition = startPosition = currentPosition = begin;
// eofPosition is one past `end`, except for Integer.MAX_VALUE where adding 1 would overflow.
2385 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2386 commentPtr = -1; // reset comment stack
// Reads the next source character (the one after a backslash inside a single-quoted string,
// as seen from the call site in the token loop) and leaves the resulting character in
// currentCharacter. The visible branches produce either a single quote or a backslash.
// NOTE(review): the case labels and breaks of the switch are not visible in this listing, so
// which input maps to which result cannot be confirmed from here.
2388 public final void scanSingleQuotedEscapeCharacter()
2389 throws InvalidInputException {
2390 // the string with "\\u" is a legal string of two chars \ and u
2391 //thus we use a direct access to the source (for regular cases).
2392 // if (unicodeAsBackSlash) {
2393 // // consume next character
2394 // unicodeAsBackSlash = false;
2395 // if (((currentCharacter = source[currentPosition++]) == '\\')
2396 // && (source[currentPosition] == 'u')) {
2397 // getNextUnicodeChar();
2399 // if (withoutUnicodePtr != 0) {
2400 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2404 currentCharacter = source[currentPosition++];
2405 switch (currentCharacter) {
2407 currentCharacter = '\'';
2410 currentCharacter = '\\';
2413 currentCharacter = '\\';
/**
 * Handles the character following a backslash inside a double-quoted string.
 * Reads the next source character into currentCharacter (advancing
 * currentPosition) and replaces it with the character the escape stands for:
 * tab, line feed, carriage return, double quote, single quote, backslash or
 * dollar sign in the visible branches, or the value of an octal escape of up
 * to three digits.
 * <p>
 * NOTE(review): the case labels and the statements that step back over an
 * ignored character are not visible here -- confirm against the full file.
 *
 * @throws InvalidInputException with INVALID_ESCAPE on one path of the octal
 *           handling (the guarding branch is not visible here)
 */
2417 public final void scanDoubleQuotedEscapeCharacter()
2418 throws InvalidInputException {
2419 // the string with "\\u" is a legal string of two chars \ and u
2420 //thus we use a direct access to the source (for regular cases).
2421 // if (unicodeAsBackSlash) {
2422 // // consume next character
2423 // unicodeAsBackSlash = false;
2424 // if (((currentCharacter = source[currentPosition++]) == '\\')
2425 // && (source[currentPosition] == 'u')) {
2426 // getNextUnicodeChar();
2428 // if (withoutUnicodePtr != 0) {
2429 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// consume the character that follows the backslash
2433 currentCharacter = source[currentPosition++];
2434 switch (currentCharacter) {
2436 // currentCharacter = '\b';
2439 currentCharacter = '\t';
2442 currentCharacter = '\n';
2445 // currentCharacter = '\f';
2448 currentCharacter = '\r';
2451 currentCharacter = '\"';
2454 currentCharacter = '\'';
2457 currentCharacter = '\\';
2460 currentCharacter = '$';
2463 // -----------octal escape--------------
2465 // OctalDigit OctalDigit
2466 // ZeroToThree OctalDigit OctalDigit
// number accumulates the escape's value, one octal digit at a time
2467 int number = Character.getNumericValue(currentCharacter);
2468 if (number >= 0 && number <= 7) {
// when the first digit is above 3, a third digit is not part of the escape
2469 boolean zeroToThreeNot = number > 3;
2470 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2471 int digit = Character.getNumericValue(currentCharacter);
2472 if (digit >= 0 && digit <= 7) {
2473 number = (number * 8) + digit;
2475 .isDigit(currentCharacter = source[currentPosition++])) {
2476 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2477 // Digit --> ignore last character
2480 digit = Character.getNumericValue(currentCharacter);
2481 if (digit >= 0 && digit <= 7) {
2482 // has read \ZeroToThree OctalDigit OctalDigit
2483 number = (number * 8) + digit;
2484 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2485 // --> ignore last character
2489 } else { // has read \OctalDigit NonDigit--> ignore last
2493 } else { // has read \OctalDigit NonOctalDigit--> ignore last
2497 } else { // has read \OctalDigit --> ignore last character
2501 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the resulting character
2502 currentCharacter = (char) number;
2505 // throw new InvalidInputException(INVALID_ESCAPE);
2508 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2509 // return scanIdentifierOrKeyword( false );
/**
 * Scans the rest of an identifier and classifies it as a variable, a keyword,
 * a magic constant (__FILE__, __LINE__, ...) or a plain identifier.
 * <p>
 * Characters are consumed while getNextCharAsJavaIdentifierPart() succeeds.
 * The token text, source[startPosition .. currentPosition), is then copied in
 * lower case into a scratch array, so every comparison below is
 * case-insensitive. A token of length 1 is always an identifier.
 * <p>
 * Matching idiom: each chain of (data[++index] == c) tests advances index as
 * it goes and stops at the first mismatch; the "else if (data[index] == ...)"
 * alternatives rely on index still pointing at that mismatching character.
 *
 * @param isVariable presumably true when the token is a $variable, in which
 *          case TokenNameVariable is returned -- the guarding condition is
 *          not visible here, TODO confirm
 * @return TokenNameVariable, the TokenName constant of the recognised
 *         keyword or magic constant, or TokenNameIdentifier
 * @throws InvalidInputException declared; no throw is visible in this method
 */
2511 public int scanIdentifierOrKeyword(boolean isVariable)
2512 throws InvalidInputException {
2514 //first dispatch on the first char.
2515 //then the length. If there are several
2516 //keywords with the same length AND the same first char, then do another
2517 //dispatch on the second char :-)...cool....but fast !
2518 useAssertAsAnIndentifier = false;
2519 while (getNextCharAsJavaIdentifierPart()) {
2522 // if (new String(getCurrentTokenSource()).equals("$this")) {
2523 // return TokenNamethis;
2525 return TokenNameVariable;
2530 // if (withoutUnicodePtr == 0)
2531 //quick test on length == 1 but not on length > 12 while most identifier
2532 //have a length which is <= 12...but there are lots of identifier with
2535 if ((length = currentPosition - startPosition) == 1)
2536 return TokenNameIdentifier;
// lower-cased copy of the token text; keyword matching works on this copy
2538 data = new char[length];
2539 index = startPosition;
2540 for (int i = 0; i < length; i++) {
2541 data[i] = Character.toLowerCase(source[index + i]);
2545 // if ((length = withoutUnicodePtr) == 1)
2546 // return TokenNameIdentifier;
2547 // // data = withoutUnicodeBuffer;
2548 // data = new char[withoutUnicodeBuffer.length];
2549 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2550 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2554 firstLetter = data[index];
2555 switch (firstLetter) {
// magic constants: __file__, __line__, __class__, __method__, __function__
2560 if ((data[++index] == '_') && (data[++index] == 'f')
2561 && (data[++index] == 'i') && (data[++index] == 'l')
2562 && (data[++index] == 'e') && (data[++index] == '_')
2563 && (data[++index] == '_'))
2564 return TokenNameFILE;
2565 index = 0; //__LINE__
2566 if ((data[++index] == '_') && (data[++index] == 'l')
2567 && (data[++index] == 'i') && (data[++index] == 'n')
2568 && (data[++index] == 'e') && (data[++index] == '_')
2569 && (data[++index] == '_'))
2570 return TokenNameLINE;
2574 if ((data[++index] == '_') && (data[++index] == 'c')
2575 && (data[++index] == 'l') && (data[++index] == 'a')
2576 && (data[++index] == 's') && (data[++index] == 's')
2577 && (data[++index] == '_') && (data[++index] == '_'))
2578 return TokenNameCLASS_C;
2582 if ((data[++index] == '_') && (data[++index] == 'm')
2583 && (data[++index] == 'e') && (data[++index] == 't')
2584 && (data[++index] == 'h') && (data[++index] == 'o')
2585 && (data[++index] == 'd') && (data[++index] == '_')
2586 && (data[++index] == '_'))
2587 return TokenNameMETHOD_C;
2591 if ((data[++index] == '_') && (data[++index] == 'f')
2592 && (data[++index] == 'u') && (data[++index] == 'n')
2593 && (data[++index] == 'c') && (data[++index] == 't')
2594 && (data[++index] == 'i') && (data[++index] == 'o')
2595 && (data[++index] == 'n') && (data[++index] == '_')
2596 && (data[++index] == '_'))
2597 return TokenNameFUNC_C;
2600 return TokenNameIdentifier;
2602 // as and array abstract
2606 if ((data[++index] == 's')) {
2609 return TokenNameIdentifier;
2613 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2614 return TokenNameand;
2616 return TokenNameIdentifier;
2620 if ((data[++index] == 'r') && (data[++index] == 'r')
2621 && (data[++index] == 'a') && (data[++index] == 'y'))
2622 return TokenNamearray;
2624 return TokenNameIdentifier;
2626 if ((data[++index] == 'b') && (data[++index] == 's')
2627 && (data[++index] == 't') && (data[++index] == 'r')
2628 && (data[++index] == 'a') && (data[++index] == 'c')
2629 && (data[++index] == 't'))
2630 return TokenNameabstract;
2632 return TokenNameIdentifier;
2634 return TokenNameIdentifier;
2640 if ((data[++index] == 'r') && (data[++index] == 'e')
2641 && (data[++index] == 'a') && (data[++index] == 'k'))
2642 return TokenNamebreak;
2644 return TokenNameIdentifier;
2646 return TokenNameIdentifier;
2649 //case catch class clone const continue
2652 if ((data[++index] == 'a') && (data[++index] == 's')
2653 && (data[++index] == 'e'))
2654 return TokenNamecase;
2656 return TokenNameIdentifier;
2658 if ((data[++index] == 'a') && (data[++index] == 't')
2659 && (data[++index] == 'c') && (data[++index] == 'h'))
2660 return TokenNamecatch;
2662 if ((data[++index] == 'l') && (data[++index] == 'a')
2663 && (data[++index] == 's') && (data[++index] == 's'))
2664 return TokenNameclass;
2666 if ((data[++index] == 'l') && (data[++index] == 'o')
2667 && (data[++index] == 'n') && (data[++index] == 'e'))
2668 return TokenNameclone;
2670 if ((data[++index] == 'o') && (data[++index] == 'n')
2671 && (data[++index] == 's') && (data[++index] == 't'))
2672 return TokenNameconst;
2674 return TokenNameIdentifier;
2676 if ((data[++index] == 'o') && (data[++index] == 'n')
2677 && (data[++index] == 't') && (data[++index] == 'i')
2678 && (data[++index] == 'n') && (data[++index] == 'u')
2679 && (data[++index] == 'e'))
2680 return TokenNamecontinue;
2682 return TokenNameIdentifier;
2684 return TokenNameIdentifier;
2687 // declare default do die
2688 // TODO delete define ==> no keyword !
2691 if ((data[++index] == 'o'))
2694 return TokenNameIdentifier;
2696 // if ((data[++index] == 'e')
2697 // && (data[++index] == 'f')
2698 // && (data[++index] == 'i')
2699 // && (data[++index] == 'n')
2700 // && (data[++index] == 'e'))
2701 // return TokenNamedefine;
2703 // return TokenNameIdentifier;
2705 if ((data[++index] == 'e') && (data[++index] == 'c')
2706 && (data[++index] == 'l') && (data[++index] == 'a')
2707 && (data[++index] == 'r') && (data[++index] == 'e'))
2708 return TokenNamedeclare;
2710 if ((data[++index] == 'e') && (data[++index] == 'f')
2711 && (data[++index] == 'a') && (data[++index] == 'u')
2712 && (data[++index] == 'l') && (data[++index] == 't'))
2713 return TokenNamedefault;
2715 return TokenNameIdentifier;
2717 return TokenNameIdentifier;
2720 //echo else exit elseif extends eval
// the alternatives below test data[index] without advancing: index still
// points at the second character, where the first chain stopped
2723 if ((data[++index] == 'c') && (data[++index] == 'h')
2724 && (data[++index] == 'o'))
2725 return TokenNameecho;
2726 else if ((data[index] == 'l') && (data[++index] == 's')
2727 && (data[++index] == 'e'))
2728 return TokenNameelse;
2729 else if ((data[index] == 'x') && (data[++index] == 'i')
2730 && (data[++index] == 't'))
2731 return TokenNameexit;
2732 else if ((data[index] == 'v') && (data[++index] == 'a')
2733 && (data[++index] == 'l'))
2734 return TokenNameeval;
2736 return TokenNameIdentifier;
2739 if ((data[++index] == 'n') && (data[++index] == 'd')
2740 && (data[++index] == 'i') && (data[++index] == 'f'))
2741 return TokenNameendif;
2742 if ((data[index] == 'm') && (data[++index] == 'p')
2743 && (data[++index] == 't') && (data[++index] == 'y'))
2744 return TokenNameempty;
2746 return TokenNameIdentifier;
2749 if ((data[++index] == 'n') && (data[++index] == 'd')
2750 && (data[++index] == 'f') && (data[++index] == 'o')
2751 && (data[++index] == 'r'))
2752 return TokenNameendfor;
2753 else if ((data[index] == 'l') && (data[++index] == 's')
2754 && (data[++index] == 'e') && (data[++index] == 'i')
2755 && (data[++index] == 'f'))
2756 return TokenNameelseif;
2758 return TokenNameIdentifier;
2760 if ((data[++index] == 'x') && (data[++index] == 't')
2761 && (data[++index] == 'e') && (data[++index] == 'n')
2762 && (data[++index] == 'd') && (data[++index] == 's'))
2763 return TokenNameextends;
2765 return TokenNameIdentifier;
2768 if ((data[++index] == 'n') && (data[++index] == 'd')
2769 && (data[++index] == 'w') && (data[++index] == 'h')
2770 && (data[++index] == 'i') && (data[++index] == 'l')
2771 && (data[++index] == 'e'))
2772 return TokenNameendwhile;
2774 return TokenNameIdentifier;
2777 if ((data[++index] == 'n') && (data[++index] == 'd')
2778 && (data[++index] == 's') && (data[++index] == 'w')
2779 && (data[++index] == 'i') && (data[++index] == 't')
2780 && (data[++index] == 'c') && (data[++index] == 'h'))
2781 return TokenNameendswitch;
2783 return TokenNameIdentifier;
// NOTE(review): the chain below spells "enddeclare" but returns
// TokenNameendforeach, the same token as the "endforeach" chain after it.
// This looks like a copy/paste slip; presumably a dedicated enddeclare token
// should be returned -- confirm against ITerminalSymbols and the parser.
2786 if ((data[++index] == 'n') && (data[++index] == 'd')
2787 && (data[++index] == 'd') && (data[++index] == 'e')
2788 && (data[++index] == 'c') && (data[++index] == 'l')
2789 && (data[++index] == 'a') && (data[++index] == 'r')
2790 && (data[++index] == 'e'))
2791 return TokenNameendforeach;
2793 if ((data[++index] == 'n') // endforeach
2794 && (data[++index] == 'd') && (data[++index] == 'f')
2795 && (data[++index] == 'o') && (data[++index] == 'r')
2796 && (data[++index] == 'e') && (data[++index] == 'a')
2797 && (data[++index] == 'c') && (data[++index] == 'h'))
2798 return TokenNameendforeach;
2800 return TokenNameIdentifier;
2802 return TokenNameIdentifier;
2805 //for false final function
2808 if ((data[++index] == 'o') && (data[++index] == 'r'))
2809 return TokenNamefor;
2811 return TokenNameIdentifier;
2813 // if ((data[++index] == 'a') && (data[++index] == 'l')
2814 // && (data[++index] == 's') && (data[++index] == 'e'))
2815 // return TokenNamefalse;
2816 if ((data[++index] == 'i') && (data[++index] == 'n')
2817 && (data[++index] == 'a') && (data[++index] == 'l'))
2818 return TokenNamefinal;
2820 return TokenNameIdentifier;
2823 if ((data[++index] == 'o') && (data[++index] == 'r')
2824 && (data[++index] == 'e') && (data[++index] == 'a')
2825 && (data[++index] == 'c') && (data[++index] == 'h'))
2826 return TokenNameforeach;
2828 return TokenNameIdentifier;
2831 if ((data[++index] == 'u') && (data[++index] == 'n')
2832 && (data[++index] == 'c') && (data[++index] == 't')
2833 && (data[++index] == 'i') && (data[++index] == 'o')
2834 && (data[++index] == 'n'))
2835 return TokenNamefunction;
2837 return TokenNameIdentifier;
2839 return TokenNameIdentifier;
2844 if ((data[++index] == 'l') && (data[++index] == 'o')
2845 && (data[++index] == 'b') && (data[++index] == 'a')
2846 && (data[++index] == 'l')) {
2847 return TokenNameglobal;
2850 return TokenNameIdentifier;
2852 //if int isset include include_once instanceof interface implements
2855 if (data[++index] == 'f')
2858 return TokenNameIdentifier;
2860 // if ((data[++index] == 'n') && (data[++index] == 't'))
2861 // return TokenNameint;
2863 // return TokenNameIdentifier;
2865 if ((data[++index] == 's') && (data[++index] == 's')
2866 && (data[++index] == 'e') && (data[++index] == 't'))
2867 return TokenNameisset;
2869 return TokenNameIdentifier;
2871 if ((data[++index] == 'n') && (data[++index] == 'c')
2872 && (data[++index] == 'l') && (data[++index] == 'u')
2873 && (data[++index] == 'd') && (data[++index] == 'e'))
2874 return TokenNameinclude;
2876 return TokenNameIdentifier;
2879 if ((data[++index] == 'n') && (data[++index] == 't')
2880 && (data[++index] == 'e') && (data[++index] == 'r')
2881 && (data[++index] == 'f') && (data[++index] == 'a')
2882 && (data[++index] == 'c') && (data[++index] == 'e'))
2883 return TokenNameinterface;
2885 return TokenNameIdentifier;
2888 if ((data[++index] == 'n') && (data[++index] == 's')
2889 && (data[++index] == 't') && (data[++index] == 'a')
2890 && (data[++index] == 'n') && (data[++index] == 'c')
2891 && (data[++index] == 'e') && (data[++index] == 'o')
2892 && (data[++index] == 'f'))
2893 return TokenNameinstanceof;
2894 if ((data[index] == 'm') && (data[++index] == 'p')
2895 && (data[++index] == 'l') && (data[++index] == 'e')
2896 && (data[++index] == 'm') && (data[++index] == 'e')
2897 && (data[++index] == 'n') && (data[++index] == 't')
2898 && (data[++index] == 's'))
2899 return TokenNameimplements;
2901 return TokenNameIdentifier;
2903 if ((data[++index] == 'n') && (data[++index] == 'c')
2904 && (data[++index] == 'l') && (data[++index] == 'u')
2905 && (data[++index] == 'd') && (data[++index] == 'e')
2906 && (data[++index] == '_') && (data[++index] == 'o')
2907 && (data[++index] == 'n') && (data[++index] == 'c')
2908 && (data[++index] == 'e'))
2909 return TokenNameinclude_once;
2911 return TokenNameIdentifier;
2913 return TokenNameIdentifier;
2918 if ((data[++index] == 'i') && (data[++index] == 's')
2919 && (data[++index] == 't')) {
2920 return TokenNamelist;
2923 return TokenNameIdentifier;
2928 if ((data[++index] == 'e') && (data[++index] == 'w'))
2929 return TokenNamenew;
2931 return TokenNameIdentifier;
2933 // if ((data[++index] == 'u') && (data[++index] == 'l')
2934 // && (data[++index] == 'l'))
2935 // return TokenNamenull;
2937 // return TokenNameIdentifier;
2939 return TokenNameIdentifier;
2944 if (data[++index] == 'r') {
2948 // if (length == 12) {
2949 // if ((data[++index] == 'l')
2950 // && (data[++index] == 'd')
2951 // && (data[++index] == '_')
2952 // && (data[++index] == 'f')
2953 // && (data[++index] == 'u')
2954 // && (data[++index] == 'n')
2955 // && (data[++index] == 'c')
2956 // && (data[++index] == 't')
2957 // && (data[++index] == 'i')
2958 // && (data[++index] == 'o')
2959 // && (data[++index] == 'n')) {
2960 // return TokenNameold_function;
2963 return TokenNameIdentifier;
2965 // print public private protected
2968 if ((data[++index] == 'r') && (data[++index] == 'i')
2969 && (data[++index] == 'n') && (data[++index] == 't')) {
2970 return TokenNameprint;
2972 return TokenNameIdentifier;
2974 if ((data[++index] == 'u') && (data[++index] == 'b')
2975 && (data[++index] == 'l') && (data[++index] == 'i')
2976 && (data[++index] == 'c')) {
2977 return TokenNamepublic;
2979 return TokenNameIdentifier;
2981 if ((data[++index] == 'r') && (data[++index] == 'i')
2982 && (data[++index] == 'v') && (data[++index] == 'a')
2983 && (data[++index] == 't') && (data[++index] == 'e')) {
2984 return TokenNameprivate;
2986 return TokenNameIdentifier;
2988 if ((data[++index] == 'r') && (data[++index] == 'o')
2989 && (data[++index] == 't') && (data[++index] == 'e')
2990 && (data[++index] == 'c') && (data[++index] == 't')
2991 && (data[++index] == 'e') && (data[++index] == 'd')) {
2992 return TokenNameprotected;
2994 return TokenNameIdentifier;
2996 return TokenNameIdentifier;
2998 //return require require_once
3000 if ((data[++index] == 'e') && (data[++index] == 't')
3001 && (data[++index] == 'u') && (data[++index] == 'r')
3002 && (data[++index] == 'n')) {
3003 return TokenNamereturn;
3005 } else if (length == 7) {
3006 if ((data[++index] == 'e') && (data[++index] == 'q')
3007 && (data[++index] == 'u') && (data[++index] == 'i')
3008 && (data[++index] == 'r') && (data[++index] == 'e')) {
3009 return TokenNamerequire;
3011 } else if (length == 12) {
3012 if ((data[++index] == 'e') && (data[++index] == 'q')
3013 && (data[++index] == 'u') && (data[++index] == 'i')
3014 && (data[++index] == 'r') && (data[++index] == 'e')
3015 && (data[++index] == '_') && (data[++index] == 'o')
3016 && (data[++index] == 'n') && (data[++index] == 'c')
3017 && (data[++index] == 'e')) {
3018 return TokenNamerequire_once;
3021 return TokenNameIdentifier;
3026 if (data[++index] == 't')
3027 if ((data[++index] == 'a') && (data[++index] == 't')
3028 && (data[++index] == 'i') && (data[++index] == 'c')) {
3029 return TokenNamestatic;
3031 return TokenNameIdentifier;
3032 else if ((data[index] == 'w') && (data[++index] == 'i')
3033 && (data[++index] == 't') && (data[++index] == 'c')
3034 && (data[++index] == 'h'))
3035 return TokenNameswitch;
3037 return TokenNameIdentifier;
3039 return TokenNameIdentifier;
3045 if ((data[++index] == 'r') && (data[++index] == 'y'))
3046 return TokenNametry;
3048 return TokenNameIdentifier;
3050 // if ((data[++index] == 'r') && (data[++index] == 'u')
3051 // && (data[++index] == 'e'))
3052 // return TokenNametrue;
3054 // return TokenNameIdentifier;
3056 if ((data[++index] == 'h') && (data[++index] == 'r')
3057 && (data[++index] == 'o') && (data[++index] == 'w'))
3058 return TokenNamethrow;
3060 return TokenNameIdentifier;
3062 return TokenNameIdentifier;
3068 if ((data[++index] == 's') && (data[++index] == 'e'))
3069 return TokenNameuse;
3071 return TokenNameIdentifier;
3073 if ((data[++index] == 'n') && (data[++index] == 's')
3074 && (data[++index] == 'e') && (data[++index] == 't'))
3075 return TokenNameunset;
3077 return TokenNameIdentifier;
3079 return TokenNameIdentifier;
3085 if ((data[++index] == 'a') && (data[++index] == 'r'))
3086 return TokenNamevar;
3088 return TokenNameIdentifier;
3090 return TokenNameIdentifier;
3096 if ((data[++index] == 'h') && (data[++index] == 'i')
3097 && (data[++index] == 'l') && (data[++index] == 'e'))
3098 return TokenNamewhile;
3100 return TokenNameIdentifier;
3101 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3102 // (data[++index]=='e') && (data[++index]=='f')&&
3103 // (data[++index]=='p'))
3104 //return TokenNamewidefp ;
3106 //return TokenNameIdentifier;
3108 return TokenNameIdentifier;
3114 if ((data[++index] == 'o') && (data[++index] == 'r'))
3115 return TokenNamexor;
3117 return TokenNameIdentifier;
3119 return TokenNameIdentifier;
3122 return TokenNameIdentifier;
/**
 * Scans a numeric literal whose first digit is already in currentCharacter.
 * <p>
 * A leading 0 followed by x or X starts a hexadecimal integer. A leading 0
 * followed by more digits is treated as a potential octal integer, unless a
 * 'd'/'D' suffix, a '.' or an exponent turns it into a double. Any other
 * input is scanned as a decimal number with an optional fraction, exponent
 * and 'd'/'D' suffix.
 *
 * @param dotPrefix true when the digits were preceded by a '.'; the number
 *          then starts out as floating and no further '.' is accepted
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException with INVALID_HEXA when the character read
 *           after the x/X is not a hexadecimal digit, or with INVALID_FLOAT
 *           when an exponent (after its optional sign) has no digit
 */
3125 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3126 //when entering this method the currentCharacter is the first
3127 //digit of the number , i.e. it may be preceded by a . when
3129 boolean floating = dotPrefix;
3130 if ((!dotPrefix) && (currentCharacter == '0')) {
3131 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3132 //force the first char of the hexa number do exist...
3133 // consume next character
3134 unicodeAsBackSlash = false;
3135 currentCharacter = source[currentPosition++];
3136 // if (((currentCharacter = source[currentPosition++]) == '\\')
3137 // && (source[currentPosition] == 'u')) {
3138 // getNextUnicodeChar();
3140 // if (withoutUnicodePtr != 0) {
3141 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Character.digit returns -1 when the character is not a valid base-16 digit
3144 if (Character.digit(currentCharacter, 16) == -1)
3145 throw new InvalidInputException(INVALID_HEXA);
3147 while (getNextCharAsDigit(16)) {
3149 // if (getNextChar('l', 'L') >= 0)
3150 // return TokenNameLongLiteral;
3152 return TokenNameIntegerLiteral;
3154 //no x or X followed the leading 0 (the hexa case has returned above)
3155 //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3156 // 00078.0 is true !!!!! crazy language
3157 if (getNextCharAsDigit()) {
3158 //-------------potential octal-----------------
3159 while (getNextCharAsDigit()) {
3161 // if (getNextChar('l', 'L') >= 0) {
3162 // return TokenNameLongLiteral;
3165 // if (getNextChar('f', 'F') >= 0) {
3166 // return TokenNameFloatingPointLiteral;
3168 if (getNextChar('d', 'D') >= 0) {
3169 return TokenNameDoubleLiteral;
3170 } else { //make the distinction between octal and float ....
3171 if (getNextChar('.')) { //bingo ! ....
3172 while (getNextCharAsDigit()) {
3174 if (getNextChar('e', 'E') >= 0) {
3175 // consume next character
3176 unicodeAsBackSlash = false;
3177 currentCharacter = source[currentPosition++];
3178 // if (((currentCharacter = source[currentPosition++]) == '\\')
3179 // && (source[currentPosition] == 'u')) {
3180 // getNextUnicodeChar();
3182 // if (withoutUnicodePtr != 0) {
3183 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3186 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3187 // consume next character
3188 unicodeAsBackSlash = false;
3189 currentCharacter = source[currentPosition++];
3190 // if (((currentCharacter = source[currentPosition++]) == '\\')
3191 // && (source[currentPosition] == 'u')) {
3192 // getNextUnicodeChar();
3194 // if (withoutUnicodePtr != 0) {
3195 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3196 // currentCharacter;
// an exponent needs at least one digit after the optional sign
3200 if (!Character.isDigit(currentCharacter))
3201 throw new InvalidInputException(INVALID_FLOAT);
3202 while (getNextCharAsDigit()) {
3205 // if (getNextChar('f', 'F') >= 0)
3206 // return TokenNameFloatingPointLiteral;
3207 getNextChar('d', 'D'); //jump over potential d or D
3208 return TokenNameDoubleLiteral;
3210 return TokenNameIntegerLiteral;
3217 while (getNextCharAsDigit()) {
3219 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3220 // return TokenNameLongLiteral;
3221 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3222 while (getNextCharAsDigit()) {
3226 //if floating is true both exponent and suffix may be optional
3227 if (getNextChar('e', 'E') >= 0) {
3229 // consume next character
3230 unicodeAsBackSlash = false;
3231 currentCharacter = source[currentPosition++];
3232 // if (((currentCharacter = source[currentPosition++]) == '\\')
3233 // && (source[currentPosition] == 'u')) {
3234 // getNextUnicodeChar();
3236 // if (withoutUnicodePtr != 0) {
3237 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3240 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3243 unicodeAsBackSlash = false;
3244 currentCharacter = source[currentPosition++];
3245 // if (((currentCharacter = source[currentPosition++]) == '\\')
3246 // && (source[currentPosition] == 'u')) {
3247 // getNextUnicodeChar();
3249 // if (withoutUnicodePtr != 0) {
3250 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// an exponent needs at least one digit after the optional sign
3254 if (!Character.isDigit(currentCharacter))
3255 throw new InvalidInputException(INVALID_FLOAT);
3256 while (getNextCharAsDigit()) {
3259 if (getNextChar('d', 'D') >= 0)
3260 return TokenNameDoubleLiteral;
3261 // if (getNextChar('f', 'F') >= 0)
3262 // return TokenNameFloatingPointLiteral;
3263 //the long flag has been tested before
3264 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3267 * Search the line number corresponding to a specific position
3270 public final int getLineNumber(int position) {
// Looks position up among the recorded line ends lineEnds[0 .. linePtr].
// NOTE(review): the comparisons against lineEnds[m] between the bounds g and
// d look like a binary search; the loop header, the bound updates and the
// return statements are not visible here -- confirm against the full file.
3271 if (lineEnds == null)
// number of line ends recorded so far
3273 int length = linePtr + 1;
// g and d are the lower and upper index bounds of the search
3276 int g = 0, d = length - 1;
3280 if (position < lineEnds[m]) {
3282 } else if (position > lineEnds[m]) {
3288 if (position < lineEnds[m]) {
/**
 * Setter for the scanner's PHP mode.
 * NOTE(review): the body is not visible here; presumably it stores mode in
 * the phpMode field -- confirm against the full file.
 *
 * @param mode the new PHP mode value
 */
3293 public void setPHPMode(boolean mode) {
/**
 * Installs a new source buffer and resets the scanning state that depends on
 * it: initialPosition and currentPosition go back to 0, containsAssertKeyword
 * is cleared, withoutUnicodeBuffer is reallocated with the length of the new
 * source, and encapsedStringStack is replaced by an empty Stack.
 *
 * @param source the characters to scan; null is replaced by an empty array
 */
3296 public final void setSource(char[] source) {
3297 // the source buffer is set to the given array
3298 if (source == null) {
// never keep a null buffer: this.source.length is read below
3299 this.source = new char[0];
3301 this.source = source;
3304 initialPosition = currentPosition = 0;
3305 containsAssertKeyword = false;
3306 withoutUnicodeBuffer = new char[this.source.length];
3307 encapsedStringStack = new Stack();
/**
 * Debug rendering of the scanner state. It returns the source text with the
 * current token, source[startPosition .. currentPosition), bracketed by
 * "Starts here -->" and "<-- Ends here" marker lines.
 * <p>
 * Two special cases return the whole source behind a short header: when
 * startPosition equals source.length ("EOF") and when currentPosition is
 * beyond source.length ("behind the EOF").
 * <p>
 * NOTE(review): the last operand of the final concatenation is not visible
 * here; presumably it is the text after the token (the end array) -- confirm.
 */
3309 public String toString() {
3310 if (startPosition == source.length)
3311 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3312 if (currentPosition > source.length)
3313 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
// front: everything before the current token
3314 char front[] = new char[startPosition];
3315 System.arraycopy(source, 0, front, 0, startPosition);
// equals currentPosition - startPosition, i.e. the length of the current token
3316 int middleLength = (currentPosition - 1) - startPosition + 1;
3318 if (middleLength > -1) {
3319 middle = new char[middleLength];
3320 System.arraycopy(source, startPosition, middle, 0, middleLength);
3322 middle = new char[0];
// NOTE(review): end is allocated with source.length - currentPosition + 1
// chars, but only source.length - currentPosition chars are copied into it,
// so its last element keeps the default '\0'.
3324 char end[] = new char[source.length - (currentPosition - 1)];
3325 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length
3326 - (currentPosition - 1) - 1);
3327 return new String(front)
3328 + "\n===============================\nStarts here -->" //$NON-NLS-1$
3329 + new String(middle)
3330 + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3333 public final String toStringAction(int act) {
3335 case TokenNameERROR :
3336 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3338 case TokenNameINLINE_HTML :
3339 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3340 case TokenNameIdentifier :
3341 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3342 case TokenNameVariable :
3343 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3344 case TokenNameabstract :
3345 return "abstract"; //$NON-NLS-1$
3347 return "AND"; //$NON-NLS-1$
3348 case TokenNamearray :
3349 return "array"; //$NON-NLS-1$
3351 return "as"; //$NON-NLS-1$
3352 case TokenNamebreak :
3353 return "break"; //$NON-NLS-1$
3354 case TokenNamecase :
3355 return "case"; //$NON-NLS-1$
3356 case TokenNameclass :
3357 return "class"; //$NON-NLS-1$
3358 case TokenNamecatch :
3359 return "catch"; //$NON-NLS-1$
3360 case TokenNameclone :
3363 case TokenNameconst :
3366 case TokenNamecontinue :
3367 return "continue"; //$NON-NLS-1$
3368 case TokenNamedefault :
3369 return "default"; //$NON-NLS-1$
3370 // case TokenNamedefine :
3371 // return "define"; //$NON-NLS-1$
3373 return "do"; //$NON-NLS-1$
3374 case TokenNameecho :
3375 return "echo"; //$NON-NLS-1$
3376 case TokenNameelse :
3377 return "else"; //$NON-NLS-1$
3378 case TokenNameelseif :
3379 return "elseif"; //$NON-NLS-1$
3380 case TokenNameendfor :
3381 return "endfor"; //$NON-NLS-1$
3382 case TokenNameendforeach :
3383 return "endforeach"; //$NON-NLS-1$
3384 case TokenNameendif :
3385 return "endif"; //$NON-NLS-1$
3386 case TokenNameendswitch :
3387 return "endswitch"; //$NON-NLS-1$
3388 case TokenNameendwhile :
3389 return "endwhile"; //$NON-NLS-1$
3390 case TokenNameextends :
3391 return "extends"; //$NON-NLS-1$
3392 // case TokenNamefalse :
3393 // return "false"; //$NON-NLS-1$
3394 case TokenNamefinal :
3395 return "final"; //$NON-NLS-1$
3397 return "for"; //$NON-NLS-1$
3398 case TokenNameforeach :
3399 return "foreach"; //$NON-NLS-1$
3400 case TokenNamefunction :
3401 return "function"; //$NON-NLS-1$
3402 case TokenNameglobal :
3403 return "global"; //$NON-NLS-1$
3405 return "if"; //$NON-NLS-1$
3406 case TokenNameimplements :
3407 return "implements"; //$NON-NLS-1$
3408 case TokenNameinclude :
3409 return "include"; //$NON-NLS-1$
3410 case TokenNameinclude_once :
3411 return "include_once"; //$NON-NLS-1$
3412 case TokenNameinstanceof :
3413 return "instanceof"; //$NON-NLS-1$
3414 case TokenNameinterface :
3415 return "interface"; //$NON-NLS-1$
3416 case TokenNameisset :
3417 return "isset"; //$NON-NLS-1$
3418 case TokenNamelist :
3419 return "list"; //$NON-NLS-1$
3421 return "new"; //$NON-NLS-1$
3422 // case TokenNamenull :
3423 // return "null"; //$NON-NLS-1$
3425 return "OR"; //$NON-NLS-1$
3426 case TokenNameprint :
3427 return "print"; //$NON-NLS-1$
3428 case TokenNameprivate :
3429 return "private"; //$NON-NLS-1$
3430 case TokenNameprotected :
3431 return "protected"; //$NON-NLS-1$
3432 case TokenNamepublic :
3433 return "public"; //$NON-NLS-1$
3434 case TokenNamerequire :
3435 return "require"; //$NON-NLS-1$
3436 case TokenNamerequire_once :
3437 return "require_once"; //$NON-NLS-1$
3438 case TokenNamereturn :
3439 return "return"; //$NON-NLS-1$
3440 case TokenNamestatic :
3441 return "static"; //$NON-NLS-1$
3442 case TokenNameswitch :
3443 return "switch"; //$NON-NLS-1$
3444 // case TokenNametrue :
3445 // return "true"; //$NON-NLS-1$
3446 case TokenNameunset :
3447 return "unset"; //$NON-NLS-1$
3449 return "var"; //$NON-NLS-1$
3450 case TokenNamewhile :
3451 return "while"; //$NON-NLS-1$
3453 return "XOR"; //$NON-NLS-1$
3454 // case TokenNamethis :
3455 // return "$this"; //$NON-NLS-1$
3456 case TokenNameIntegerLiteral :
3457 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3458 case TokenNameDoubleLiteral :
3459 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3460 case TokenNameStringLiteral :
3461 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3462 case TokenNameStringConstant :
3463 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3464 case TokenNameStringInterpolated :
3465 return "StringInterpolated(" + new String(getCurrentTokenSource())
3466 + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3467 case TokenNameEncapsedString0 :
3468 return "`"; //$NON-NLS-1$
3469 case TokenNameEncapsedString1 :
3470 return "\'"; //$NON-NLS-1$
3471 case TokenNameEncapsedString2 :
3472 return "\""; //$NON-NLS-1$
3473 case TokenNameSTRING :
3474 return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3475 case TokenNameHEREDOC :
3476 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3477 case TokenNamePLUS_PLUS :
3478 return "++"; //$NON-NLS-1$
3479 case TokenNameMINUS_MINUS :
3480 return "--"; //$NON-NLS-1$
3481 case TokenNameEQUAL_EQUAL :
3482 return "=="; //$NON-NLS-1$
3483 case TokenNameEQUAL_EQUAL_EQUAL :
3484 return "==="; //$NON-NLS-1$
3485 case TokenNameEQUAL_GREATER :
3486 return "=>"; //$NON-NLS-1$
3487 case TokenNameLESS_EQUAL :
3488 return "<="; //$NON-NLS-1$
3489 case TokenNameGREATER_EQUAL :
3490 return ">="; //$NON-NLS-1$
3491 case TokenNameNOT_EQUAL :
3492 return "!="; //$NON-NLS-1$
3493 case TokenNameNOT_EQUAL_EQUAL :
3494 return "!=="; //$NON-NLS-1$
3495 case TokenNameLEFT_SHIFT :
3496 return "<<"; //$NON-NLS-1$
3497 case TokenNameRIGHT_SHIFT :
3498 return ">>"; //$NON-NLS-1$
3499 case TokenNamePLUS_EQUAL :
3500 return "+="; //$NON-NLS-1$
3501 case TokenNameMINUS_EQUAL :
3502 return "-="; //$NON-NLS-1$
3503 case TokenNameMULTIPLY_EQUAL :
3504 return "*="; //$NON-NLS-1$
3505 case TokenNameDIVIDE_EQUAL :
3506 return "/="; //$NON-NLS-1$
3507 case TokenNameAND_EQUAL :
3508 return "&="; //$NON-NLS-1$
3509 case TokenNameOR_EQUAL :
3510 return "|="; //$NON-NLS-1$
3511 case TokenNameXOR_EQUAL :
3512 return "^="; //$NON-NLS-1$
3513 case TokenNameREMAINDER_EQUAL :
3514 return "%="; //$NON-NLS-1$
3515 case TokenNameDOT_EQUAL :
3516 return ".="; //$NON-NLS-1$
3517 case TokenNameLEFT_SHIFT_EQUAL :
3518 return "<<="; //$NON-NLS-1$
3519 case TokenNameRIGHT_SHIFT_EQUAL :
3520 return ">>="; //$NON-NLS-1$
3521 case TokenNameOR_OR :
3522 return "||"; //$NON-NLS-1$
3523 case TokenNameAND_AND :
3524 return "&&"; //$NON-NLS-1$
3525 case TokenNamePLUS :
3526 return "+"; //$NON-NLS-1$
3527 case TokenNameMINUS :
3528 return "-"; //$NON-NLS-1$
3529 case TokenNameMINUS_GREATER :
3532 return "!"; //$NON-NLS-1$
3533 case TokenNameREMAINDER :
3534 return "%"; //$NON-NLS-1$
3536 return "^"; //$NON-NLS-1$
3538 return "&"; //$NON-NLS-1$
3539 case TokenNameMULTIPLY :
3540 return "*"; //$NON-NLS-1$
3542 return "|"; //$NON-NLS-1$
3543 case TokenNameTWIDDLE :
3544 return "~"; //$NON-NLS-1$
3545 case TokenNameTWIDDLE_EQUAL :
3546 return "~="; //$NON-NLS-1$
3547 case TokenNameDIVIDE :
3548 return "/"; //$NON-NLS-1$
3549 case TokenNameGREATER :
3550 return ">"; //$NON-NLS-1$
3551 case TokenNameLESS :
3552 return "<"; //$NON-NLS-1$
3553 case TokenNameLPAREN :
3554 return "("; //$NON-NLS-1$
3555 case TokenNameRPAREN :
3556 return ")"; //$NON-NLS-1$
3557 case TokenNameLBRACE :
3558 return "{"; //$NON-NLS-1$
3559 case TokenNameRBRACE :
3560 return "}"; //$NON-NLS-1$
3561 case TokenNameLBRACKET :
3562 return "["; //$NON-NLS-1$
3563 case TokenNameRBRACKET :
3564 return "]"; //$NON-NLS-1$
3565 case TokenNameSEMICOLON :
3566 return ";"; //$NON-NLS-1$
3567 case TokenNameQUESTION :
3568 return "?"; //$NON-NLS-1$
3569 case TokenNameCOLON :
3570 return ":"; //$NON-NLS-1$
3571 case TokenNameCOMMA :
3572 return ","; //$NON-NLS-1$
3574 return "."; //$NON-NLS-1$
3575 case TokenNameEQUAL :
3576 return "="; //$NON-NLS-1$
3579 case TokenNameDOLLAR :
3581 case TokenNameDOLLAR_LBRACE :
3584 return "EOF"; //$NON-NLS-1$
3585 case TokenNameWHITESPACE :
3586 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3587 case TokenNameCOMMENT_LINE :
3588 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3589 case TokenNameCOMMENT_BLOCK :
3590 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3591 case TokenNameCOMMENT_PHPDOC :
3592 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3593 // case TokenNameHTML :
3594 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
3596 case TokenNameFILE :
3597 return "__FILE__"; //$NON-NLS-1$
3598 case TokenNameLINE :
3599 return "__LINE__"; //$NON-NLS-1$
3600 case TokenNameCLASS_C :
3601 return "__CLASS__"; //$NON-NLS-1$
3602 case TokenNameMETHOD_C :
3603 return "__METHOD__"; //$NON-NLS-1$
3604 case TokenNameFUNC_C :
3605 return "__FUNCTION__"; //$NON-NLS-1
3606 case TokenNameboolCAST :
3607 return "( bool )"; //$NON-NLS-1$
3608 case TokenNameintCAST :
3609 return "( int )"; //$NON-NLS-1$
3610 case TokenNamedoubleCAST :
3611 return "( double )"; //$NON-NLS-1$
3612 case TokenNameobjectCAST :
3613 return "( object )"; //$NON-NLS-1$
3614 case TokenNamestringCAST :
3615 return "( string )"; //$NON-NLS-1$
3617 return "not-a-token(" + (new Integer(act)) + ") "
3618 + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner whose {@code assertMode} flag is {@code false}.
 * <p>
 * All three arguments are forwarded unchanged to the four-argument
 * constructor.
 *
 * @param tokenizeComments
 *          value stored in the {@code tokenizeComments} field
 * @param tokenizeWhiteSpace
 *          value stored in the {@code tokenizeWhiteSpace} field
 * @param checkNonExternalizedStringLiterals
 *          value stored in the {@code checkNonExternalizedStringLiterals}
 *          field
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
    boolean checkNonExternalizedStringLiterals) {
  this(tokenizeComments,
      tokenizeWhiteSpace,
      checkNonExternalizedStringLiterals,
      false /* assertMode */);
}
/**
 * Creates a scanner and records the given configuration flags.
 * <p>
 * Besides storing the four flags, the constructor sets {@code eofPosition}
 * to {@link Integer#MAX_VALUE} and clears {@code encapsedStringStack}.
 *
 * @param tokenizeComments
 *          value stored in the {@code tokenizeComments} field
 * @param tokenizeWhiteSpace
 *          value stored in the {@code tokenizeWhiteSpace} field
 * @param checkNonExternalizedStringLiterals
 *          value stored in the {@code checkNonExternalizedStringLiterals}
 *          field
 * @param assertMode
 *          value stored in the {@code assertMode} field
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
    boolean checkNonExternalizedStringLiterals, boolean assertMode) {
  // caller-supplied configuration
  this.assertMode = assertMode;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeComments = tokenizeComments;
  // fixed initial state
  this.encapsedStringStack = null;
  this.eofPosition = Integer.MAX_VALUE;
}
3635 private void checkNonExternalizeString() throws InvalidInputException {
3636 if (currentLine == null)
3638 parseTags(currentLine);
3640 private void parseTags(NLSLine line) throws InvalidInputException {
3641 String s = new String(getCurrentTokenSource());
3642 int pos = s.indexOf(TAG_PREFIX);
3643 int lineLength = line.size();
3645 int start = pos + TAG_PREFIX_LENGTH;
3646 int end = s.indexOf(TAG_POSTFIX, start);
3647 String index = s.substring(start, end);
3650 i = Integer.parseInt(index) - 1;
3651 // Tags are one based not zero based.
3652 } catch (NumberFormatException e) {
3653 i = -1; // we don't want to consider this as a valid NLS tag
3655 if (line.exists(i)) {
3658 pos = s.indexOf(TAG_PREFIX, start);
3660 this.nonNLSStrings = new StringLiteral[lineLength];
3661 int nonNLSCounter = 0;
3662 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3663 StringLiteral literal = (StringLiteral) iterator.next();
3664 if (literal != null) {
3665 this.nonNLSStrings[nonNLSCounter++] = literal;
3668 if (nonNLSCounter == 0) {
3669 this.nonNLSStrings = null;
3673 this.wasNonExternalizedStringLiteral = true;
3674 if (nonNLSCounter != lineLength) {
3675 System.arraycopy(this.nonNLSStrings, 0,
3676 (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
3681 public final void scanEscapeCharacter() throws InvalidInputException {
3682 // the string with "\\u" is a legal string of two chars \ and u
3683 //thus we use a direct access to the source (for regular cases).
3684 if (unicodeAsBackSlash) {
3685 // consume next character
3686 unicodeAsBackSlash = false;
3687 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
3688 // (source[currentPosition] == 'u')) {
3689 // getNextUnicodeChar();
3691 if (withoutUnicodePtr != 0) {
3692 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3696 currentCharacter = source[currentPosition++];
3697 switch (currentCharacter) {
3699 currentCharacter = '\b';
3702 currentCharacter = '\t';
3705 currentCharacter = '\n';
3708 currentCharacter = '\f';
3711 currentCharacter = '\r';
3714 currentCharacter = '\"';
3717 currentCharacter = '\'';
3720 currentCharacter = '\\';
3723 // -----------octal escape--------------
3725 // OctalDigit OctalDigit
3726 // ZeroToThree OctalDigit OctalDigit
3727 int number = Character.getNumericValue(currentCharacter);
3728 if (number >= 0 && number <= 7) {
3729 boolean zeroToThreeNot = number > 3;
3730 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3731 int digit = Character.getNumericValue(currentCharacter);
3732 if (digit >= 0 && digit <= 7) {
3733 number = (number * 8) + digit;
3735 .isDigit(currentCharacter = source[currentPosition++])) {
3736 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3737 // Digit --> ignore last character
3740 digit = Character.getNumericValue(currentCharacter);
3741 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
3742 // OctalDigit OctalDigit
3743 number = (number * 8) + digit;
3744 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3745 // --> ignore last character
3749 } else { // has read \OctalDigit NonDigit--> ignore last
3753 } else { // has read \OctalDigit NonOctalDigit--> ignore last
3757 } else { // has read \OctalDigit --> ignore last character
3761 throw new InvalidInputException(INVALID_ESCAPE);
3762 currentCharacter = (char) number;
3764 throw new InvalidInputException(INVALID_ESCAPE);
3767 // check presence of task: tags
3768 public void checkTaskTag(int commentStart, int commentEnd) {
3769 // only look for newer task: tags
3770 if (this.foundTaskCount > 0
3771 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
3774 int foundTaskIndex = this.foundTaskCount;
3775 nextChar : for (int i = commentStart; i < commentEnd
3776 && i < this.eofPosition; i++) {
3778 char[] priority = null;
3779 // check for tag occurrence
3780 nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
3781 tag = this.taskTags[itag];
3782 priority = this.taskPriorities != null
3783 && itag < this.taskPriorities.length
3784 ? this.taskPriorities[itag]
3786 int tagLength = tag.length;
3787 for (int t = 0; t < tagLength; t++) {
3788 if (this.source[i + t] != tag[t])
3791 if (this.foundTaskTags == null) {
3792 this.foundTaskTags = new char[5][];
3793 this.foundTaskMessages = new char[5][];
3794 this.foundTaskPriorities = new char[5][];
3795 this.foundTaskPositions = new int[5][];
3796 } else if (this.foundTaskCount == this.foundTaskTags.length) {
3797 System.arraycopy(this.foundTaskTags, 0,
3798 this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
3799 this.foundTaskCount);
3800 System.arraycopy(this.foundTaskMessages, 0,
3801 this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
3802 this.foundTaskCount);
3803 System.arraycopy(this.foundTaskPriorities, 0,
3804 this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
3805 0, this.foundTaskCount);
3806 System.arraycopy(this.foundTaskPositions, 0,
3807 this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
3808 this.foundTaskCount);
3810 this.foundTaskTags[this.foundTaskCount] = tag;
3811 this.foundTaskPriorities[this.foundTaskCount] = priority;
3812 this.foundTaskPositions[this.foundTaskCount] = new int[]{i,
3814 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
3815 this.foundTaskCount++;
3816 i += tagLength - 1; // will be incremented when looping
3819 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
3820 // retrieve message start and end positions
3821 int msgStart = this.foundTaskPositions[i][0]
3822 + this.foundTaskTags[i].length;
3823 int max_value = i + 1 < this.foundTaskCount
3824 ? this.foundTaskPositions[i + 1][0] - 1
3826 // at most beginning of next task
3827 if (max_value < msgStart)
3828 max_value = msgStart; // would only occur if tag is before EOF.
3831 for (int j = msgStart; j < max_value; j++) {
3832 if ((c = this.source[j]) == '\n' || c == '\r') {
3838 for (int j = max_value; j > msgStart; j--) {
3839 if ((c = this.source[j]) == '*') {
3847 if (msgStart == end)
3850 while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
3852 while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
3854 // update the end position of the task
3855 this.foundTaskPositions[i][1] = end;
3856 // get the message source
3857 final int messageLength = end - msgStart + 1;
3858 char[] message = new char[messageLength];
3859 System.arraycopy(source, msgStart, message, 0, messageLength);
3860 this.foundTaskMessages[i] = message;