1 /***********************************************************************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3 * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4 * and is available at http://www.eclipse.org/legal/cpl-v05.html
6 * Contributors: IBM Corporation - initial API and implementation
7 **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
14 import net.sourceforge.phpdt.core.compiler.CharOperation;
15 import net.sourceforge.phpdt.core.compiler.IScanner;
16 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
17 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
21 public class Scanner implements IScanner, ITerminalSymbols {
23 * APIs are: getNextToken(), which returns the type of the current token (this
24 * value is not memorized by the scanner); getCurrentTokenSource(), which
25 * provides the token's "REAL" source (i.e. all unicode escapes have been
26 * transformed into the correct char); sourceStart, which gives the position into the
27 * stream; and currentPosition-1, which gives the sourceEnd position into the stream
30 // private boolean assertMode;
31 public boolean useAssertAsAnIndentifier = false;
33 // flag indicating if processed source contains occurrences of keyword assert
34 public boolean containsAssertKeyword = false;
36 public boolean recordLineSeparator;
38 public boolean ignorePHPOneLiner = false;
40 public boolean phpMode = false;
43 * This token is set to TokenNameecho if a short tag block begins (i.e. <?= ... )
44 * Directly after the "=" character the getNextToken() method returns TokenNameINLINE_HTML
45 * In the next call to the getNextToken() method the value of fFillerToken (==TokenNameecho) is returned
48 int fFillerToken = TokenNameEOF;
50 public char currentCharacter;
52 public int startPosition;
54 public int currentPosition;
56 public int initialPosition, eofPosition;
58 // after this position eof are generated instead of real token from the
60 public boolean tokenizeComments;
62 public boolean tokenizeWhiteSpace;
64 public boolean tokenizeStrings;
66 // source should be viewed as a window (aka a part)
67 // of an entire very large stream
71 public char[] withoutUnicodeBuffer;
73 public int withoutUnicodePtr;
75 // when == 0 ==> no unicode in the current token
76 public boolean unicodeAsBackSlash = false;
78 public boolean scanningFloatLiteral = false;
80 // support for /** comments
81 public int[] commentStops = new int[10];
83 public int[] commentStarts = new int[10];
85 public int commentPtr = -1; // no comment test with commentPtr value -1
87 protected int lastCommentLinePosition = -1;
89 // diet parsing support - jump over some method body when requested
90 public boolean diet = false;
92 // support for the poor-line-debuggers ....
93 // remember the position of the cr/lf
94 public int[] lineEnds = new int[250];
96 public int linePtr = -1;
98 public boolean wasAcr = false;
100 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
102 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
104 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
106 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
108 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
110 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
112 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
114 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
116 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
118 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
120 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
122 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
124 // ----------------optimized identifier management------------------
125 static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
126 charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
127 charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
128 charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
129 charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
130 charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
131 charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
132 charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
133 charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
135 static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
136 '$', 'c' }, charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' }, charArray_vf = new char[] { '$',
137 'f' }, charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' },
138 charArray_vi = new char[] { '$', 'i' }, charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' },
139 charArray_vl = new char[] { '$', 'l' }, charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' },
140 charArray_vo = new char[] { '$', 'o' }, charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' },
141 charArray_vr = new char[] { '$', 'r' }, charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' },
142 charArray_vu = new char[] { '$', 'u' }, charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' },
143 charArray_vx = new char[] { '$', 'x' }, charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
145 public final static int MAX_OBVIOUS = 256;
147 static final int[] ObviousIdentCharNatures = new int[MAX_OBVIOUS];
149 public final static int C_DOLLAR = 8;
151 public final static int C_LETTER = 4;
153 public final static int C_DIGIT = 3;
155 public final static int C_SEPARATOR = 2;
157 public final static int C_SPACE = 1;
// Populate the ObviousIdentCharNatures lookup table, which is indexed by
// character code and sized MAX_OBVIOUS. Codes that are not assigned below
// keep the int array default of 0, which equals none of the C_* constants.
// ASCII decimal digits.
159 for (int i = '0'; i <= '9'; i++)
160 ObviousIdentCharNatures[i] = C_DIGIT;
// ASCII letters (both cases) and the underscore are classified as letters.
162 for (int i = 'a'; i <= 'z'; i++)
163 ObviousIdentCharNatures[i] = C_LETTER;
164 for (int i = 'A'; i <= 'Z'; i++)
165 ObviousIdentCharNatures[i] = C_LETTER;
166 ObviousIdentCharNatures['_'] = C_LETTER;
// Every code from 127 through 255 is classified as a letter as well.
167 for (int i = 127; i <= 255; i++)
168 ObviousIdentCharNatures[i] = C_LETTER;
// '$' gets its own class so callers can tell variables from plain identifiers.
170 ObviousIdentCharNatures['$'] = C_DOLLAR;
// Whitespace characters.
172 ObviousIdentCharNatures[10] = C_SPACE; // \ u000a: LINE FEED
173 ObviousIdentCharNatures[12] = C_SPACE; // \ u000c: FORM FEED
174 ObviousIdentCharNatures[13] = C_SPACE; // \ u000d: CARRIAGE RETURN
175 ObviousIdentCharNatures[32] = C_SPACE; // \ u0020: SPACE
176 ObviousIdentCharNatures[9] = C_SPACE; // \ u0009: HORIZONTAL TABULATION
// Punctuation, operator and quote characters are all classified as separators.
178 ObviousIdentCharNatures['.'] = C_SEPARATOR;
179 ObviousIdentCharNatures[':'] = C_SEPARATOR;
180 ObviousIdentCharNatures[';'] = C_SEPARATOR;
181 ObviousIdentCharNatures[','] = C_SEPARATOR;
182 ObviousIdentCharNatures['['] = C_SEPARATOR;
183 ObviousIdentCharNatures[']'] = C_SEPARATOR;
184 ObviousIdentCharNatures['('] = C_SEPARATOR;
185 ObviousIdentCharNatures[')'] = C_SEPARATOR;
186 ObviousIdentCharNatures['{'] = C_SEPARATOR;
187 ObviousIdentCharNatures['}'] = C_SEPARATOR;
188 ObviousIdentCharNatures['+'] = C_SEPARATOR;
189 ObviousIdentCharNatures['-'] = C_SEPARATOR;
190 ObviousIdentCharNatures['*'] = C_SEPARATOR;
191 ObviousIdentCharNatures['/'] = C_SEPARATOR;
192 ObviousIdentCharNatures['='] = C_SEPARATOR;
193 ObviousIdentCharNatures['&'] = C_SEPARATOR;
194 ObviousIdentCharNatures['|'] = C_SEPARATOR;
195 ObviousIdentCharNatures['?'] = C_SEPARATOR;
196 ObviousIdentCharNatures['<'] = C_SEPARATOR;
197 ObviousIdentCharNatures['>'] = C_SEPARATOR;
198 ObviousIdentCharNatures['!'] = C_SEPARATOR;
199 ObviousIdentCharNatures['%'] = C_SEPARATOR;
200 ObviousIdentCharNatures['^'] = C_SEPARATOR;
201 ObviousIdentCharNatures['~'] = C_SEPARATOR;
202 ObviousIdentCharNatures['"'] = C_SEPARATOR;
203 ObviousIdentCharNatures['\''] = C_SEPARATOR;
206 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
208 static final int TableSize = 30, InternalTableSize = 6;
210 // 30*6 = 180 entries
211 public static final int OptimizedLength = 6;
214 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
216 // support for detecting non-externalized string literals
217 int currentLineNr = -1;
219 int previousLineNr = -1;
221 NLSLine currentLine = null;
223 List lines = new ArrayList();
225 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
227 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
229 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
231 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
233 public StringLiteral[] nonNLSStrings = null;
235 public boolean checkNonExternalizedStringLiterals = true;
237 public boolean wasNonExternalizedStringLiteral = false;
// Point every slot of the three-level charArray_length table at the shared
// initCharArray placeholder.
// NOTE(review): the outer bound is the literal 6, the same value as
// OptimizedLength, which sizes the first dimension - confirm they must stay in sync.
240 for (int i = 0; i < 6; i++) {
241 for (int j = 0; j < TableSize; j++) {
242 for (int k = 0; k < InternalTableSize; k++) {
243 charArray_length[i][j][k] = initCharArray;
249 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
251 public static final int RoundBracket = 0;
253 public static final int SquareBracket = 1;
255 public static final int CurlyBracket = 2;
257 public static final int BracketKinds = 3;
260 public char[][] foundTaskTags = null;
262 public char[][] foundTaskMessages;
264 public char[][] foundTaskPriorities = null;
266 public int[][] foundTaskPositions;
268 public int foundTaskCount = 0;
270 public char[][] taskTags = null;
272 public char[][] taskPriorities = null;
274 public boolean isTaskCaseSensitive = true;
276 public static final boolean DEBUG = false;
278 public static final boolean TRACE = false;
280 public ICompilationUnit compilationUnit = null;
283 * Determines if the specified character is permissible as the first character
284 * in a PHP identifier or variable
286 * The '$' character for PHP variables is regarded as a correct first
// For ch below MAX_OBVIOUS, answers whether the lookup table classifies it as
// C_LETTER or C_DOLLAR.
// NOTE(review): the result for ch >= MAX_OBVIOUS is not visible in this excerpt.
290 public static boolean isPHPIdentOrVarStart(char ch) {
291 if (ch < MAX_OBVIOUS) {
292 return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DOLLAR;
// Earlier, non-table-driven form of the test, kept for reference:
295 // return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <=
296 // ch && ch <= 0xFF);
300 * Determines if the specified character is permissible as the first character
301 * in a PHP identifier.
303 * The '$' character for PHP variables isn't regarded as the first character !
// For ch below MAX_OBVIOUS, answers whether the lookup table classifies it as
// C_LETTER; unlike isPHPIdentOrVarStart, C_DOLLAR is not accepted here.
// NOTE(review): the result for ch >= MAX_OBVIOUS is not visible in this excerpt.
305 public static boolean isPHPIdentifierStart(char ch) {
306 if (ch < MAX_OBVIOUS) {
307 return ObviousIdentCharNatures[ch] == C_LETTER;
// Earlier, non-table-driven form of the test, kept for reference:
310 // return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <=
315 * Determines if the specified character may be part of a PHP identifier as
316 * other than the first character
// For ch below MAX_OBVIOUS, answers whether the lookup table classifies it as
// C_LETTER or C_DIGIT.
// NOTE(review): the result for ch >= MAX_OBVIOUS is not visible in this excerpt.
318 public static boolean isPHPIdentifierPart(char ch) {
319 if (ch < MAX_OBVIOUS) {
320 return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DIGIT;
// Earlier, non-table-driven form of the test, kept for reference:
323 // return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch
// For ch below MAX_OBVIOUS, answers whether the lookup table classifies it as
// C_LETTER or C_DIGIT.
// NOTE(review): the visible test is the same one isPHPIdentifierPart uses;
// any difference for ch >= MAX_OBVIOUS is not visible in this excerpt.
327 public static boolean isSQLIdentifierPart(char ch) {
328 if (ch < MAX_OBVIOUS) {
329 return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DIGIT;
// Answers true when currentPosition equals the length of the source array.
334 public final boolean atEnd() {
335 // This code is not relevant if source is
336 // Only a part of the real stream input
337 return source.length == currentPosition;
// Returns the characters of the current token, i.e. source[startPosition ..
// currentPosition). The token length selects one of the six
// optimizedCurrentTokenSourceN() helpers; any other length falls through to a
// plain array copy.
// NOTE(review): the case labels and the declaration of result are not visible
// in this excerpt; the helpers presumably handle lengths 1 through 6 in order.
340 public char[] getCurrentIdentifierSource() {
341 // return the token REAL source (aka unicodes are precomputed)
// Disabled unicode-buffer path, kept for reference:
343 // if (withoutUnicodePtr != 0)
344 // //0 is used as a fast test flag so the real first char is in position 1
346 // withoutUnicodeBuffer,
348 // result = new char[withoutUnicodePtr],
350 // withoutUnicodePtr);
352 int length = currentPosition - startPosition;
353 switch (length) { // see OptimizedLength
355 return optimizedCurrentTokenSource1();
357 return optimizedCurrentTokenSource2();
359 return optimizedCurrentTokenSource3();
361 return optimizedCurrentTokenSource4();
363 return optimizedCurrentTokenSource5();
365 return optimizedCurrentTokenSource6();
// Fallback: copy the token characters into a freshly allocated array.
368 System.arraycopy(source, startPosition, result = new char[length], 0, length);
// Returns the index of the last character of the current token, which is one
// before currentPosition.
373 public int getCurrentTokenEndPosition() {
374 return this.currentPosition - 1;
// Copies source[startPosition .. currentPosition) into a newly allocated
// array.
// NOTE(review): the declarations of result and length and the return
// statement are not visible in this excerpt.
377 public final char[] getCurrentTokenSource() {
378 // Return the token REAL source (aka unicodes are precomputed)
// Disabled unicode-buffer path, kept for reference:
380 // if (withoutUnicodePtr != 0)
381 // // 0 is used as a fast test flag so the real first char is in position 1
383 // withoutUnicodeBuffer,
385 // result = new char[withoutUnicodePtr],
387 // withoutUnicodePtr);
// length is assigned inside the allocation expression and reused as the copy count.
390 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
// Copies source[startPos .. currentPosition) into a newly allocated array,
// using the caller-supplied start instead of the scanner's startPosition.
// NOTE(review): the declarations of result and length and the return
// statement are not visible in this excerpt.
395 public final char[] getCurrentTokenSource(int startPos) {
396 // Return the token REAL source (aka unicodes are precomputed)
// Disabled unicode-buffer path, kept for reference:
398 // if (withoutUnicodePtr != 0)
399 // // 0 is used as a fast test flag so the real first char is in position 1
401 // withoutUnicodeBuffer,
403 // result = new char[withoutUnicodePtr],
405 // withoutUnicodePtr);
// length is assigned inside the allocation expression and reused as the copy count.
408 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
// Returns the current token without its first and last character (the
// surrounding quotes, per the comment below). When withoutUnicodePtr is
// non-zero the characters come from withoutUnicodeBuffer; the second copy
// reads them straight from source.
// NOTE(review): the "else" between the two copies, the declarations and the
// return statement are not visible in this excerpt.
413 public final char[] getCurrentTokenSourceString() {
414 // return the token REAL source (aka unicodes are precomputed).
415 // REMOVE the two " that are at the beginning and the end.
417 if (withoutUnicodePtr != 0)
418 // 0 is used as a fast test flag so the real first char is in position 1
419 System.arraycopy(withoutUnicodeBuffer, 2,
420 // 2 is 1 (real start) + 1 (to jump over the ")
421 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
// Direct path: skip one character at the front and drop one at the end.
424 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
// Compares word with the raw characters of the current token,
// source[startPosition .. currentPosition): first by length, then character
// by character.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably false on a length or character mismatch and true otherwise.
429 public final boolean equalsCurrentTokenSource(char[] word) {
430 if (word.length != currentPosition - startPosition) {
433 for (int i = 0; i < word.length; i++) {
434 if (word[i] != source[startPosition + i]) {
// Copies the source characters that follow the current position into a new
// array of eofPosition - currentPosition - 1 elements, starting at
// currentPosition.
// NOTE(review): because of the "- 1" the character at eofPosition - 1 is not
// copied - confirm this is intended.
// NOTE(review): if currentPosition >= eofPosition the length is negative and
// the allocation throws NegativeArraySizeException; no guard is visible here.
441 public final char[] getRawTokenSourceEnd() {
442 int length = this.eofPosition - this.currentPosition - 1;
443 char[] sourceEnd = new char[length];
444 System.arraycopy(this.source, this.currentPosition, sourceEnd, 0, length);
// Returns startPosition, the index at which the current token begins.
448 public int getCurrentTokenStartPosition() {
449 return this.startPosition;
// Returns the characters produced by getCurrentStringLiteralSource() as a
// new String.
452 public final String getCurrentStringLiteral() {
453 char[] result = getCurrentStringLiteralSource();
454 return new String(result);
// Copies the current token without its first and last character, i.e.
// source[startPosition + 1 .. currentPosition - 1), into a new array.
// NOTE(review): the body of the guard below (taken when the token is at most
// one character long), the declarations and the return statement are not
// visible in this excerpt.
457 public final char[] getCurrentStringLiteralSource() {
458 // Return the token REAL source (aka unicodes are precomputed)
459 if (startPosition + 1 >= currentPosition) {
464 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
// Copies source[startPos + 1 .. currentPosition - 1) into a new array, i.e.
// the text starting at startPos without its first and last character.
// NOTE(review): unlike the no-argument overload, no length guard is visible
// here; a span shorter than two characters would yield a negative array size.
469 public final char[] getCurrentStringLiteralSource(int startPos) {
470 // Return the token REAL source (aka unicodes are precomputed)
473 System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
479 * Search the source position corresponding to the end of a given line number
481 * Line numbers are 1-based, and relative to the scanner initialPosition.
482 * Character positions are 0-based.
484 * In case the given line number is inconsistent, answers -1.
// Looks up the end position of the given 1-based line in lineEnds; the
// ordinary case returns lineEnds[lineNumber - 1].
// NOTE(review): the statements guarded by the three checks below are not
// visible in this excerpt.
486 public final int getLineEnd(int lineNumber) {
487 if (lineEnds == null)
489 if (lineNumber >= lineEnds.length)
493 if (lineNumber == lineEnds.length - 1)
495 return lineEnds[lineNumber - 1];
496 // next line start one character behind the lineEnd of the previous line
500 * Search the source position corresponding to the beginning of a given line
503 * Line numbers are 1-based, and relative to the scanner initialPosition.
504 * Character positions are 0-based.
506 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
508 * In case the given line number is inconsistent, answers -1.
// Looks up the start position of the given 1-based line: one branch returns
// initialPosition, the ordinary case returns the position just after the end
// of the previous line (lineEnds[lineNumber - 2] + 1).
// NOTE(review): the statements guarded by the first two checks, and the
// condition that selects initialPosition, are not visible in this excerpt.
510 public final int getLineStart(int lineNumber) {
511 if (lineEnds == null)
513 if (lineNumber >= lineEnds.length)
518 return initialPosition;
519 return lineEnds[lineNumber - 2] + 1;
520 // next line start one character behind the lineEnd of the previous line
// Reads the next source character into currentCharacter and advances
// currentPosition; when that character differs from testedChar, or the read
// runs past the end of source, currentPosition is reset to its value on entry.
// NOTE(review): only currentPosition is visibly restored on a mismatch -
// currentCharacter keeps the character that was read.
// NOTE(review): the "try {" line and the return statements are not visible in
// this excerpt; presumably true on a match and false otherwise.
523 public final boolean getNextChar(char testedChar) {
525 // handle the case of unicode.
526 // when a unicode appears then we must use a buffer that holds char
528 // At the end of this method currentCharacter holds the new visited char
529 // and currentPosition points right next after it
530 // Both previous lines are true if the currentCharacter is == to the
532 // On false, no side effect has occurred.
533 // ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored.
534 int temp = currentPosition;
536 currentCharacter = source[currentPosition++];
// Disabled unicode-escape handling, kept for reference:
537 // if (((currentCharacter = source[currentPosition++]) == '\\')
538 // && (source[currentPosition] == 'u')) {
539 // //-------------unicode traitement ------------
540 // int c1, c2, c3, c4;
541 // int unicodeSize = 6;
542 // currentPosition++;
543 // while (source[currentPosition] == 'u') {
544 // currentPosition++;
548 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
550 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
552 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
554 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
556 // currentPosition = temp;
560 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
561 // if (currentCharacter != testedChar) {
562 // currentPosition = temp;
565 // unicodeAsBackSlash = currentCharacter == '\\';
567 // //need the unicode buffer
568 // if (withoutUnicodePtr == 0) {
569 // //buffer all the entries that have been left aside....
570 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
574 // withoutUnicodeBuffer,
576 // withoutUnicodePtr);
578 // //fill the buffer with the char
579 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
582 // } //-------------end unicode traitement--------------
// Mismatch: rewind to the entry position.
584 if (currentCharacter != testedChar) {
585 currentPosition = temp;
588 unicodeAsBackSlash = false;
589 // if (withoutUnicodePtr != 0)
590 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source: clear the flag and rewind.
593 } catch (IndexOutOfBoundsException e) {
594 unicodeAsBackSlash = false;
595 currentPosition = temp;
// Reads the next source character into currentCharacter and compares it with
// two candidates. According to the comment below the result is 0 for
// testedChar1, 1 for testedChar2 and -1 otherwise; in the "otherwise" case,
// and when the read runs past the end of source, currentPosition is reset to
// its value on entry.
// NOTE(review): the "try {" line, the result assignments and the return
// statements are not visible in this excerpt.
600 public final int getNextChar(char testedChar1, char testedChar2) {
601 // INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
602 // test can be done with (x==0) for the first and (x>0) for the second
603 // handle the case of unicode.
604 // when a unicode appears then we must use a buffer that holds char
606 // At the end of this method currentCharacter holds the new visited char
607 // and currentPosition points right next after it
608 // Both previous lines are true if the currentCharacter is == to the
610 // On false, no side effect has occurred.
611 // ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored.
612 int temp = currentPosition;
615 currentCharacter = source[currentPosition++];
// Disabled unicode-escape handling, kept for reference:
616 // if (((currentCharacter = source[currentPosition++]) == '\\')
617 // && (source[currentPosition] == 'u')) {
618 // //-------------unicode traitement ------------
619 // int c1, c2, c3, c4;
620 // int unicodeSize = 6;
621 // currentPosition++;
622 // while (source[currentPosition] == 'u') {
623 // currentPosition++;
627 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
629 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
631 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
633 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
635 // currentPosition = temp;
639 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
640 // if (currentCharacter == testedChar1)
642 // else if (currentCharacter == testedChar2)
645 // currentPosition = temp;
649 // //need the unicode buffer
650 // if (withoutUnicodePtr == 0) {
651 // //buffer all the entries that have been left aside....
652 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
656 // withoutUnicodeBuffer,
658 // withoutUnicodePtr);
660 // //fill the buffer with the char
661 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
663 // } //-------------end unicode traitement--------------
// Classify the character; neither candidate matched means rewind.
665 if (currentCharacter == testedChar1)
667 else if (currentCharacter == testedChar2)
670 currentPosition = temp;
673 // if (withoutUnicodePtr != 0)
674 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source: rewind.
677 } catch (IndexOutOfBoundsException e) {
678 currentPosition = temp;
// Reads the next source character into currentCharacter and advances
// currentPosition; when Character.isDigit rejects it, or the read runs past
// the end of source, currentPosition is reset to its value on entry.
// NOTE(review): only currentPosition is visibly restored on rejection -
// currentCharacter keeps the character that was read.
// NOTE(review): the "try {" line and the return statements are not visible in
// this excerpt; presumably true for a digit and false otherwise.
683 public final boolean getNextCharAsDigit() {
685 // handle the case of unicode.
686 // when a unicode appears then we must use a buffer that holds char
688 // At the end of this method currentCharacter holds the new visited char
689 // and currentPosition points right next after it
690 // Both previous lines are true if the currentCharacter is a digit
691 // On false, no side effect has occurred.
692 // ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored.
693 int temp = currentPosition;
695 currentCharacter = source[currentPosition++];
// Disabled unicode-escape handling, kept for reference:
696 // if (((currentCharacter = source[currentPosition++]) == '\\')
697 // && (source[currentPosition] == 'u')) {
698 // //-------------unicode traitement ------------
699 // int c1, c2, c3, c4;
700 // int unicodeSize = 6;
701 // currentPosition++;
702 // while (source[currentPosition] == 'u') {
703 // currentPosition++;
707 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
709 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
711 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
713 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
715 // currentPosition = temp;
719 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
720 // if (!Character.isDigit(currentCharacter)) {
721 // currentPosition = temp;
725 // //need the unicode buffer
726 // if (withoutUnicodePtr == 0) {
727 // //buffer all the entries that have been left aside....
728 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
732 // withoutUnicodeBuffer,
734 // withoutUnicodePtr);
736 // //fill the buffer with the char
737 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
739 // } //-------------end unicode traitement--------------
// Not a digit: rewind to the entry position.
741 if (!Character.isDigit(currentCharacter)) {
742 currentPosition = temp;
745 // if (withoutUnicodePtr != 0)
746 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source: rewind.
749 } catch (IndexOutOfBoundsException e) {
750 currentPosition = temp;
// Reads the next source character into currentCharacter and advances
// currentPosition; when Character.digit(currentCharacter, radix) reports -1
// (not a digit in that radix), or the read runs past the end of source,
// currentPosition is reset to its value on entry.
// NOTE(review): only currentPosition is visibly restored on rejection -
// currentCharacter keeps the character that was read.
// NOTE(review): the "try {" line and the return statements are not visible in
// this excerpt; presumably true for a digit and false otherwise.
755 public final boolean getNextCharAsDigit(int radix) {
757 // handle the case of unicode.
758 // when a unicode appears then we must use a buffer that holds char
760 // At the end of this method currentCharacter holds the new visited char
761 // and currentPosition points right next after it
762 // Both previous lines are true if the currentCharacter is a digit base on
764 // On false, no side effect has occurred.
765 // ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored.
766 int temp = currentPosition;
768 currentCharacter = source[currentPosition++];
// Disabled unicode-escape handling, kept for reference:
769 // if (((currentCharacter = source[currentPosition++]) == '\\')
770 // && (source[currentPosition] == 'u')) {
771 // //-------------unicode traitement ------------
772 // int c1, c2, c3, c4;
773 // int unicodeSize = 6;
774 // currentPosition++;
775 // while (source[currentPosition] == 'u') {
776 // currentPosition++;
780 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
782 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
784 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
786 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
788 // currentPosition = temp;
792 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
793 // if (Character.digit(currentCharacter, radix) == -1) {
794 // currentPosition = temp;
798 // //need the unicode buffer
799 // if (withoutUnicodePtr == 0) {
800 // //buffer all the entries that have been left aside....
801 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
805 // withoutUnicodeBuffer,
807 // withoutUnicodePtr);
809 // //fill the buffer with the char
810 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
812 // } //-------------end unicode traitement--------------
// Not a digit in the requested radix: rewind to the entry position.
814 if (Character.digit(currentCharacter, radix) == -1) {
815 currentPosition = temp;
818 // if (withoutUnicodePtr != 0)
819 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source: rewind.
822 } catch (IndexOutOfBoundsException e) {
823 currentPosition = temp;
// Reads the next source character into currentCharacter and advances
// currentPosition; when isPHPIdentifierPart rejects it, or the read runs past
// the end of source, currentPosition is reset to its value on entry.
// Despite the "Java" in the method name, the test applied is the PHP
// identifier-part test.
// NOTE(review): only currentPosition is visibly restored on rejection -
// currentCharacter keeps the character that was read.
// NOTE(review): the "try {" line and the return statements are not visible in
// this excerpt; presumably true for an identifier part and false otherwise.
828 public boolean getNextCharAsJavaIdentifierPart() {
830 // handle the case of unicode.
831 // when a unicode appears then we must use a buffer that holds char
833 // At the end of this method currentCharacter holds the new visited char
834 // and currentPosition points right next after it
835 // Both previous lines are true if the currentCharacter is a
836 // JavaIdentifierPart
837 // On false, no side effect has occurred.
838 // ALL getNextChar.... ARE OPTIMIZED COPIES
// Remember the entry position so it can be restored.
839 int temp = currentPosition;
841 currentCharacter = source[currentPosition++];
// Disabled unicode-escape handling, kept for reference:
842 // if (((currentCharacter = source[currentPosition++]) == '\\')
843 // && (source[currentPosition] == 'u')) {
844 // //-------------unicode traitement ------------
845 // int c1, c2, c3, c4;
846 // int unicodeSize = 6;
847 // currentPosition++;
848 // while (source[currentPosition] == 'u') {
849 // currentPosition++;
853 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
855 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
857 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
859 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
861 // currentPosition = temp;
865 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
866 // if (!isPHPIdentifierPart(currentCharacter)) {
867 // currentPosition = temp;
871 // //need the unicode buffer
872 // if (withoutUnicodePtr == 0) {
873 // //buffer all the entries that have been left aside....
874 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
878 // withoutUnicodeBuffer,
880 // withoutUnicodePtr);
882 // //fill the buffer with the char
883 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
885 // } //-------------end unicode traitement--------------
// Not an identifier part: rewind to the entry position.
887 if (!isPHPIdentifierPart(currentCharacter)) {
888 currentPosition = temp;
891 // if (withoutUnicodePtr != 0)
892 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source: rewind.
895 } catch (IndexOutOfBoundsException e) {
896 currentPosition = temp;
// Looks ahead from the current position to decide whether the text that
// follows is a type-cast keyword followed by ')'. It skips spaces and tabs,
// collects a run of letter-class characters, and for runs of 3 to 7
// characters compares them with the cast keywords below, selecting the
// matching TokenName...CAST constant. The final statements restore
// currentCharacter and currentPosition to their values on entry and return
// TokenNameLPAREN.
// NOTE(review): the "try {"/"do {" lines, the case labels, the statements
// that set "found" and the successful return are not visible in this excerpt.
// NOTE(review): the keyword comparisons are against lower-case letters only
// and no case folding is visible here - confirm how upper-case casts are handled.
901 public int getCastOrParen() {
// Saved so the scanner state can be restored when no cast is recognised.
902 int tempPosition = currentPosition;
903 char tempCharacter = currentCharacter;
904 int tempToken = TokenNameLPAREN;
905 boolean found = false;
906 StringBuffer buf = new StringBuffer();
// Skip spaces and tabs before the candidate keyword.
909 currentCharacter = source[currentPosition++];
910 } while (currentCharacter == ' ' || currentCharacter == '\t');
// Collect the run of letter-class characters.
// NOTE(review): the table is indexed without a MAX_OBVIOUS check; a character
// >= 256 raises ArrayIndexOutOfBoundsException, which the
// IndexOutOfBoundsException handler below would catch if this is inside its try.
911 while (ObviousIdentCharNatures[currentCharacter] == C_LETTER) {
912 // while((currentCharacter >= 'a' && currentCharacter <= 'z') ||
913 // (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
914 buf.append(currentCharacter);
915 currentCharacter = source[currentPosition++];
// Only runs of 3 to 7 characters can be one of the cast keywords.
917 if (buf.length() >= 3 && buf.length() <= 7) {
918 char[] data = buf.toString().toCharArray();
920 switch (data.length) {
// "int"
923 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
925 tempToken = TokenNameintCAST;
// "bool"
930 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
932 tempToken = TokenNameboolCAST;
// "real" maps to the double cast
935 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
937 tempToken = TokenNamedoubleCAST;
// "array"
943 if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
944 && (data[++index] == 'y')) {
946 tempToken = TokenNamearrayCAST;
// "unset"
949 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
950 && (data[++index] == 't')) {
952 tempToken = TokenNameunsetCAST;
// "float" maps to the double cast
955 if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
956 && (data[++index] == 't')) {
958 tempToken = TokenNamedoubleCAST;
964 // object string double
965 if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
966 && (data[++index] == 'c') && (data[++index] == 't')) {
968 tempToken = TokenNameobjectCAST;
971 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
972 && (data[++index] == 'n') && (data[++index] == 'g')) {
974 tempToken = TokenNamestringCAST;
977 if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
978 && (data[++index] == 'l') && (data[++index] == 'e')) {
980 tempToken = TokenNamedoubleCAST;
// "boolean" maps to the bool cast
987 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
988 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
990 tempToken = TokenNameboolCAST;
// "integer" maps to the int cast
993 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
994 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
996 tempToken = TokenNameintCAST;
// Skip spaces and tabs between the keyword and the closing parenthesis.
1002 while (currentCharacter == ' ' || currentCharacter == '\t') {
1003 currentCharacter = source[currentPosition++];
1005 if (currentCharacter == ')') {
1010 } catch (IndexOutOfBoundsException e) {
// Restore the scanner state and report a plain opening parenthesis.
1012 currentCharacter = tempCharacter;
1013 currentPosition = tempPosition;
1014 return TokenNameLPAREN;
1017 public void consumeStringInterpolated() throws InvalidInputException {
1019 // consume next character
1020 unicodeAsBackSlash = false;
1021 currentCharacter = source[currentPosition++];
1022 // if (((currentCharacter = source[currentPosition++]) == '\\')
1023 // && (source[currentPosition] == 'u')) {
1024 // getNextUnicodeChar();
1026 // if (withoutUnicodePtr != 0) {
1027 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1028 // currentCharacter;
1031 while (currentCharacter != '`') {
1032 /** ** in PHP \r and \n are valid in string literals *** */
1033 // if ((currentCharacter == '\n')
1034 // || (currentCharacter == '\r')) {
1035 // // relocate if finding another quote fairly close: thus unicode
1036 // '/u000D' will be fully consumed
1037 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1038 // if (currentPosition + lookAhead == source.length)
1040 // if (source[currentPosition + lookAhead] == '\n')
1042 // if (source[currentPosition + lookAhead] == '\"') {
1043 // currentPosition += lookAhead + 1;
1047 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1049 if (currentCharacter == '\\') {
1050 int escapeSize = currentPosition;
1051 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1052 // scanEscapeCharacter make a side effect on this value and we need
1053 // the previous value few lines down this one
1054 scanDoubleQuotedEscapeCharacter();
1055 escapeSize = currentPosition - escapeSize;
1056 if (withoutUnicodePtr == 0) {
1057 // buffer all the entries that have been left aside....
1058 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1059 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1060 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1061 } else { // overwrite the / in the buffer
1062 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1063 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1064 // where only one is correct
1065 withoutUnicodePtr--;
1068 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1069 if (recordLineSeparator) {
1070 pushLineSeparator();
1073 // consume next character
1074 unicodeAsBackSlash = false;
1075 currentCharacter = source[currentPosition++];
1076 // if (((currentCharacter = source[currentPosition++]) == '\\')
1077 // && (source[currentPosition] == 'u')) {
1078 // getNextUnicodeChar();
1080 if (withoutUnicodePtr != 0) {
1081 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1085 } catch (IndexOutOfBoundsException e) {
1086 // reset end position for error reporting
1087 currentPosition -= 2;
1088 throw new InvalidInputException(UNTERMINATED_STRING);
1089 } catch (InvalidInputException e) {
1090 if (e.getMessage().equals(INVALID_ESCAPE)) {
1091 // relocate if finding another quote fairly close: thus unicode
1092 // '/u000D' will be fully consumed
1093 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1094 if (currentPosition + lookAhead == source.length)
1096 if (source[currentPosition + lookAhead] == '\n')
1098 if (source[currentPosition + lookAhead] == '`') {
1099 currentPosition += lookAhead + 1;
1106 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1107 // //$NON-NLS-?$ where ? is an
1109 if (currentLine == null) {
1110 currentLine = new NLSLine();
1111 lines.add(currentLine);
1113 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1117 public void consumeStringConstant() throws InvalidInputException {
1119 // consume next character
1120 unicodeAsBackSlash = false;
1121 currentCharacter = source[currentPosition++];
1122 // if (((currentCharacter = source[currentPosition++]) == '\\')
1123 // && (source[currentPosition] == 'u')) {
1124 // getNextUnicodeChar();
1126 // if (withoutUnicodePtr != 0) {
1127 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1128 // currentCharacter;
1131 while (currentCharacter != '\'') {
1132 /** ** in PHP \r and \n are valid in string literals *** */
1133 // if ((currentCharacter == '\n')
1134 // || (currentCharacter == '\r')) {
1135 // // relocate if finding another quote fairly close: thus unicode
1136 // '/u000D' will be fully consumed
1137 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1138 // if (currentPosition + lookAhead == source.length)
1140 // if (source[currentPosition + lookAhead] == '\n')
1142 // if (source[currentPosition + lookAhead] == '\"') {
1143 // currentPosition += lookAhead + 1;
1147 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1149 if (currentCharacter == '\\') {
1150 int escapeSize = currentPosition;
1151 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1152 // scanEscapeCharacter make a side effect on this value and we need
1153 // the previous value few lines down this one
1154 scanSingleQuotedEscapeCharacter();
1155 escapeSize = currentPosition - escapeSize;
1156 if (withoutUnicodePtr == 0) {
1157 // buffer all the entries that have been left aside....
1158 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1159 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1160 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1161 } else { // overwrite the / in the buffer
1162 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1163 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1164 // where only one is correct
1165 withoutUnicodePtr--;
1168 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1169 if (recordLineSeparator) {
1170 pushLineSeparator();
1173 // consume next character
1174 unicodeAsBackSlash = false;
1175 currentCharacter = source[currentPosition++];
1176 // if (((currentCharacter = source[currentPosition++]) == '\\')
1177 // && (source[currentPosition] == 'u')) {
1178 // getNextUnicodeChar();
1180 if (withoutUnicodePtr != 0) {
1181 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1185 } catch (IndexOutOfBoundsException e) {
1186 // reset end position for error reporting
1187 currentPosition -= 2;
1188 throw new InvalidInputException(UNTERMINATED_STRING);
1189 } catch (InvalidInputException e) {
1190 if (e.getMessage().equals(INVALID_ESCAPE)) {
1191 // relocate if finding another quote fairly close: thus unicode
1192 // '/u000D' will be fully consumed
1193 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1194 if (currentPosition + lookAhead == source.length)
1196 if (source[currentPosition + lookAhead] == '\n')
1198 if (source[currentPosition + lookAhead] == '\'') {
1199 currentPosition += lookAhead + 1;
1206 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1207 // //$NON-NLS-?$ where ? is an
1209 if (currentLine == null) {
1210 currentLine = new NLSLine();
1211 lines.add(currentLine);
1213 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1217 public void consumeStringLiteral() throws InvalidInputException {
1219 boolean openDollarBrace = false;
1220 // consume next character
1221 unicodeAsBackSlash = false;
1222 currentCharacter = source[currentPosition++];
1223 while (currentCharacter != '"' || openDollarBrace) {
1224 /** ** in PHP \r and \n are valid in string literals *** */
1225 if (currentCharacter == '\\') {
1226 int escapeSize = currentPosition;
1227 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1228 // scanEscapeCharacter make a side effect on this value and we need
1229 // the previous value few lines down this one
1230 scanDoubleQuotedEscapeCharacter();
1231 escapeSize = currentPosition - escapeSize;
1232 if (withoutUnicodePtr == 0) {
1233 // buffer all the entries that have been left aside....
1234 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1235 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1236 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1237 } else { // overwrite the / in the buffer
1238 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1239 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1240 // where only one is correct
1241 withoutUnicodePtr--;
1244 } else if (currentCharacter == '$' && source[currentPosition] == '{') {
1245 openDollarBrace = true;
1246 } else if (currentCharacter == '{' && source[currentPosition] == '$') {
1247 openDollarBrace = true;
1248 } else if (currentCharacter == '}') {
1249 openDollarBrace = false;
1250 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1251 if (recordLineSeparator) {
1252 pushLineSeparator();
1255 // consume next character
1256 unicodeAsBackSlash = false;
1257 currentCharacter = source[currentPosition++];
1258 if (withoutUnicodePtr != 0) {
1259 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1262 } catch (IndexOutOfBoundsException e) {
1263 // reset end position for error reporting
1264 currentPosition -= 2;
1265 throw new InvalidInputException(UNTERMINATED_STRING);
1266 } catch (InvalidInputException e) {
1267 if (e.getMessage().equals(INVALID_ESCAPE)) {
1268 // relocate if finding another quote fairly close: thus unicode
1269 // '/u000D' will be fully consumed
1270 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1271 if (currentPosition + lookAhead == source.length)
1273 if (source[currentPosition + lookAhead] == '\n')
1275 if (source[currentPosition + lookAhead] == '\"') {
1276 currentPosition += lookAhead + 1;
1283 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1284 // //$NON-NLS-?$ where ? is an
1286 if (currentLine == null) {
1287 currentLine = new NLSLine();
1288 lines.add(currentLine);
1290 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Scans and returns the next token, starting at currentPosition.
 * <p>
 * Behaviour visible in this listing, in order:
 * <ul>
 * <li>may delegate to getInlinedHTMLToken(currentPosition); the guarding condition is not part of this
 * listing (presumably taken while the scanner is outside PHP mode - TODO confirm);</li>
 * <li>a pending fFillerToken is moved into tempToken and the field is reset to TokenNameEOF;</li>
 * <li>withoutUnicodePtr is reset to 0 and whitespace is skipped, calling pushLineSeparator() on line
 * breaks when recordLineSeparator is set; TokenNameWHITESPACE is returned instead when
 * tokenizeWhiteSpace is set and whitespace was consumed;</li>
 * <li>TokenNameEOF is returned once currentPosition exceeds eofPosition, and also by the outermost
 * IndexOutOfBoundsException handler, i.e. when reading runs past the end of source;</li>
 * <li>otherwise the token kind is chosen by a switch on currentCharacter: punctuation and operators,
 * heredocs, the three string flavours (consumeStringConstant, consumeStringLiteral,
 * consumeStringInterpolated), line and block comments, variables, identifiers/keywords and numbers;
 * anything else yields TokenNameERROR.</li>
 * </ul>
 * NOTE(review): brace-only lines, case labels and several other short lines are absent from this
 * listing, so the comments added below describe only the statements that are visible.
 *
 * @return the kind of the token that was scanned (one of the TokenName... constants)
 * @throws InvalidInputException
 *           e.g. with UNTERMINATED_COMMENT when a block comment is still open at the end of source, or
 *           raised by one of the string consumers
 */
1294 public int getNextToken() throws InvalidInputException {
1296 return getInlinedHTMLToken(currentPosition);
// a pending filler token is moved into tempToken and the field is reset to TokenNameEOF
1298 if (fFillerToken != TokenNameEOF) {
1300 tempToken = fFillerToken;
1301 fFillerToken = TokenNameEOF;
1304 this.wasAcr = false;
1306 jumpOverMethodBody();
1308 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1312 withoutUnicodePtr = 0;
1313 // start with a new token
1314 char encapsedChar = ' ';
1315 // if (!encapsedStringStack.isEmpty()) {
1316 // encapsedChar = ((Character)
1317 // encapsedStringStack.peek()).charValue();
1319 // if (encapsedChar != '$' && encapsedChar != ' ') {
1320 // currentCharacter = source[currentPosition++];
1321 // if (currentCharacter == encapsedChar) {
1322 // switch (currentCharacter) {
1324 // return TokenNameEncapsedString0;
1326 // return TokenNameEncapsedString1;
1328 // return TokenNameEncapsedString2;
1331 // while (currentCharacter != encapsedChar) {
1332 // /** ** in PHP \r and \n are valid in string literals *** */
1333 // switch (currentCharacter) {
1335 // int escapeSize = currentPosition;
1336 // boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1337 // //scanEscapeCharacter make a side effect on this value and
1338 // // we need the previous value few lines down this one
1339 // scanDoubleQuotedEscapeCharacter();
1340 // escapeSize = currentPosition - escapeSize;
1341 // if (withoutUnicodePtr == 0) {
1342 // //buffer all the entries that have been left aside....
1343 // withoutUnicodePtr = currentPosition - escapeSize - 1 -
1345 // System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1346 // withoutUnicodePtr);
1347 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1348 // } else { //overwrite the / in the buffer
1349 // withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1350 // if (backSlashAsUnicodeInString) { //there are TWO \ in
1351 // withoutUnicodePtr--;
1357 // if (recordLineSeparator) {
1358 // pushLineSeparator();
1362 // if (isPHPIdentifierStart(source[currentPosition]) ||
1363 // source[currentPosition] == '{') {
1364 // currentPosition--;
1365 // encapsedStringStack.push(new Character('$'));
1366 // return TokenNameSTRING;
1370 // if (source[currentPosition] == '$') { // CURLY_OPEN
1371 // currentPosition--;
1372 // encapsedStringStack.push(new Character('$'));
1373 // return TokenNameSTRING;
1376 // // consume next character
1377 // unicodeAsBackSlash = false;
1378 // currentCharacter = source[currentPosition++];
1379 // if (withoutUnicodePtr != 0) {
1380 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1384 // currentPosition--;
1385 // return TokenNameSTRING;
1387 // ---------Consume white space and handles startPosition---------
1388 int whiteStart = currentPosition;
1389 startPosition = currentPosition;
1390 currentCharacter = source[currentPosition++];
1391 // if (encapsedChar == '$') {
1392 // switch (currentCharacter) {
1394 // currentCharacter = source[currentPosition++];
1395 // return TokenNameSTRING;
1397 // if (encapsedChar == '$') {
1398 // if (getNextChar('$'))
1399 // return TokenNameLBRACE_DOLLAR;
1401 // return TokenNameLBRACE;
1403 // return TokenNameRBRACE;
1405 // return TokenNameLBRACKET;
1407 // return TokenNameRBRACKET;
1409 // if (tokenizeStrings) {
1410 // consumeStringConstant();
1411 // return TokenNameStringSingleQuote;
1413 // return TokenNameEncapsedString1;
1415 // return TokenNameEncapsedString2;
1417 // if (tokenizeStrings) {
1418 // consumeStringInterpolated();
1419 // return TokenNameStringInterpolated;
1421 // return TokenNameEncapsedString0;
1423 // if (getNextChar('>'))
1424 // return TokenNameMINUS_GREATER;
1425 // return TokenNameSTRING;
1427 // if (currentCharacter == '$') {
1428 // int oldPosition = currentPosition;
1430 // currentCharacter = source[currentPosition++];
1431 // if (currentCharacter == '{') {
1432 // return TokenNameDOLLAR_LBRACE;
1434 // if (isPHPIdentifierStart(currentCharacter)) {
1435 // return scanIdentifierOrKeyword(true);
1437 // currentPosition = oldPosition;
1438 // return TokenNameSTRING;
1440 // } catch (IndexOutOfBoundsException e) {
1441 // currentPosition = oldPosition;
1442 // return TokenNameSTRING;
1445 // if (isPHPIdentifierStart(currentCharacter))
1446 // return scanIdentifierOrKeyword(false);
1447 // if (Character.isDigit(currentCharacter))
1448 // return scanNumber(false);
1449 // return TokenNameERROR;
1452 // boolean isWhiteSpace;
// skip whitespace; startPosition is moved along so that it ends up on the first non-blank character
1454 while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1455 startPosition = currentPosition;
1456 currentCharacter = source[currentPosition++];
1457 // if (((currentCharacter = source[currentPosition++]) == '\\')
1458 // && (source[currentPosition] == 'u')) {
1459 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1461 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1462 checkNonExternalizeString();
1463 if (recordLineSeparator) {
1464 pushLineSeparator();
1469 // isWhiteSpace = (currentCharacter == ' ')
1470 // || Character.isWhitespace(currentCharacter);
1473 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1474 // reposition scanner in case we are interested by spaces as tokens
1476 startPosition = whiteStart;
1477 return TokenNameWHITESPACE;
1479 // little trick to get out in the middle of a source compuation
1480 if (currentPosition > eofPosition)
1481 return TokenNameEOF;
1482 // ---------Identify the next token-------------
1483 switch (currentCharacter) {
1485 return getCastOrParen();
1487 return TokenNameRPAREN;
1489 return TokenNameLBRACE;
1491 return TokenNameRBRACE;
1493 return TokenNameLBRACKET;
1495 return TokenNameRBRACKET;
1497 return TokenNameSEMICOLON;
1499 return TokenNameCOMMA;
1501 if (getNextChar('='))
1502 return TokenNameDOT_EQUAL;
1503 if (getNextCharAsDigit())
1504 return scanNumber(true);
1505 return TokenNameDOT;
1508 if ((test = getNextChar('+', '=')) == 0)
1509 return TokenNamePLUS_PLUS;
1511 return TokenNamePLUS_EQUAL;
1512 return TokenNamePLUS;
1516 if ((test = getNextChar('-', '=')) == 0)
1517 return TokenNameMINUS_MINUS;
1519 return TokenNameMINUS_EQUAL;
1520 if (getNextChar('>'))
1521 return TokenNameMINUS_GREATER;
1522 return TokenNameMINUS;
1525 if (getNextChar('='))
1526 return TokenNameTWIDDLE_EQUAL;
1527 return TokenNameTWIDDLE;
1529 if (getNextChar('=')) {
1530 if (getNextChar('=')) {
1531 return TokenNameNOT_EQUAL_EQUAL;
1533 return TokenNameNOT_EQUAL;
1535 return TokenNameNOT;
1537 if (getNextChar('='))
1538 return TokenNameMULTIPLY_EQUAL;
1539 return TokenNameMULTIPLY;
1541 if (getNextChar('='))
1542 return TokenNameREMAINDER_EQUAL;
1543 return TokenNameREMAINDER;
// tokens beginning with '<': the following character is read eagerly; if the source ends here the
// position is restored and a plain TokenNameLESS is returned
1545 int oldPosition = currentPosition;
1547 currentCharacter = source[currentPosition++];
1548 } catch (IndexOutOfBoundsException e) {
1549 currentPosition = oldPosition;
1550 return TokenNameLESS;
1552 switch (currentCharacter) {
1554 return TokenNameLESS_EQUAL;
1556 return TokenNameNOT_EQUAL;
1558 if (getNextChar('='))
1559 return TokenNameLEFT_SHIFT_EQUAL;
// heredoc: skip whitespace, read the identifier that labels the heredoc (anything else is
// TokenNameERROR), then read on until, after a line break, the same identifier text is found again
1560 if (getNextChar('<')) {
1561 currentCharacter = source[currentPosition++];
1562 while (Character.isWhitespace(currentCharacter)) {
1563 currentCharacter = source[currentPosition++];
1565 int heredocStart = currentPosition - 1;
1566 int heredocLength = 0;
1567 if (isPHPIdentifierStart(currentCharacter)) {
1568 currentCharacter = source[currentPosition++];
1570 return TokenNameERROR;
1572 while (isPHPIdentifierPart(currentCharacter)) {
1573 currentCharacter = source[currentPosition++];
1575 heredocLength = currentPosition - heredocStart - 1;
1576 // heredoc end-tag determination
1577 boolean endTag = true;
1580 ch = source[currentPosition++];
1581 if (ch == '\r' || ch == '\n') {
1582 if (recordLineSeparator) {
1583 pushLineSeparator();
1587 for (int i = 0; i < heredocLength; i++) {
1588 if (source[currentPosition + i] != source[heredocStart + i]) {
1594 currentPosition += heredocLength - 1;
1595 currentCharacter = source[currentPosition++];
1596 break; // do...while loop
1602 return TokenNameHEREDOC;
1604 return TokenNameLEFT_SHIFT;
1606 currentPosition = oldPosition;
1607 return TokenNameLESS;
1611 if ((test = getNextChar('=', '>')) == 0)
1612 return TokenNameGREATER_EQUAL;
1614 if ((test = getNextChar('=', '>')) == 0)
1615 return TokenNameRIGHT_SHIFT_EQUAL;
1616 return TokenNameRIGHT_SHIFT;
1618 return TokenNameGREATER;
1621 if (getNextChar('=')) {
1622 if (getNextChar('=')) {
1623 return TokenNameEQUAL_EQUAL_EQUAL;
1625 return TokenNameEQUAL_EQUAL;
1627 if (getNextChar('>'))
1628 return TokenNameEQUAL_GREATER;
1629 return TokenNameEQUAL;
1632 if ((test = getNextChar('&', '=')) == 0)
1633 return TokenNameAND_AND;
1635 return TokenNameAND_EQUAL;
1636 return TokenNameAND;
1640 if ((test = getNextChar('|', '=')) == 0)
1641 return TokenNameOR_OR;
1643 return TokenNameOR_EQUAL;
1647 if (getNextChar('='))
1648 return TokenNameXOR_EQUAL;
1649 return TokenNameXOR;
// a '>' directly after the current character hands over to inline HTML scanning, starting the
// token two characters back; otherwise the token is TokenNameQUESTION
1651 if (getNextChar('>')) {
1653 if (currentPosition == source.length) {
1655 return TokenNameINLINE_HTML;
1657 return getInlinedHTMLToken(currentPosition - 2);
1659 return TokenNameQUESTION;
1661 if (getNextChar(':'))
1662 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1663 return TokenNameCOLON;
1667 consumeStringConstant();
1668 return TokenNameStringSingleQuote;
1670 // if (tokenizeStrings) {
1671 consumeStringLiteral();
1672 return TokenNameStringDoubleQuote;
1674 // return TokenNameEncapsedString2;
1676 // if (tokenizeStrings) {
1677 consumeStringInterpolated();
1678 return TokenNameStringInterpolated;
1680 // return TokenNameEncapsedString0;
// '/' or '#': divide-assign, a line comment (started by '#' or a second '/'), a block comment,
// or a plain TokenNameDIVIDE
1683 char startChar = currentCharacter;
1684 if (getNextChar('=') && startChar == '/') {
1685 return TokenNameDIVIDE_EQUAL;
1688 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1690 this.lastCommentLinePosition = this.currentPosition;
1691 int endPositionForLineComment = 0;
1692 try { // get the next char
1693 currentCharacter = source[currentPosition++];
1694 // if (((currentCharacter = source[currentPosition++])
1696 // && (source[currentPosition] == 'u')) {
1697 // //-------------unicode traitement ------------
1698 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1699 // currentPosition++;
1700 // while (source[currentPosition] == 'u') {
1701 // currentPosition++;
1704 // Character.getNumericValue(source[currentPosition++]))
1708 // Character.getNumericValue(source[currentPosition++]))
1712 // Character.getNumericValue(source[currentPosition++]))
1716 // Character.getNumericValue(source[currentPosition++]))
1720 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1722 // currentCharacter =
1723 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1726 // handle the \\u case manually into comment
1727 // if (currentCharacter == '\\') {
1728 // if (source[currentPosition] == '\\')
1729 // currentPosition++;
1730 // } //jump over the \\
1731 boolean isUnicode = false;
1732 while (currentCharacter != '\r' && currentCharacter != '\n') {
1733 this.lastCommentLinePosition = this.currentPosition;
1734 if (currentCharacter == '?') {
1735 if (getNextChar('>')) {
1736 // ?> breaks line comments
1737 startPosition = currentPosition - 2;
1739 return TokenNameINLINE_HTML;
1742 // get the next char
1744 currentCharacter = source[currentPosition++];
1745 // if (((currentCharacter = source[currentPosition++])
1747 // && (source[currentPosition] == 'u')) {
1748 // isUnicode = true;
1749 // //-------------unicode traitement ------------
1750 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1751 // currentPosition++;
1752 // while (source[currentPosition] == 'u') {
1753 // currentPosition++;
1756 // Character.getNumericValue(source[currentPosition++]))
1760 // Character.getNumericValue(
1761 // source[currentPosition++]))
1765 // Character.getNumericValue(
1766 // source[currentPosition++]))
1770 // Character.getNumericValue(
1771 // source[currentPosition++]))
1775 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1777 // currentCharacter =
1778 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1781 // handle the \\u case manually into comment
1782 // if (currentCharacter == '\\') {
1783 // if (source[currentPosition] == '\\')
1784 // currentPosition++;
1785 // } //jump over the \\
1788 endPositionForLineComment = currentPosition - 6;
1790 endPositionForLineComment = currentPosition - 1;
1792 // recordComment(false);
1793 recordComment(TokenNameCOMMENT_LINE);
1794 if (this.taskTags != null)
1795 checkTaskTag(this.startPosition, this.currentPosition);
1796 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1797 checkNonExternalizeString();
1798 if (recordLineSeparator) {
1800 pushUnicodeLineSeparator();
1802 pushLineSeparator();
1808 if (tokenizeComments) {
1810 currentPosition = endPositionForLineComment;
1811 // reset one character behind
1813 return TokenNameCOMMENT_LINE;
1815 } catch (IndexOutOfBoundsException e) { // an eof will them
1817 if (tokenizeComments) {
1819 // reset one character behind
1820 return TokenNameCOMMENT_LINE;
1826 // traditional and annotation comment
1827 boolean isJavadoc = false, star = false;
1828 // consume next character
1829 unicodeAsBackSlash = false;
1830 currentCharacter = source[currentPosition++];
1831 // if (((currentCharacter = source[currentPosition++]) ==
1833 // && (source[currentPosition] == 'u')) {
1834 // getNextUnicodeChar();
1836 // if (withoutUnicodePtr != 0) {
1837 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1838 // currentCharacter;
1841 if (currentCharacter == '*') {
1845 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1846 checkNonExternalizeString();
1847 if (recordLineSeparator) {
1848 pushLineSeparator();
1853 try { // get the next char
1854 currentCharacter = source[currentPosition++];
1855 // if (((currentCharacter = source[currentPosition++])
1857 // && (source[currentPosition] == 'u')) {
1858 // //-------------unicode traitement ------------
1859 // getNextUnicodeChar();
1861 // handle the \\u case manually into comment
1862 // if (currentCharacter == '\\') {
1863 // if (source[currentPosition] == '\\')
1864 // currentPosition++;
1865 // //jump over the \\
1867 // empty comment is not a javadoc /**/
1868 if (currentCharacter == '/') {
1871 // loop until end of comment */
1872 while ((currentCharacter != '/') || (!star)) {
1873 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1874 checkNonExternalizeString();
1875 if (recordLineSeparator) {
1876 pushLineSeparator();
1881 star = currentCharacter == '*';
1883 currentCharacter = source[currentPosition++];
1884 // if (((currentCharacter = source[currentPosition++])
1886 // && (source[currentPosition] == 'u')) {
1887 // //-------------unicode traitement ------------
1888 // getNextUnicodeChar();
1890 // handle the \\u case manually into comment
1891 // if (currentCharacter == '\\') {
1892 // if (source[currentPosition] == '\\')
1893 // currentPosition++;
1894 // } //jump over the \\
1896 // recordComment(isJavadoc);
1898 recordComment(TokenNameCOMMENT_PHPDOC);
1900 recordComment(TokenNameCOMMENT_BLOCK);
1903 if (tokenizeComments) {
1905 return TokenNameCOMMENT_PHPDOC;
1906 return TokenNameCOMMENT_BLOCK;
1909 if (this.taskTags != null) {
1910 checkTaskTag(this.startPosition, this.currentPosition);
1912 } catch (IndexOutOfBoundsException e) {
1913 // reset end position for error reporting
1914 currentPosition -= 2;
1915 throw new InvalidInputException(UNTERMINATED_COMMENT);
1919 return TokenNameDIVIDE;
1923 return TokenNameEOF;
1924 // the atEnd may not be <currentPosition == source.length> if
1925 // source is only some part of a real (external) stream
1926 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// remaining characters: '$' followed by an identifier start is scanned as a variable (otherwise a
// lone TokenNameDOLLAR), then identifiers/keywords, then numbers; anything else is TokenNameERROR
1928 if (currentCharacter == '$') {
1929 int oldPosition = currentPosition;
1931 currentCharacter = source[currentPosition++];
1932 if (isPHPIdentifierStart(currentCharacter)) {
1933 return scanIdentifierOrKeyword(true);
1935 currentPosition = oldPosition;
1936 return TokenNameDOLLAR;
1938 } catch (IndexOutOfBoundsException e) {
1939 currentPosition = oldPosition;
1940 return TokenNameDOLLAR;
1943 if (isPHPIdentifierStart(currentCharacter))
1944 return scanIdentifierOrKeyword(false);
1945 if (Character.isDigit(currentCharacter))
1946 return scanNumber(false);
1947 return TokenNameERROR;
1950 } // -----------------end switch while try--------------------
// reading past the end of source anywhere above that is not handled locally ends the token stream
1951 catch (IndexOutOfBoundsException e) {
1954 return TokenNameEOF;
1959 * @throws InvalidInputException
// Scans inline (non-PHP) text beginning at currentPosition and looks for a PHP open tag.
//
// start - value stored into startPosition for the resulting token.
//
// Returns TokenNameEOF (after clamping currentPosition to source.length) when currentPosition is
// already beyond the end of source; every other visible exit returns TokenNameINLINE_HTML,
// including the IndexOutOfBoundsException handler, which first restores startPosition to start.
//
// fFillerToken is set to TokenNameecho on the "<?" paths; presumably only when the short echo tag
// "<?=" was recognised (phpShortTag) - the guarding lines are not part of this listing, TODO confirm.
//
// NOTE(review): brace-only and other short lines are absent from this listing, so the exact
// nesting of the branches below cannot be read off the visible text.
1961 private int getInlinedHTMLToken(int start) throws InvalidInputException {
1962 boolean phpShortTag = false; // true, if <?= detected
1963 if (currentPosition > source.length) {
1964 currentPosition = source.length;
1965 return TokenNameEOF;
1967 startPosition = start;
1970 currentCharacter = source[currentPosition++];
// a '<' immediately followed by '?' is a candidate open tag
1971 if (currentCharacter == '<') {
1972 if (getNextChar('?')) {
1973 currentCharacter = source[currentPosition++];
// not followed by 'P'/'p': short form of the open tag
1974 if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
1975 if (currentCharacter != '=') { // <?=
1977 phpShortTag = false;
// with ignorePHPOneLiner set, the tag is only reported if lookAheadLinePHPTag() answers
// TokenNameINLINE_HTML
1982 if (ignorePHPOneLiner) { // for CodeFormatter
1983 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1986 fFillerToken = TokenNameecho;
1988 return TokenNameINLINE_HTML;
1993 fFillerToken = TokenNameecho;
1995 return TokenNameINLINE_HTML;
// long form: the remaining letters are matched case-insensitively ('H'/'h', then 'P'/'p')
1998 int test = getNextChar('H', 'h');
2000 test = getNextChar('P', 'p');
2003 if (ignorePHPOneLiner) {
2004 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
2006 return TokenNameINLINE_HTML;
2010 return TokenNameINLINE_HTML;
// keep the line table up to date while skipping over inline text
2018 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
2019 if (recordLineSeparator) {
2020 pushLineSeparator();
2025 } // -----------------while--------------------
2027 return TokenNameINLINE_HTML;
2028 } // -----------------try--------------------
// the source ended while scanning: the rest of the text is still reported as inline HTML
2029 catch (IndexOutOfBoundsException e) {
2030 startPosition = start;
2034 return TokenNameINLINE_HTML;
2038 * check if the PHP is only in this line (for CodeFormatter)
// Looks ahead from currentPosition, using a private cursor (currentPositionInLine), while keeping
// track of whether the cursor is inside a single- or double-quoted string.
//
// Returns TokenNameEOF as a "dummy" token, after moving currentPosition behind the character just
// read, when that character is preceded by '?' (presumably the '>' of a closing "?>" - the case
// label is not part of this listing, TODO confirm). Every other visible exit returns
// TokenNameINLINE_HTML; the IndexOutOfBoundsException handler additionally moves currentPosition
// to the look-ahead cursor.
//
// NOTE(review): case labels, break statements and other short lines are absent from this listing.
2042 private int lookAheadLinePHPTag() {
2043 int currentPositionInLine = currentPosition;
2044 char previousCharInLine = ' ';
2045 char currentCharInLine = ' ';
2046 boolean singleQuotedStringActive = false;
2047 boolean doubleQuotedStringActive = false;
2050 // look ahead in this line
2052 previousCharInLine = currentCharInLine;
2053 currentCharInLine = source[currentPositionInLine++];
2054 switch (currentCharInLine) {
2056 if (previousCharInLine == '?') {
2057 // update the scanner's current Position in the source
2058 currentPosition = currentPositionInLine;
2059 // use as "dummy" token
2060 return TokenNameEOF;
2064 if (doubleQuotedStringActive) {
2065 // ignore escaped characters in double quoted strings
2066 previousCharInLine = currentCharInLine;
2067 currentCharInLine = source[currentPositionInLine++];
// NOTE(review): the embedded line numbers leave no room for a `break` between the branch above and
// the one below; if so, an escape inside a double-quoted string falls through and toggles
// doubleQuotedStringActive - confirm against the full file whether that is intended
2070 if (doubleQuotedStringActive) {
2071 doubleQuotedStringActive = false;
2073 if (!singleQuotedStringActive) {
2074 doubleQuotedStringActive = true;
// a quote preceded by a backslash does not end a single-quoted string
2079 if (singleQuotedStringActive) {
2080 if (previousCharInLine != '\\') {
2081 singleQuotedStringActive = false;
2084 if (!doubleQuotedStringActive) {
2085 singleQuotedStringActive = true;
2091 return TokenNameINLINE_HTML;
// the following three branches only fire outside of any string
2093 if (!singleQuotedStringActive && !doubleQuotedStringActive) {
2095 return TokenNameINLINE_HTML;
2099 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2101 return TokenNameINLINE_HTML;
2105 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2107 return TokenNameINLINE_HTML;
// the source ended during look-ahead: adopt the look-ahead cursor as the scanner position
2112 } catch (IndexOutOfBoundsException e) {
2114 currentPosition = currentPositionInLine;
2115 return TokenNameINLINE_HTML;
2119 // public final void getNextUnicodeChar()
2120 // throws IndexOutOfBoundsException, InvalidInputException {
2122 // //handle the case of unicode.
2123 // //when a unicode appears then we must use a buffer that holds char
2125 // //At the end of this method currentCharacter holds the new visited char
2126 // //and currentPosition points right next after it
2128 // //ALL getNextChar.... ARE OPTIMIZED COPIES
2130 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2131 // currentPosition++;
2132 // while (source[currentPosition] == 'u') {
2133 // currentPosition++;
2137 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2139 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2141 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2143 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2145 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2147 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2148 // //need the unicode buffer
2149 // if (withoutUnicodePtr == 0) {
2150 // //buffer all the entries that have been left aside....
2151 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2152 // System.arraycopy(
2155 // withoutUnicodeBuffer,
2157 // withoutUnicodePtr);
2159 // //fill the buffer with the char
2160 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2162 // unicodeAsBackSlash = currentCharacter == '\\';
2165 * Tokenize a method body, assuming that curly brackets are properly balanced.
2167 public final void jumpOverMethodBody() {
2168 this.wasAcr = false;
2171 while (true) { // loop for jumping over comments
2172 // ---------Consume white space and handles startPosition---------
2173 boolean isWhiteSpace;
2175 startPosition = currentPosition;
2176 currentCharacter = source[currentPosition++];
2177 // if (((currentCharacter = source[currentPosition++]) == '\\')
2178 // && (source[currentPosition] == 'u')) {
2179 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2181 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2182 pushLineSeparator();
2183 isWhiteSpace = Character.isWhitespace(currentCharacter);
2185 } while (isWhiteSpace);
2186 // -------consume token until } is found---------
2187 switch (currentCharacter) {
2198 test = getNextChar('\\');
2201 scanDoubleQuotedEscapeCharacter();
2202 } catch (InvalidInputException ex) {
2206 // try { // consume next character
2207 unicodeAsBackSlash = false;
2208 currentCharacter = source[currentPosition++];
2209 // if (((currentCharacter = source[currentPosition++]) == '\\')
2210 // && (source[currentPosition] == 'u')) {
2211 // getNextUnicodeChar();
2213 if (withoutUnicodePtr != 0) {
2214 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2217 // } catch (InvalidInputException ex) {
2225 // try { // consume next character
2226 unicodeAsBackSlash = false;
2227 currentCharacter = source[currentPosition++];
2228 // if (((currentCharacter = source[currentPosition++]) == '\\')
2229 // && (source[currentPosition] == 'u')) {
2230 // getNextUnicodeChar();
2232 if (withoutUnicodePtr != 0) {
2233 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2236 // } catch (InvalidInputException ex) {
2238 while (currentCharacter != '"') {
2239 if (currentCharacter == '\r') {
2240 if (source[currentPosition] == '\n')
2243 // the string cannot go further that the line
2245 if (currentCharacter == '\n') {
2247 // the string cannot go further that the line
2249 if (currentCharacter == '\\') {
2251 scanDoubleQuotedEscapeCharacter();
2252 } catch (InvalidInputException ex) {
2256 // try { // consume next character
2257 unicodeAsBackSlash = false;
2258 currentCharacter = source[currentPosition++];
2259 // if (((currentCharacter = source[currentPosition++]) == '\\')
2260 // && (source[currentPosition] == 'u')) {
2261 // getNextUnicodeChar();
2263 if (withoutUnicodePtr != 0) {
2264 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2267 // } catch (InvalidInputException ex) {
2270 } catch (IndexOutOfBoundsException e) {
2276 if ((test = getNextChar('/', '*')) == 0) {
2279 // get the next char
2280 currentCharacter = source[currentPosition++];
2281 // if (((currentCharacter = source[currentPosition++]) ==
2283 // && (source[currentPosition] == 'u')) {
2284 // //-------------unicode traitement ------------
2285 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2286 // currentPosition++;
2287 // while (source[currentPosition] == 'u') {
2288 // currentPosition++;
2291 // Character.getNumericValue(source[currentPosition++]))
2295 // Character.getNumericValue(source[currentPosition++]))
2299 // Character.getNumericValue(source[currentPosition++]))
2303 // Character.getNumericValue(source[currentPosition++]))
2306 // //error don't care of the value
2307 // currentCharacter = 'A';
2308 // } //something different from \n and \r
2310 // currentCharacter =
2311 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2314 while (currentCharacter != '\r' && currentCharacter != '\n') {
2315 // get the next char
2316 currentCharacter = source[currentPosition++];
2317 // if (((currentCharacter = source[currentPosition++])
2319 // && (source[currentPosition] == 'u')) {
2320 // //-------------unicode traitement ------------
2321 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2322 // currentPosition++;
2323 // while (source[currentPosition] == 'u') {
2324 // currentPosition++;
2327 // Character.getNumericValue(source[currentPosition++]))
2331 // Character.getNumericValue(source[currentPosition++]))
2335 // Character.getNumericValue(source[currentPosition++]))
2339 // Character.getNumericValue(source[currentPosition++]))
2342 // //error don't care of the value
2343 // currentCharacter = 'A';
2344 // } //something different from \n and \r
2346 // currentCharacter =
2347 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2351 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2352 pushLineSeparator();
2353 } catch (IndexOutOfBoundsException e) {
2354 } // an eof will them be generated
2358 // traditional and annotation comment
2359 boolean star = false;
2360 // try { // consume next character
2361 unicodeAsBackSlash = false;
2362 currentCharacter = source[currentPosition++];
2363 // if (((currentCharacter = source[currentPosition++]) == '\\')
2364 // && (source[currentPosition] == 'u')) {
2365 // getNextUnicodeChar();
2367 if (withoutUnicodePtr != 0) {
2368 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2371 // } catch (InvalidInputException ex) {
2373 if (currentCharacter == '*') {
2376 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2377 pushLineSeparator();
2378 try { // get the next char
2379 currentCharacter = source[currentPosition++];
2380 // if (((currentCharacter = source[currentPosition++]) ==
2382 // && (source[currentPosition] == 'u')) {
2383 // //-------------unicode traitement ------------
2384 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2385 // currentPosition++;
2386 // while (source[currentPosition] == 'u') {
2387 // currentPosition++;
2390 // Character.getNumericValue(source[currentPosition++]))
2394 // Character.getNumericValue(source[currentPosition++]))
2398 // Character.getNumericValue(source[currentPosition++]))
2402 // Character.getNumericValue(source[currentPosition++]))
2405 // //error don't care of the value
2406 // currentCharacter = 'A';
2407 // } //something different from * and /
2409 // currentCharacter =
2410 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2413 // loop until end of comment */
2414 while ((currentCharacter != '/') || (!star)) {
2415 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2416 pushLineSeparator();
2417 star = currentCharacter == '*';
2419 currentCharacter = source[currentPosition++];
2420 // if (((currentCharacter = source[currentPosition++])
2422 // && (source[currentPosition] == 'u')) {
2423 // //-------------unicode traitement ------------
2424 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2425 // currentPosition++;
2426 // while (source[currentPosition] == 'u') {
2427 // currentPosition++;
2430 // Character.getNumericValue(source[currentPosition++]))
2434 // Character.getNumericValue(source[currentPosition++]))
2438 // Character.getNumericValue(source[currentPosition++]))
2442 // Character.getNumericValue(source[currentPosition++]))
2445 // //error don't care of the value
2446 // currentCharacter = 'A';
2447 // } //something different from * and /
2449 // currentCharacter =
2450 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2454 } catch (IndexOutOfBoundsException e) {
2462 if (isPHPIdentOrVarStart(currentCharacter)) {
2464 scanIdentifierOrKeyword((currentCharacter == '$'));
2465 } catch (InvalidInputException ex) {
2470 if (ObviousIdentCharNatures[currentCharacter] == C_DIGIT) {
2471 // if (Character.isDigit(currentCharacter)) {
2474 } catch (InvalidInputException ex) {
2481 // -----------------end switch while try--------------------
2482 } catch (IndexOutOfBoundsException e) {
2483 } catch (InvalidInputException e) {
2488 // public final boolean jumpOverUnicodeWhiteSpace()
2489 // throws InvalidInputException {
2491 // //handle the case of unicode. Jump over the next whiteSpace
2492 // //making startPosition pointing on the next available char
2493 // //On false, the currentCharacter is filled up with a potential
2497 // this.wasAcr = false;
2498 // int c1, c2, c3, c4;
2499 // int unicodeSize = 6;
2500 // currentPosition++;
2501 // while (source[currentPosition] == 'u') {
2502 // currentPosition++;
2506 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2508 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2510 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2512 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2514 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2517 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2518 // if (recordLineSeparator
2519 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2520 // pushLineSeparator();
2521 // if (Character.isWhitespace(currentCharacter))
2524 // //buffer the new char which is not a white space
2525 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2526 // //withoutUnicodePtr == 1 is true here
2528 // } catch (IndexOutOfBoundsException e) {
2529 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
/**
 * Produces a copy of the recorded line-end positions, sized to the entries currently in use
 * (indices 0 through linePtr of lineEnds).
 *
 * NOTE(review): the declaration of "copy" and the return statement are not visible in this
 * excerpt; presumably "copy" is what gets returned - confirm against the full file.
 */
2532 public final int[] getLineEnds() {
2533 // return a bounded copy of this.lineEnds
// A fresh array of exactly linePtr + 1 ints is allocated inside the call and receives
// lineEnds[0..linePtr]; the copy is therefore independent of the internal buffer.
2535 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
/**
 * Accessor for the scanner's source characters.
 *
 * NOTE(review): the body is not visible in this excerpt; presumably it returns the "source"
 * array used throughout this class (whether it copies is unknown) - confirm.
 */
2539 public char[] getSource() {
/**
 * Tells whether a token code denotes an identifier or a keyword.
 *
 * @param token a terminal-symbol code
 * @return true when token equals TokenNameIdentifier or is numerically greater than
 *         TokenNameKEYWORD
 */
2543 public static boolean isIdentifierOrKeyword(int token) {
// NOTE(review): the "greater than" test presumably relies on all keyword codes being numbered
// above TokenNameKEYWORD - verify against the constants in ITerminalSymbols.
2544 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
/**
 * Returns the source of a one-character token, i.e. the character found at
 * source[startPosition].
 *
 * NOTE(review): the lines between the read of charOne and the final return are not visible in
 * this excerpt; judging by the existing comments they presumably hand back shared, pre-built
 * arrays for common characters - confirm against the full file.
 */
2547 final char[] optimizedCurrentTokenSource1() {
2548 // return always the same char[] build only once
2549 // optimization at no speed cost of 99.5 % of the singleCharIdentifier
2550 char charOne = source[startPosition];
// Visible fallback: allocate a fresh single-element array holding the character.
2605 return new char[] { charOne };
/**
 * Returns a char[] holding the two characters of the current token (source[startPosition] and
 * source[startPosition + 1]), trying to hand back an array that already exists instead of
 * allocating a new one each time.
 *
 * NOTE(review): several short lines of this method (declarations of c0, c1, i and r, the guards
 * of the shared-constant returns, the returns inside the scans and the final return) are not
 * visible in this excerpt; the comments below describe only what the visible lines establish.
 */
2609 final char[] optimizedCurrentTokenSource2() {
2611 c0 = source[startPosition];
2612 c1 = source[startPosition + 1];
2614 // return always the same char[] build only once
2615 // optimization at no speed cost of 99.5 % of the singleCharIdentifier
// Each return below hands back one of the shared charArray_va .. charArray_vz constants.
// NOTE(review): the conditions selecting them are not visible here (presumably a dispatch on
// c0 and c1) - confirm.
2618 return charArray_va;
2620 return charArray_vb;
2622 return charArray_vc;
2624 return charArray_vd;
2626 return charArray_ve;
2628 return charArray_vf;
2630 return charArray_vg;
2632 return charArray_vh;
2634 return charArray_vi;
2636 return charArray_vj;
2638 return charArray_vk;
2640 return charArray_vl;
2642 return charArray_vm;
2644 return charArray_vn;
2646 return charArray_vo;
2648 return charArray_vp;
2650 return charArray_vq;
2652 return charArray_vr;
2654 return charArray_vs;
2656 return charArray_vt;
2658 return charArray_vu;
2660 return charArray_vv;
2662 return charArray_vw;
2664 return charArray_vx;
2666 return charArray_vy;
2668 return charArray_vz;
2671 // try to return the same char[] build only once
// Bucket selection: both characters are combined and reduced modulo TableSize;
// charArray_length[0] holds the buckets used by this two-character variant.
2672 int hash = ((c0 << 6) + c1) % TableSize;
2673 char[][] table = charArray_length[0][hash];
// First scan: walk the bucket slots up to InternalTableSize, comparing both characters.
// NOTE(review): the starting value of i and the action on a match are not visible here.
2675 while (++i < InternalTableSize) {
2676 char[] charArray = table[i];
2677 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2680 // ---------other side---------
// Second scan: slots up to and including newEntry2 are compared the same way.
2682 int max = newEntry2;
2683 while (++i <= max) {
2684 char[] charArray = table[i];
2685 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2688 // --------add the entry-------
// No match: advance the slot index and store a newly allocated two-character array there.
// NOTE(review): what happens when the index reaches InternalTableSize is not visible here
// (presumably a wrap-around to the first slot) - confirm.
2689 if (++max >= InternalTableSize)
2692 table[max] = (r = new char[] { c0, c1 });
/**
 * Returns a char[] holding the three characters of the current token (starting at
 * source[startPosition]), trying to reuse an array stored earlier in a hash-indexed table
 * instead of allocating a new one.
 *
 * NOTE(review): several short lines (declarations, the tail of the hash expression, the returns
 * and the slot-index initialisation) are not visible in this excerpt.
 */
2697 final char[] optimizedCurrentTokenSource3() {
2698 // try to return the same char[] build only once
// The three characters are read and combined into a hash in one expression.
// NOTE(review): the expression continues on a line not visible here (presumably "% TableSize;",
// as in the two-character variant) - confirm.
2700 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
// charArray_length[1] holds the buckets used by this three-character variant.
2702 char[][] table = charArray_length[1][hash];
// First scan: compare all three characters against slots up to InternalTableSize.
2704 while (++i < InternalTableSize) {
2705 char[] charArray = table[i];
2706 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2709 // ---------other side---------
// Second scan: slots up to and including newEntry3.
2711 int max = newEntry3;
2712 while (++i <= max) {
2713 char[] charArray = table[i];
2714 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2717 // --------add the entry-------
// No match: advance the slot index (overflow handling not visible here) and store a newly
// allocated three-character array in the bucket.
2718 if (++max >= InternalTableSize)
2721 table[max] = (r = new char[] { c0, c1, c2 });
/**
 * Returns a char[] holding the four characters of the current token (starting at
 * source[startPosition]), trying to reuse an array stored earlier in a hash-indexed table
 * instead of allocating a new one.
 *
 * NOTE(review): several short lines (the tail of the hash expression, the returns and the
 * slot-index initialisation) are not visible in this excerpt.
 */
2726 final char[] optimizedCurrentTokenSource4() {
2727 // try to return the same char[] build only once
2728 char c0, c1, c2, c3;
// The hash is accumulated as a long (the first character is widened before shifting by 18).
// NOTE(review): the expression continues on a line not visible here (presumably a reduction
// modulo TableSize, which would make the later int cast lossless) - confirm.
2729 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2730 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
// charArray_length[2] holds the buckets used by this four-character variant.
2732 char[][] table = charArray_length[2][(int) hash];
// First scan: compare all four characters against slots up to InternalTableSize.
2734 while (++i < InternalTableSize) {
2735 char[] charArray = table[i];
2736 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2739 // ---------other side---------
// Second scan: slots up to and including newEntry4.
2741 int max = newEntry4;
2742 while (++i <= max) {
2743 char[] charArray = table[i];
2744 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2747 // --------add the entry-------
// No match: advance the slot index (overflow handling not visible here) and store a newly
// allocated four-character array in the bucket.
2748 if (++max >= InternalTableSize)
2751 table[max] = (r = new char[] { c0, c1, c2, c3 });
/**
 * Returns a char[] holding the five characters of the current token (starting at
 * source[startPosition]), trying to reuse an array stored earlier in a hash-indexed table
 * instead of allocating a new one.
 *
 * NOTE(review): several short lines (the tail of the hash expression, the returns and the
 * slot-index initialisation) are not visible in this excerpt.
 */
2756 final char[] optimizedCurrentTokenSource5() {
2757 // try to return the same char[] build only once
2758 char c0, c1, c2, c3, c4;
// The hash is accumulated as a long; the two leading characters are widened before shifting.
// NOTE(review): the expression continues on a line not visible here (presumably a reduction
// modulo TableSize, which would make the later int cast lossless) - confirm.
2759 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2760 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
// charArray_length[3] holds the buckets used by this five-character variant.
2762 char[][] table = charArray_length[3][(int) hash];
// First scan: compare all five characters against slots up to InternalTableSize.
2764 while (++i < InternalTableSize) {
2765 char[] charArray = table[i];
2766 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2769 // ---------other side---------
// Second scan: slots up to and including newEntry5.
2771 int max = newEntry5;
2772 while (++i <= max) {
2773 char[] charArray = table[i];
2774 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2777 // --------add the entry-------
// No match: advance the slot index (overflow handling not visible here) and store a newly
// allocated five-character array in the bucket.
2778 if (++max >= InternalTableSize)
2781 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
/**
 * Returns a char[] holding the six characters of the current token (starting at
 * source[startPosition]), trying to reuse an array stored earlier in a hash-indexed table
 * instead of allocating a new one.
 *
 * NOTE(review): several short lines (the tail of the hash expression, the returns and the
 * slot-index initialisation) are not visible in this excerpt.
 */
2786 final char[] optimizedCurrentTokenSource6() {
2787 // try to return the same char[] build only once
2788 char c0, c1, c2, c3, c4, c5;
// The hash is accumulated as a long; the three leading characters are widened before shifting.
// NOTE(review): the shift widths here are 32/24/18/12/6, whereas the shorter variants step by 6
// throughout. Since candidates are compared character by character below, this can only affect
// how entries spread over buckets, not which array is matched. The expression continues on a
// line not visible here (presumably a reduction modulo TableSize) - confirm.
2789 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2790 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2791 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
// charArray_length[4] holds the buckets used by this six-character variant.
2793 char[][] table = charArray_length[4][(int) hash];
// First scan: compare all six characters against slots up to InternalTableSize.
2795 while (++i < InternalTableSize) {
2796 char[] charArray = table[i];
2797 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2798 && (c5 == charArray[5]))
2801 // ---------other side---------
// Second scan: slots up to and including newEntry6.
2803 int max = newEntry6;
2804 while (++i <= max) {
2805 char[] charArray = table[i];
2806 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2807 && (c5 == charArray[5]))
2810 // --------add the entry-------
// No match: advance the slot index (overflow handling not visible here) and store a newly
// allocated six-character array in the bucket.
2811 if (++max >= InternalTableSize)
2814 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
/**
 * Records the position of the line separator held in currentCharacter (a carriage return or a
 * line feed located at currentPosition - 1) in the lineEnds buffer, treating a carriage return
 * directly followed by a line feed as one line end.
 *
 * Reads currentCharacter, currentPosition, source and wasAcr; writes lineEnds and linePtr.
 *
 * NOTE(review): several short lines of this method (the "try {" openers, "else" branches,
 * early returns and updates of wasAcr and currentPosition) are not visible in this excerpt;
 * the comments below cover only what the visible lines establish.
 *
 * @throws InvalidInputException declared by the signature; no throw is visible in this excerpt
 */
2819 public final void pushLineSeparator() throws InvalidInputException {
2820 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// Number of extra slots added each time lineEnds turns out to be full.
2821 final int INCREMENT = 250;
2822 if (this.checkNonExternalizedStringLiterals) {
2823 // reinitialize the current line for non externalize strings purpose
2826 // currentCharacter is at position currentPosition-1
// --- carriage return ---
2828 if (currentCharacter == '\r') {
2829 int separatorPos = currentPosition - 1;
// Guard against recording a position that is not beyond the last recorded line end
// (the statement executed when the guard holds is not visible here, presumably a return).
2830 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2832 // System.out.println("CR-" + separatorPos);
// Optimistic store: linePtr is incremented before the array bounds check happens, so when the
// store fails the index is already the right one and is reused after growing the buffer.
2834 lineEnds[++linePtr] = separatorPos;
2835 } catch (IndexOutOfBoundsException e) {
2836 // linePtr value is correct
2837 int oldLength = lineEnds.length;
2838 int[] old = lineEnds;
2839 lineEnds = new int[oldLength + INCREMENT];
2840 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2841 lineEnds[linePtr] = separatorPos;
2843 // look-ahead for merged cr+lf
// If a line feed follows immediately, the entry just recorded is moved onto the line feed.
// The catch below covers the carriage return being the last character of source.
2845 if (source[currentPosition] == '\n') {
2846 // System.out.println("look-ahead LF-" + currentPosition);
2847 lineEnds[linePtr] = currentPosition;
2853 } catch (IndexOutOfBoundsException e) {
// --- line feed ---
2858 if (currentCharacter == '\n') {
2859 // must merge eventual cr followed by lf
// A carriage return recorded at the directly preceding position is merged with this line feed
// by moving the existing entry instead of adding a new one.
2860 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2861 // System.out.println("merge LF-" + (currentPosition - 1));
2862 lineEnds[linePtr] = currentPosition - 1;
2864 int separatorPos = currentPosition - 1;
2865 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2867 // System.out.println("LF-" + separatorPos);
// Same optimistic store / grow-on-failure scheme as in the carriage-return branch.
2869 lineEnds[++linePtr] = separatorPos;
2870 } catch (IndexOutOfBoundsException e) {
2871 // linePtr value is correct
2872 int oldLength = lineEnds.length;
2873 int[] old = lineEnds;
2874 lineEnds = new int[oldLength + INCREMENT];
2875 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2876 lineEnds[linePtr] = separatorPos;
/**
 * Variant of the line-separator recording used when the carriage return or line feed held in
 * currentCharacter was read as a unicode character (see the comment inside the method).
 * Positions are computed as currentPosition - 6 instead of currentPosition - 1.
 *
 * NOTE(review): the offset 6 presumably is the length of a six-character unicode escape
 * sequence - confirm. Several short lines (the "try {" openers, "else" branches, early returns
 * and updates of wasAcr) are not visible in this excerpt.
 */
2884 public final void pushUnicodeLineSeparator() {
2885 // isUnicode means that the \r or \n has been read as a unicode character
2886 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// Number of extra slots added each time lineEnds turns out to be full.
2887 final int INCREMENT = 250;
2888 // currentCharacter is at position currentPosition-1
2889 if (this.checkNonExternalizedStringLiterals) {
2890 // reinitialize the current line for non externalize strings purpose
// --- carriage return ---
2894 if (currentCharacter == '\r') {
2895 int separatorPos = currentPosition - 6;
// Guard against recording a position that is not beyond the last recorded line end
// (the statement executed when the guard holds is not visible here, presumably a return).
2896 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2898 // System.out.println("CR-" + separatorPos);
// Optimistic store: linePtr is incremented before the array bounds check happens, so when the
// store fails the index is already the right one and is reused after growing the buffer.
2900 lineEnds[++linePtr] = separatorPos;
2901 } catch (IndexOutOfBoundsException e) {
2902 // linePtr value is correct
2903 int oldLength = lineEnds.length;
2904 int[] old = lineEnds;
2905 lineEnds = new int[oldLength + INCREMENT];
2906 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2907 lineEnds[linePtr] = separatorPos;
2909 // look-ahead for merged cr+lf
// NOTE(review): no catch for reading past the end of source is visible around this look-ahead,
// unlike the one visible in pushLineSeparator - check whether the end of input is handled.
2910 if (source[currentPosition] == '\n') {
2911 // System.out.println("look-ahead LF-" + currentPosition);
2912 lineEnds[linePtr] = currentPosition;
// --- line feed ---
2920 if (currentCharacter == '\n') {
2921 // must merge eventual cr followed by lf
// A carriage return whose recorded position is currentPosition - 7 is merged with this line
// feed by moving the existing entry instead of adding a new one.
2922 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2923 // System.out.println("merge LF-" + (currentPosition - 1));
2924 lineEnds[linePtr] = currentPosition - 6;
2926 int separatorPos = currentPosition - 6;
2927 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2929 // System.out.println("LF-" + separatorPos);
// Same optimistic store / grow-on-failure scheme as in the carriage-return branch.
2931 lineEnds[++linePtr] = separatorPos;
2932 } catch (IndexOutOfBoundsException e) {
2933 // linePtr value is correct
2934 int oldLength = lineEnds.length;
2935 int[] old = lineEnds;
2936 lineEnds = new int[oldLength + INCREMENT];
2937 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2938 lineEnds[linePtr] = separatorPos;
/**
 * Pushes the comment that was just scanned onto the parallel commentStarts / commentStops
 * stacks. The start recorded is startPosition; the stop is encoded by sign: line and block
 * comments store a negated position, any other token keeps the positive currentPosition.
 *
 * NOTE(review): the "switch" header and the "break" statements are not visible in this
 * excerpt; the description assumes each case assigns its own value and then leaves the switch.
 *
 * @param token the comment token code (TokenNameCOMMENT_LINE, TokenNameCOMMENT_BLOCK or another
 *              comment kind)
 */
2946 public void recordComment(int token) {
// Default stop position, kept when neither case below applies.
2948 int stopPosition = this.currentPosition;
2950 case TokenNameCOMMENT_LINE:
// Line comments end at lastCommentLinePosition rather than at currentPosition.
2951 stopPosition = -this.lastCommentLinePosition;
2953 case TokenNameCOMMENT_BLOCK:
2954 stopPosition = -this.currentPosition;
2958 // a new comment is recorded
2959 int length = this.commentStops.length;
2960 if (++this.commentPtr >= length) {
// Both stacks grow by 30 slots. Arguments are evaluated left to right, so the first argument
// still refers to the old array when the third one allocates and assigns the replacement.
2961 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2962 // grows the positions buffers too
2963 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2965 this.commentStops[this.commentPtr] = stopPosition;
2966 this.commentStarts[this.commentPtr] = this.startPosition;
2969 // public final void recordComment(boolean isJavadoc) {
2970 // // a new annotation comment is recorded
2972 // commentStops[++commentPtr] = isJavadoc
2973 // ? currentPosition
2974 // : -currentPosition;
2975 // } catch (IndexOutOfBoundsException e) {
2976 // int oldStackLength = commentStops.length;
2977 // int[] oldStack = commentStops;
2978 // commentStops = new int[oldStackLength + 30];
2979 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2980 // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2981 // //grows the positions buffers too
2982 // int[] old = commentStarts;
2983 // commentStarts = new int[oldStackLength + 30];
2984 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2986 // //the buffer is of a correct size here
2987 // commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning can restart on the range given.
 *
 * NOTE(review): one short line of the body is not visible in this excerpt.
 *
 * @param begin position assigned to initialPosition, startPosition and currentPosition
 * @param end   last position of the range; eofPosition is set to end + 1, except when end is
 *              Integer.MAX_VALUE, where adding one would overflow and end is used unchanged
 */
2989 public void resetTo(int begin, int end) {
2990 // reset the scanner to a given position where it may rescan again
2992 initialPosition = startPosition = currentPosition = begin;
2993 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2994 commentPtr = -1; // reset comment stack
/**
 * Consumes the character at currentPosition (the one following a backslash in a single-quoted
 * string, judging by the method name) and stores the resulting character in currentCharacter.
 * The visible assignments produce either a single quote or a backslash.
 *
 * NOTE(review): the case labels and "break" statements of the switch are not visible in this
 * excerpt, so which input character maps to which assignment cannot be stated from here.
 *
 * @throws InvalidInputException declared by the signature; no throw is visible in this excerpt
 */
2997 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2998 // the string with "\\u" is a legal string of two chars \ and u
2999 // thus we use a direct access to the source (for regular cases).
// The unicode-aware path below is disabled; only the direct read further down is active.
3000 // if (unicodeAsBackSlash) {
3001 // // consume next character
3002 // unicodeAsBackSlash = false;
3003 // if (((currentCharacter = source[currentPosition++]) == '\\')
3004 // && (source[currentPosition] == 'u')) {
3005 // getNextUnicodeChar();
3007 // if (withoutUnicodePtr != 0) {
3008 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Direct read; advances currentPosition by one.
3012 currentCharacter = source[currentPosition++];
3013 switch (currentCharacter) {
3015 currentCharacter = '\'';
3018 currentCharacter = '\\';
3021 currentCharacter = '\\';
/**
 * Consumes the character at currentPosition (the one following a backslash in a double-quoted
 * string, judging by the method name) and replaces currentCharacter with the character the
 * escape stands for. The visible assignments produce tab, line feed, carriage return, double
 * quote, single quote, backslash and dollar; anything else goes through the octal handling,
 * which reads up to two further characters from source.
 *
 * NOTE(review): the case labels, "break" statements, the position rewinds behind the
 * "ignore last character" comments and the guard of the throw are not visible in this excerpt.
 *
 * @throws InvalidInputException with INVALID_ESCAPE from the octal branch (guarding condition
 *                               not visible here)
 */
3026 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
// Direct read; advances currentPosition by one.
3027 currentCharacter = source[currentPosition++];
3028 switch (currentCharacter) {
// The backspace and form-feed translations are disabled (commented out).
3030 // currentCharacter = '\b';
3033 currentCharacter = '\t';
3036 currentCharacter = '\n';
3039 // currentCharacter = '\f';
3042 currentCharacter = '\r';
3045 currentCharacter = '\"';
3048 currentCharacter = '\'';
3051 currentCharacter = '\\';
3054 currentCharacter = '$';
3057 // -----------octal escape--------------
3059 // OctalDigit OctalDigit
3060 // ZeroToThree OctalDigit OctalDigit
// "number" accumulates the octal value digit by digit (number * 8 + digit).
3061 int number = Character.getNumericValue(currentCharacter);
3062 if (number >= 0 && number <= 7) {
// True when the first digit is 4..7: a third digit would then not belong to the escape.
3063 boolean zeroToThreeNot = number > 3;
3064 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3065 int digit = Character.getNumericValue(currentCharacter);
3066 if (digit >= 0 && digit <= 7) {
3067 number = (number * 8) + digit;
3068 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3069 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3070 // Digit --> ignore last character
3073 digit = Character.getNumericValue(currentCharacter);
3074 if (digit >= 0 && digit <= 7) {
3075 // has read \ZeroToThree OctalDigit OctalDigit
3076 number = (number * 8) + digit;
3077 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3078 // --> ignore last character
3082 } else { // has read \OctalDigit NonDigit--> ignore last
3086 } else { // has read \OctalDigit NonOctalDigit--> ignore last
3090 } else { // has read \OctalDigit --> ignore last character
// NOTE(review): the condition guarding this throw is not visible here (presumably a range check
// on "number") - confirm.
3094 throw new InvalidInputException(INVALID_ESCAPE);
// The accumulated octal value becomes the current character.
3095 currentCharacter = (char) number;
3098 // throw new InvalidInputException(INVALID_ESCAPE);
3102 // public int scanIdentifierOrKeyword() throws InvalidInputException {
3103 // return scanIdentifierOrKeyword( false );
3105 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
3107 // first dispatch on the first char.
3108 // then the length. If there are several
3109 // keywors with the same length AND the same first char, then do another
3110 // disptach on the second char :-)...cool....but fast !
3111 useAssertAsAnIndentifier = false;
3112 while (getNextCharAsJavaIdentifierPart()) {
3116 // if (new String(getCurrentTokenSource()).equals("$this")) {
3117 // return TokenNamethis;
3119 return TokenNameVariable;
3124 // if (withoutUnicodePtr == 0)
3125 // quick test on length == 1 but not on length > 12 while most identifier
3126 // have a length which is <= 12...but there are lots of identifier with
3127 // only one char....
3129 if ((length = currentPosition - startPosition) == 1)
3130 return TokenNameIdentifier;
3132 data = new char[length];
3133 index = startPosition;
3134 for (int i = 0; i < length; i++) {
3135 data[i] = Character.toLowerCase(source[index + i]);
3139 // if ((length = withoutUnicodePtr) == 1)
3140 // return TokenNameIdentifier;
3141 // // data = withoutUnicodeBuffer;
3142 // data = new char[withoutUnicodeBuffer.length];
3143 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3144 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3148 firstLetter = data[index];
3149 switch (firstLetter) {
3154 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3155 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3156 return TokenNameFILE;
3157 index = 0; // __LINE__
3158 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3159 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3160 return TokenNameLINE;
3164 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3165 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3166 return TokenNameCLASS_C;
3170 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3171 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3172 && (data[++index] == '_'))
3173 return TokenNameMETHOD_C;
3177 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3178 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3179 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3180 return TokenNameFUNC_C;
3183 return TokenNameIdentifier;
3185 // as and array abstract
3189 if ((data[++index] == 's')) {
3192 return TokenNameIdentifier;
3196 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3197 return TokenNameand;
3199 return TokenNameIdentifier;
3203 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3204 return TokenNamearray;
3206 return TokenNameIdentifier;
3208 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3209 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3210 return TokenNameabstract;
3212 return TokenNameIdentifier;
3214 return TokenNameIdentifier;
3220 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3221 return TokenNamebreak;
3223 return TokenNameIdentifier;
3225 return TokenNameIdentifier;
3228 // case catch class clone const continue
3231 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3232 return TokenNamecase;
3234 return TokenNameIdentifier;
3236 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3237 return TokenNamecatch;
3239 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3240 return TokenNameclass;
3242 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3243 return TokenNameclone;
3245 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3246 return TokenNameconst;
3248 return TokenNameIdentifier;
3250 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3251 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3252 return TokenNamecontinue;
3254 return TokenNameIdentifier;
3256 return TokenNameIdentifier;
3259 // declare default do die
3260 // TODO delete define ==> no keyword !
3263 if ((data[++index] == 'o'))
3266 return TokenNameIdentifier;
3268 // if ((data[++index] == 'e')
3269 // && (data[++index] == 'f')
3270 // && (data[++index] == 'i')
3271 // && (data[++index] == 'n')
3272 // && (data[++index] == 'e'))
3273 // return TokenNamedefine;
3275 // return TokenNameIdentifier;
3277 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3278 && (data[++index] == 'r') && (data[++index] == 'e'))
3279 return TokenNamedeclare;
3281 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3282 && (data[++index] == 'l') && (data[++index] == 't'))
3283 return TokenNamedefault;
3285 return TokenNameIdentifier;
3287 return TokenNameIdentifier;
3290 // echo else exit elseif extends eval
3293 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3294 return TokenNameecho;
3295 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3296 return TokenNameelse;
3297 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3298 return TokenNameexit;
3299 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3300 return TokenNameeval;
3302 return TokenNameIdentifier;
3305 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3306 return TokenNameendif;
3307 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3308 return TokenNameempty;
3310 return TokenNameIdentifier;
3313 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3314 && (data[++index] == 'r'))
3315 return TokenNameendfor;
3316 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3317 && (data[++index] == 'f'))
3318 return TokenNameelseif;
3320 return TokenNameIdentifier;
3322 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3323 && (data[++index] == 'd') && (data[++index] == 's'))
3324 return TokenNameextends;
3326 return TokenNameIdentifier;
3329 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3330 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3331 return TokenNameendwhile;
3333 return TokenNameIdentifier;
3336 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3337 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3338 return TokenNameendswitch;
3340 return TokenNameIdentifier;
3343 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3344 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3345 && (data[++index] == 'e'))
3346 return TokenNameenddeclare;
3348 if ((data[++index] == 'n') // endforeach
3349 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3350 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3351 return TokenNameendforeach;
3353 return TokenNameIdentifier;
3355 return TokenNameIdentifier;
3358 // for false final function
3361 if ((data[++index] == 'o') && (data[++index] == 'r'))
3362 return TokenNamefor;
3364 return TokenNameIdentifier;
3366 // if ((data[++index] == 'a') && (data[++index] == 'l')
3367 // && (data[++index] == 's') && (data[++index] == 'e'))
3368 // return TokenNamefalse;
3369 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3370 return TokenNamefinal;
3372 return TokenNameIdentifier;
3375 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3376 && (data[++index] == 'c') && (data[++index] == 'h'))
3377 return TokenNameforeach;
3379 return TokenNameIdentifier;
3382 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3383 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3384 return TokenNamefunction;
3386 return TokenNameIdentifier;
3388 return TokenNameIdentifier;
3393 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3394 && (data[++index] == 'l')) {
3395 return TokenNameglobal;
3398 return TokenNameIdentifier;
3400 // if int isset include include_once instanceof interface implements
3403 if (data[++index] == 'f')
3406 return TokenNameIdentifier;
3408 // if ((data[++index] == 'n') && (data[++index] == 't'))
3409 // return TokenNameint;
3411 // return TokenNameIdentifier;
3413 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3414 return TokenNameisset;
3416 return TokenNameIdentifier;
3418 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3419 && (data[++index] == 'd') && (data[++index] == 'e'))
3420 return TokenNameinclude;
3422 return TokenNameIdentifier;
3425 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3426 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3427 return TokenNameinterface;
3429 return TokenNameIdentifier;
3432 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3433 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3434 && (data[++index] == 'f'))
3435 return TokenNameinstanceof;
3436 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3437 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3438 && (data[++index] == 's'))
3439 return TokenNameimplements;
3441 return TokenNameIdentifier;
3443 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3444 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3445 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3446 return TokenNameinclude_once;
3448 return TokenNameIdentifier;
3450 return TokenNameIdentifier;
3455 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3456 return TokenNamelist;
3459 return TokenNameIdentifier;
3464 if ((data[++index] == 'e') && (data[++index] == 'w'))
3465 return TokenNamenew;
3467 return TokenNameIdentifier;
3469 // if ((data[++index] == 'u') && (data[++index] == 'l')
3470 // && (data[++index] == 'l'))
3471 // return TokenNamenull;
3473 // return TokenNameIdentifier;
3475 return TokenNameIdentifier;
3480 if (data[++index] == 'r') {
3484 // if (length == 12) {
3485 // if ((data[++index] == 'l')
3486 // && (data[++index] == 'd')
3487 // && (data[++index] == '_')
3488 // && (data[++index] == 'f')
3489 // && (data[++index] == 'u')
3490 // && (data[++index] == 'n')
3491 // && (data[++index] == 'c')
3492 // && (data[++index] == 't')
3493 // && (data[++index] == 'i')
3494 // && (data[++index] == 'o')
3495 // && (data[++index] == 'n')) {
3496 // return TokenNameold_function;
3499 return TokenNameIdentifier;
3501 // print public private protected
3504 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3505 return TokenNameprint;
3507 return TokenNameIdentifier;
3509 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3510 && (data[++index] == 'c')) {
3511 return TokenNamepublic;
3513 return TokenNameIdentifier;
3515 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3516 && (data[++index] == 't') && (data[++index] == 'e')) {
3517 return TokenNameprivate;
3519 return TokenNameIdentifier;
3521 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3522 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3523 return TokenNameprotected;
3525 return TokenNameIdentifier;
3527 return TokenNameIdentifier;
3529 // return require require_once
3531 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3532 && (data[++index] == 'n')) {
3533 return TokenNamereturn;
3535 } else if (length == 7) {
3536 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3537 && (data[++index] == 'r') && (data[++index] == 'e')) {
3538 return TokenNamerequire;
3540 } else if (length == 12) {
3541 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3542 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3543 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3544 return TokenNamerequire_once;
3547 return TokenNameIdentifier;
3549 // self static switch
3552 // if ((data[++index] == 'e') && (data[++index] == 'l') && (data[++index]
3554 // return TokenNameself;
3556 // return TokenNameIdentifier;
3558 if (data[++index] == 't')
3559 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3560 return TokenNamestatic;
3562 return TokenNameIdentifier;
3563 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3564 && (data[++index] == 'h'))
3565 return TokenNameswitch;
3567 return TokenNameIdentifier;
3569 return TokenNameIdentifier;
3575 if ((data[++index] == 'r') && (data[++index] == 'y'))
3576 return TokenNametry;
3578 return TokenNameIdentifier;
3580 // if ((data[++index] == 'r') && (data[++index] == 'u')
3581 // && (data[++index] == 'e'))
3582 // return TokenNametrue;
3584 // return TokenNameIdentifier;
3586 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3587 return TokenNamethrow;
3589 return TokenNameIdentifier;
3591 return TokenNameIdentifier;
3597 if ((data[++index] == 's') && (data[++index] == 'e'))
3598 return TokenNameuse;
3600 return TokenNameIdentifier;
3602 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3603 return TokenNameunset;
3605 return TokenNameIdentifier;
3607 return TokenNameIdentifier;
3613 if ((data[++index] == 'a') && (data[++index] == 'r'))
3614 return TokenNamevar;
3616 return TokenNameIdentifier;
3618 return TokenNameIdentifier;
3624 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3625 return TokenNamewhile;
3627 return TokenNameIdentifier;
3628 // case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3629 // (data[++index]=='e') && (data[++index]=='f')&&
3630 // (data[++index]=='p'))
3631 // return TokenNamewidefp ;
3633 // return TokenNameIdentifier;
3635 return TokenNameIdentifier;
3641 if ((data[++index] == 'o') && (data[++index] == 'r'))
3642 return TokenNamexor;
3644 return TokenNameIdentifier;
3646 return TokenNameIdentifier;
3649 return TokenNameIdentifier;
/**
 * Scans the remainder of a numeric literal and classifies it.
 * <p>
 * The visible branches handle a hexadecimal form (0x / 0X prefix), a form with
 * leading zeros ("potential octal") that may still turn out to be a floating
 * point number, and the general decimal form with optional fraction, exponent
 * and d/D suffix. Recognition of l/L and f/F suffixes is commented out.
 *
 * @param dotPrefix true when the number was introduced by a '.'; it seeds the
 *          local "floating" flag used for the final classification
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException with INVALID_HEXA when the character after the
 *           hex prefix is not a hex digit, or INVALID_FLOAT when an exponent
 *           marker (and optional sign) is not followed by a digit
 */
// NOTE(review): closing braces and other short lines of this method are not
// visible in this excerpt; the comments below describe only the visible statements.
3653 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3654 // when entering this method the currentCharacter is the first
3655 // digit of the number , i.e. it may be preceded by a . when
3656 // dotPrefix is true
3657 boolean floating = dotPrefix;
3658 if ((!dotPrefix) && (currentCharacter == '0')) {
3659 if (getNextChar('x', 'X') >= 0) { // ----------hexa-----------------
3660 // force the first char of the hexa number to exist...
3661 // consume next character
3662 unicodeAsBackSlash = false;
3663 currentCharacter = source[currentPosition++];
3664 // if (((currentCharacter = source[currentPosition++]) == '\\')
3665 // && (source[currentPosition] == 'u')) {
3666 // getNextUnicodeChar();
3668 // if (withoutUnicodePtr != 0) {
3669 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// at least one hex digit is mandatory after the 0x / 0X prefix
3672 if (Character.digit(currentCharacter, 16) == -1)
3673 throw new InvalidInputException(INVALID_HEXA);
// swallow the remaining hex digits
3675 while (getNextCharAsDigit(16)) {
3678 // if (getNextChar('l', 'L') >= 0)
3679 // return TokenNameLongLiteral;
3681 return TokenNameIntegerLiteral;
3683 // there is x or X in the number
3684 // potential octal ! ... some one may write 000099.0 ! thus 00100 <
3685 // 00078.0 is true !!!!! crazy language
3686 if (getNextCharAsDigit()) {
3687 // -------------potential octal-----------------
3688 while (getNextCharAsDigit()) {
3691 // if (getNextChar('l', 'L') >= 0) {
3692 // return TokenNameLongLiteral;
3695 // if (getNextChar('f', 'F') >= 0) {
3696 // return TokenNameFloatingPointLiteral;
3698 if (getNextChar('d', 'D') >= 0) {
3699 return TokenNameDoubleLiteral;
3700 } else { // make the distinction between octal and float ....
3701 if (getNextChar('.')) { // bingo ! ....
3702 while (getNextCharAsDigit()) {
3705 if (getNextChar('e', 'E') >= 0) {
3706 // consume next character
3707 unicodeAsBackSlash = false;
3708 currentCharacter = source[currentPosition++];
3709 // if (((currentCharacter = source[currentPosition++]) == '\\')
3710 // && (source[currentPosition] == 'u')) {
3711 // getNextUnicodeChar();
3713 // if (withoutUnicodePtr != 0) {
3714 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// optional sign of the exponent
3717 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3718 // consume next character
3719 unicodeAsBackSlash = false;
3720 currentCharacter = source[currentPosition++];
3721 // if (((currentCharacter = source[currentPosition++]) == '\\')
3722 // && (source[currentPosition] == 'u')) {
3723 // getNextUnicodeChar();
3725 // if (withoutUnicodePtr != 0) {
3726 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3727 // currentCharacter;
// the exponent needs at least one digit
3731 if (!Character.isDigit(currentCharacter))
3732 throw new InvalidInputException(INVALID_FLOAT);
3733 while (getNextCharAsDigit()) {
3737 // if (getNextChar('f', 'F') >= 0)
3738 // return TokenNameFloatingPointLiteral;
3739 getNextChar('d', 'D'); // jump over potential d or D
3740 return TokenNameDoubleLiteral;
3742 return TokenNameIntegerLiteral;
// ----------general decimal form: integer digits first----------
3749 while (getNextCharAsDigit()) {
3752 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3753 // return TokenNameLongLiteral;
3754 if ((!dotPrefix) && (getNextChar('.'))) { // decimal part that can be empty
3755 while (getNextCharAsDigit()) {
3760 // if floating is true both exponent and suffix may be optional
3761 if (getNextChar('e', 'E') >= 0) {
3763 // consume next character
3764 unicodeAsBackSlash = false;
3765 currentCharacter = source[currentPosition++];
3766 // if (((currentCharacter = source[currentPosition++]) == '\\')
3767 // && (source[currentPosition] == 'u')) {
3768 // getNextUnicodeChar();
3770 // if (withoutUnicodePtr != 0) {
3771 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3774 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3777 unicodeAsBackSlash = false;
3778 currentCharacter = source[currentPosition++];
3779 // if (((currentCharacter = source[currentPosition++]) == '\\')
3780 // && (source[currentPosition] == 'u')) {
3781 // getNextUnicodeChar();
3783 // if (withoutUnicodePtr != 0) {
3784 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// the exponent needs at least one digit
3788 if (!Character.isDigit(currentCharacter))
3789 throw new InvalidInputException(INVALID_FLOAT);
3790 while (getNextCharAsDigit()) {
// an explicit d/D suffix always yields a double
3794 if (getNextChar('d', 'D') >= 0)
3795 return TokenNameDoubleLiteral;
3796 // if (getNextChar('f', 'F') >= 0)
3797 // return TokenNameFloatingPointLiteral;
3798 // the long flag has been tested before
// without a suffix the "floating" flag decides the literal kind
3799 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3803 * Search the line number corresponding to a specific position
// Maps a source position to a line number using the line-end offsets stored
// in lineEnds; only the entries 0..linePtr are taken into account.
// NOTE(review): most of this method's body is not visible in this excerpt. The
// g/d bounds and the </> comparisons against lineEnds[m] suggest a binary
// search over the recorded line ends - confirm against the full method.
3806 public final int getLineNumber(int position) {
// no line-end table has been recorded
3807 if (lineEnds == null)
3809 int length = linePtr + 1;
// g and d are the lower and upper index bounds of the searched range
3812 int g = 0, d = length - 1;
3816 if (position < lineEnds[m]) {
3818 } else if (position > lineEnds[m]) {
3824 if (position < lineEnds[m]) {
// Setter receiving the new PHP-mode flag.
// NOTE(review): the body is not visible in this excerpt; presumably it stores
// mode in the public phpMode field - confirm against the full file.
3830 public void setPHPMode(boolean mode) {
/**
 * Installs a new source buffer without an associated compilation unit.
 * Delegates to setSource(ICompilationUnit, char[]) with a null unit.
 *
 * @param source the characters to scan; null is handled by the delegate
 */
3834 public final void setSource(char[] source) {
3835 setSource(null, source);
/**
 * Installs a new source buffer together with the compilation unit it belongs
 * to and resets the visible scanning state: positions go back to 0, the
 * assert-keyword flag is cleared, the unicode-free buffer is re-allocated with
 * the size of the source and the filler token is reset to TokenNameEOF.
 *
 * @param compilationUnit the unit the source comes from, stored as is (may be null)
 * @param source the characters to scan; null is replaced by an empty array
 */
3838 public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3839 // the source-buffer is set to sourceString
3840 this.compilationUnit = compilationUnit;
// never keep a null buffer: later code reads this.source.length
3841 if (source == null) {
3842 this.source = new char[0];
3844 this.source = source;
3847 initialPosition = currentPosition = 0;
3848 containsAssertKeyword = false;
3849 withoutUnicodeBuffer = new char[this.source.length];
3850 fFillerToken = TokenNameEOF;
3851 // encapsedStringStack = new Stack();
3854 public String toString() {
3855 if (startPosition == source.length)
3856 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3857 if (currentPosition > source.length)
3858 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3859 char front[] = new char[startPosition];
3860 System.arraycopy(source, 0, front, 0, startPosition);
3861 int middleLength = (currentPosition - 1) - startPosition + 1;
3863 if (middleLength > -1) {
3864 middle = new char[middleLength];
3865 System.arraycopy(source, startPosition, middle, 0, middleLength);
3867 middle = new char[0];
3869 char end[] = new char[source.length - (currentPosition - 1)];
3870 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3871 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3872 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable representation of a token kind, intended for debugging.
 * <p>
 * Keywords and operators are rendered as fixed text; tokens that carry text
 * (identifiers, variables, literals, inline HTML, whitespace, comments, ...)
 * are rendered as a label followed by the current token source in parentheses.
 * The last return statement produces "not-a-token(n) " followed by the current
 * token source.
 *
 * @param act the token kind (one of the TokenName... constants)
 * @return the textual form of the token
 */
// NOTE(review): many case labels and a few return statements of this switch
// are not visible in this excerpt.
3876 public final String toStringAction(int act) {
3878 case TokenNameERROR:
3879 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3881 case TokenNameINLINE_HTML:
3882 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3883 case TokenNameIdentifier:
3884 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3885 case TokenNameVariable:
3886 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3887 case TokenNameabstract:
3888 return "abstract"; //$NON-NLS-1$
3890 return "AND"; //$NON-NLS-1$
3891 case TokenNamearray:
3892 return "array"; //$NON-NLS-1$
3894 return "as"; //$NON-NLS-1$
3895 case TokenNamebreak:
3896 return "break"; //$NON-NLS-1$
3898 return "case"; //$NON-NLS-1$
3899 case TokenNameclass:
3900 return "class"; //$NON-NLS-1$
3901 case TokenNamecatch:
3902 return "catch"; //$NON-NLS-1$
3903 case TokenNameclone:
3906 case TokenNameconst:
3909 case TokenNamecontinue:
3910 return "continue"; //$NON-NLS-1$
3911 case TokenNamedefault:
3912 return "default"; //$NON-NLS-1$
3913 // case TokenNamedefine :
3914 // return "define"; //$NON-NLS-1$
3916 return "do"; //$NON-NLS-1$
3918 return "echo"; //$NON-NLS-1$
3920 return "else"; //$NON-NLS-1$
3921 case TokenNameelseif:
3922 return "elseif"; //$NON-NLS-1$
3923 case TokenNameendfor:
3924 return "endfor"; //$NON-NLS-1$
3925 case TokenNameendforeach:
3926 return "endforeach"; //$NON-NLS-1$
3927 case TokenNameendif:
3928 return "endif"; //$NON-NLS-1$
3929 case TokenNameendswitch:
3930 return "endswitch"; //$NON-NLS-1$
3931 case TokenNameendwhile:
3932 return "endwhile"; //$NON-NLS-1$
3935 case TokenNameextends:
3936 return "extends"; //$NON-NLS-1$
3937 // case TokenNamefalse :
3938 // return "false"; //$NON-NLS-1$
3939 case TokenNamefinal:
3940 return "final"; //$NON-NLS-1$
3942 return "for"; //$NON-NLS-1$
3943 case TokenNameforeach:
3944 return "foreach"; //$NON-NLS-1$
3945 case TokenNamefunction:
3946 return "function"; //$NON-NLS-1$
3947 case TokenNameglobal:
3948 return "global"; //$NON-NLS-1$
3950 return "if"; //$NON-NLS-1$
3951 case TokenNameimplements:
3952 return "implements"; //$NON-NLS-1$
3953 case TokenNameinclude:
3954 return "include"; //$NON-NLS-1$
3955 case TokenNameinclude_once:
3956 return "include_once"; //$NON-NLS-1$
3957 case TokenNameinstanceof:
3958 return "instanceof"; //$NON-NLS-1$
3959 case TokenNameinterface:
3960 return "interface"; //$NON-NLS-1$
3961 case TokenNameisset:
3962 return "isset"; //$NON-NLS-1$
3964 return "list"; //$NON-NLS-1$
3966 return "new"; //$NON-NLS-1$
3967 // case TokenNamenull :
3968 // return "null"; //$NON-NLS-1$
3970 return "OR"; //$NON-NLS-1$
3971 case TokenNameprint:
3972 return "print"; //$NON-NLS-1$
3973 case TokenNameprivate:
3974 return "private"; //$NON-NLS-1$
3975 case TokenNameprotected:
3976 return "protected"; //$NON-NLS-1$
3977 case TokenNamepublic:
3978 return "public"; //$NON-NLS-1$
3979 case TokenNamerequire:
3980 return "require"; //$NON-NLS-1$
3981 case TokenNamerequire_once:
3982 return "require_once"; //$NON-NLS-1$
3983 case TokenNamereturn:
3984 return "return"; //$NON-NLS-1$
3985 // case TokenNameself:
3986 // return "self"; //$NON-NLS-1$
3987 case TokenNamestatic:
3988 return "static"; //$NON-NLS-1$
3989 case TokenNameswitch:
3990 return "switch"; //$NON-NLS-1$
3991 // case TokenNametrue :
3992 // return "true"; //$NON-NLS-1$
3993 case TokenNameunset:
3994 return "unset"; //$NON-NLS-1$
3996 return "var"; //$NON-NLS-1$
3997 case TokenNamewhile:
3998 return "while"; //$NON-NLS-1$
4000 return "XOR"; //$NON-NLS-1$
4001 // case TokenNamethis :
4002 // return "$this"; //$NON-NLS-1$
4003 case TokenNameIntegerLiteral:
4004 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4005 case TokenNameDoubleLiteral:
4006 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4007 case TokenNameStringDoubleQuote:
4008 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4009 case TokenNameStringSingleQuote:
4010 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4011 case TokenNameStringInterpolated:
4012 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4013 case TokenNameEncapsedString0:
4014 return "`"; //$NON-NLS-1$
4015 // case TokenNameEncapsedString1:
4016 // return "\'"; //$NON-NLS-1$
4017 // case TokenNameEncapsedString2:
4018 // return "\""; //$NON-NLS-1$
4019 case TokenNameSTRING:
4020 return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4021 case TokenNameHEREDOC:
4022 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4023 case TokenNamePLUS_PLUS:
4024 return "++"; //$NON-NLS-1$
4025 case TokenNameMINUS_MINUS:
4026 return "--"; //$NON-NLS-1$
4027 case TokenNameEQUAL_EQUAL:
4028 return "=="; //$NON-NLS-1$
4029 case TokenNameEQUAL_EQUAL_EQUAL:
4030 return "==="; //$NON-NLS-1$
4031 case TokenNameEQUAL_GREATER:
4032 return "=>"; //$NON-NLS-1$
4033 case TokenNameLESS_EQUAL:
4034 return "<="; //$NON-NLS-1$
4035 case TokenNameGREATER_EQUAL:
4036 return ">="; //$NON-NLS-1$
4037 case TokenNameNOT_EQUAL:
4038 return "!="; //$NON-NLS-1$
4039 case TokenNameNOT_EQUAL_EQUAL:
4040 return "!=="; //$NON-NLS-1$
4041 case TokenNameLEFT_SHIFT:
4042 return "<<"; //$NON-NLS-1$
4043 case TokenNameRIGHT_SHIFT:
4044 return ">>"; //$NON-NLS-1$
4045 case TokenNamePLUS_EQUAL:
4046 return "+="; //$NON-NLS-1$
4047 case TokenNameMINUS_EQUAL:
4048 return "-="; //$NON-NLS-1$
4049 case TokenNameMULTIPLY_EQUAL:
4050 return "*="; //$NON-NLS-1$
4051 case TokenNameDIVIDE_EQUAL:
4052 return "/="; //$NON-NLS-1$
4053 case TokenNameAND_EQUAL:
4054 return "&="; //$NON-NLS-1$
4055 case TokenNameOR_EQUAL:
4056 return "|="; //$NON-NLS-1$
4057 case TokenNameXOR_EQUAL:
4058 return "^="; //$NON-NLS-1$
4059 case TokenNameREMAINDER_EQUAL:
4060 return "%="; //$NON-NLS-1$
4061 case TokenNameDOT_EQUAL:
4062 return ".="; //$NON-NLS-1$
4063 case TokenNameLEFT_SHIFT_EQUAL:
4064 return "<<="; //$NON-NLS-1$
4065 case TokenNameRIGHT_SHIFT_EQUAL:
4066 return ">>="; //$NON-NLS-1$
4067 case TokenNameOR_OR:
4068 return "||"; //$NON-NLS-1$
4069 case TokenNameAND_AND:
4070 return "&&"; //$NON-NLS-1$
4072 return "+"; //$NON-NLS-1$
4073 case TokenNameMINUS:
4074 return "-"; //$NON-NLS-1$
4075 case TokenNameMINUS_GREATER:
4078 return "!"; //$NON-NLS-1$
4079 case TokenNameREMAINDER:
4080 return "%"; //$NON-NLS-1$
4082 return "^"; //$NON-NLS-1$
4084 return "&"; //$NON-NLS-1$
4085 case TokenNameMULTIPLY:
4086 return "*"; //$NON-NLS-1$
4088 return "|"; //$NON-NLS-1$
4089 case TokenNameTWIDDLE:
4090 return "~"; //$NON-NLS-1$
4091 case TokenNameTWIDDLE_EQUAL:
4092 return "~="; //$NON-NLS-1$
4093 case TokenNameDIVIDE:
4094 return "/"; //$NON-NLS-1$
4095 case TokenNameGREATER:
4096 return ">"; //$NON-NLS-1$
4098 return "<"; //$NON-NLS-1$
4099 case TokenNameLPAREN:
4100 return "("; //$NON-NLS-1$
4101 case TokenNameRPAREN:
4102 return ")"; //$NON-NLS-1$
4103 case TokenNameLBRACE:
4104 return "{"; //$NON-NLS-1$
4105 case TokenNameRBRACE:
4106 return "}"; //$NON-NLS-1$
4107 case TokenNameLBRACKET:
4108 return "["; //$NON-NLS-1$
4109 case TokenNameRBRACKET:
4110 return "]"; //$NON-NLS-1$
4111 case TokenNameSEMICOLON:
4112 return ";"; //$NON-NLS-1$
4113 case TokenNameQUESTION:
4114 return "?"; //$NON-NLS-1$
4115 case TokenNameCOLON:
4116 return ":"; //$NON-NLS-1$
4117 case TokenNameCOMMA:
4118 return ","; //$NON-NLS-1$
4120 return "."; //$NON-NLS-1$
4121 case TokenNameEQUAL:
4122 return "="; //$NON-NLS-1$
4125 case TokenNameDOLLAR:
4127 case TokenNameDOLLAR_LBRACE:
4129 case TokenNameLBRACE_DOLLAR:
4132 return "EOF"; //$NON-NLS-1$
4133 case TokenNameWHITESPACE:
4134 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4135 case TokenNameCOMMENT_LINE:
4136 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4137 case TokenNameCOMMENT_BLOCK:
4138 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4139 case TokenNameCOMMENT_PHPDOC:
4140 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4141 // case TokenNameHTML :
4142 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
4145 return "__FILE__"; //$NON-NLS-1$
4147 return "__LINE__"; //$NON-NLS-1$
4148 case TokenNameCLASS_C:
4149 return "__CLASS__"; //$NON-NLS-1$
4150 case TokenNameMETHOD_C:
4151 return "__METHOD__"; //$NON-NLS-1$
4152 case TokenNameFUNC_C:
4153 return "__FUNCTION__"; //$NON-NLS-1$
4154 case TokenNameboolCAST:
4155 return "( bool )"; //$NON-NLS-1$
4156 case TokenNameintCAST:
4157 return "( int )"; //$NON-NLS-1$
4158 case TokenNamedoubleCAST:
4159 return "( double )"; //$NON-NLS-1$
4160 case TokenNameobjectCAST:
4161 return "( object )"; //$NON-NLS-1$
4162 case TokenNamestringCAST:
4163 return "( string )"; //$NON-NLS-1$
// fallback text: shows the numeric token kind together with the current token source
4165 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner that does not check for non-externalized string literals
 * (delegates with checkNonExternalizedStringLiterals = false).
 *
 * @param tokenizeComments forwarded to the full constructor
 * @param tokenizeWhiteSpace forwarded to the full constructor
 */
4173 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
4174 this(tokenizeComments, tokenizeWhiteSpace, false);
/**
 * Creates a scanner with assert mode switched off (delegates with
 * assertMode = false).
 *
 * @param tokenizeComments forwarded to the full constructor
 * @param tokenizeWhiteSpace forwarded to the full constructor
 * @param checkNonExternalizedStringLiterals forwarded to the full constructor
 */
4177 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
4178 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
/**
 * Creates a scanner without string tokenizing and without task tags: delegates
 * with tokenizeStrings = false, null task tags, null task priorities and
 * isTaskCaseSensitive = true.
 *
 * @param tokenizeComments forwarded to the full constructor
 * @param tokenizeWhiteSpace forwarded to the full constructor
 * @param checkNonExternalizedStringLiterals forwarded to the full constructor
 * @param assertMode forwarded to the full constructor
 */
4181 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
4182 boolean assertMode) {
4183 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null, true);
/**
 * Creates a fully configured scanner. All other constructors delegate here.
 *
 * @param tokenizeComments stored in the field of the same name
 * @param tokenizeWhiteSpace stored in the field of the same name
 * @param checkNonExternalizedStringLiterals stored in the field of the same name
 * @param assertMode accepted for signature compatibility only; the
 *          corresponding field assignment is disabled
 * @param tokenizeStrings stored in the field of the same name
 * @param taskTags the task tags looked for in comments, may be null
 * @param taskPriorities the priorities matching taskTags by index, may be null
 * @param isTaskCaseSensitive whether task tags are matched case sensitively
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
  this.eofPosition = Integer.MAX_VALUE;
  this.tokenizeComments = tokenizeComments;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeStrings = tokenizeStrings;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  // this.assertMode = assertMode;
  // this.encapsedStringStack = null;
  this.taskTags = taskTags;
  this.taskPriorities = taskPriorities;
  // The flag used to be silently dropped, so checkTaskTag(), which reads
  // this.isTaskCaseSensitive, never saw the caller's choice.
  this.isTaskCaseSensitive = isTaskCaseSensitive;
}
4199 private void checkNonExternalizeString() throws InvalidInputException {
4200 if (currentLine == null)
4202 parseTags(currentLine);
/**
 * Matches the NLS tags found in the current token source against the string
 * literals collected for a line and publishes the remaining literals in
 * nonNLSStrings.
 * <p>
 * Each tag is the text between TAG_PREFIX and TAG_POSTFIX and is read as a
 * one-based literal index. Afterwards every non-null literal still delivered
 * by the line's iterator is copied to nonNLSStrings; the array is set to null
 * when there is none and trimmed when there are fewer than line.size().
 *
 * @param line the string literals collected for the line
 * @throws InvalidInputException declared by the method; no throwing statement
 *           is visible in this excerpt
 */
// NOTE(review): a few short statements of this method (among them the body of
// the line.exists(i) test) are not visible in this excerpt.
4205 private void parseTags(NLSLine line) throws InvalidInputException {
4206 String s = new String(getCurrentTokenSource());
4207 int pos = s.indexOf(TAG_PREFIX);
4208 int lineLength = line.size();
4210 int start = pos + TAG_PREFIX_LENGTH;
4211 int end = s.indexOf(TAG_POSTFIX, start);
// NOTE(review): end is -1 when a tag prefix is not followed by TAG_POSTFIX, in
// which case substring(start, end) throws StringIndexOutOfBoundsException; a
// guard on end != -1 looks necessary here.
4212 String index = s.substring(start, end);
4215 i = Integer.parseInt(index) - 1;
4216 // Tags are one based not zero based.
4217 } catch (NumberFormatException e) {
4218 i = -1; // we don't want to consider this as a valid NLS tag
4220 if (line.exists(i)) {
// continue with the next tag after the one just handled
4223 pos = s.indexOf(TAG_PREFIX, start);
// collect the literals that are still present in the line
4225 this.nonNLSStrings = new StringLiteral[lineLength];
4226 int nonNLSCounter = 0;
4227 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4228 StringLiteral literal = (StringLiteral) iterator.next();
4229 if (literal != null) {
4230 this.nonNLSStrings[nonNLSCounter++] = literal;
// nothing left to report
4233 if (nonNLSCounter == 0) {
4234 this.nonNLSStrings = null;
4238 this.wasNonExternalizedStringLiteral = true;
// shrink the result to the number of literals actually collected
4239 if (nonNLSCounter != lineLength) {
4240 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
/**
 * Decodes an escape sequence and leaves the resulting character in
 * currentCharacter.
 * <p>
 * The visible assignments produce '\b', '\t', '\n', '\f', '\r', '"', '\'' and
 * '\\'. Any other character is tried as an octal escape of up to three digits,
 * where a third digit is only accepted when the first digit is in the range
 * 0..3.
 *
 * @throws InvalidInputException with INVALID_ESCAPE when the sequence is not a
 *           recognised escape
 */
// NOTE(review): the case labels, else branches and the statements that step
// back over an ignored character are not visible in this excerpt.
4245 public final void scanEscapeCharacter() throws InvalidInputException {
4246 // the string with "\\u" is a legal string of two chars \ and u
4247 // thus we use a direct access to the source (for regular cases).
4248 if (unicodeAsBackSlash) {
4249 // consume next character
4250 unicodeAsBackSlash = false;
4251 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4252 // (source[currentPosition] == 'u')) {
4253 // getNextUnicodeChar();
4255 if (withoutUnicodePtr != 0) {
4256 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// read the character that selects the escape
4260 currentCharacter = source[currentPosition++];
4261 switch (currentCharacter) {
4263 currentCharacter = '\b';
4266 currentCharacter = '\t';
4269 currentCharacter = '\n';
4272 currentCharacter = '\f';
4275 currentCharacter = '\r';
4278 currentCharacter = '\"';
4281 currentCharacter = '\'';
4284 currentCharacter = '\\';
4287 // -----------octal escape--------------
4289 // OctalDigit OctalDigit
4290 // ZeroToThree OctalDigit OctalDigit
4291 int number = Character.getNumericValue(currentCharacter);
4292 if (number >= 0 && number <= 7) {
// a first digit above 3 rules out a third octal digit
4293 boolean zeroToThreeNot = number > 3;
4294 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4295 int digit = Character.getNumericValue(currentCharacter);
4296 if (digit >= 0 && digit <= 7) {
4297 number = (number * 8) + digit;
4298 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4299 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4300 // Digit --> ignore last character
4303 digit = Character.getNumericValue(currentCharacter);
4304 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4305 // OctalDigit OctalDigit
4306 number = (number * 8) + digit;
4307 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4308 // --> ignore last character
4312 } else { // has read \OctalDigit NonDigit--> ignore last
4316 } else { // has read \OctalDigit NonOctalDigit--> ignore last
4320 } else { // has read \OctalDigit --> ignore last character
4324 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the decoded character
4325 currentCharacter = (char) number;
4327 throw new InvalidInputException(INVALID_ESCAPE);
4331 // check presence of task: tags
4332 // TODO (frederic) see if we need to take unicode characters into account...
/**
 * Searches a comment for the configured task tags and records every hit.
 * <p>
 * For each match the tag, its priority (taken from taskPriorities at the same
 * index, or null), its position and an initially empty message are appended to
 * the foundTask... arrays, which start with room for 5 entries and double in
 * size when full. A second pass computes the message of each newly found task
 * from the text following the tag, trims surrounding whitespace and stores the
 * message and its end position.
 *
 * @param commentStart position where the comment starts in the source; the
 *          search begins two characters further on
 * @param commentEnd position that bounds the search (exclusive in the loops)
 */
// NOTE(review): the labelled continue/break statements and several short
// declarations of this method are not visible in this excerpt. The method
// dereferences this.taskTags without a null check - presumably callers only
// invoke it when task tags are configured; confirm.
4333 public void checkTaskTag(int commentStart, int commentEnd) {
4334 char[] src = this.source;
4336 // only look for newer task: tags
4337 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// tasks from this index on are the ones discovered by this call
4340 int foundTaskIndex = this.foundTaskCount;
4341 char previous = src[commentStart + 1]; // should be '*' or '/'
4342 nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4344 char[] priority = null;
4345 // check for tag occurrence only if not ambiguous with javadoc tag
4346 if (previous != '@') {
4347 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4348 tag = this.taskTags[itag];
4349 int tagLength = tag.length;
4353 // ensure tag is not preceded by a letter if tag starts with a letter
4354 if (Scanner.isPHPIdentifierStart(tag[0])) {
4355 if (Scanner.isPHPIdentifierPart(previous)) {
// compare the tag character by character with the source
4360 for (int t = 0; t < tagLength; t++) {
4363 if (x >= this.eofPosition || x >= commentEnd)
4365 if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4366 if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case
4373 // ensure tag is not followed with letter if tag finishes with a
4375 if (i + tagLength < commentEnd && Scanner.isPHPIdentifierPart(src[i + tagLength - 1])) {
4376 if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
// allocate the result arrays lazily, double them when they are full
4379 if (this.foundTaskTags == null) {
4380 this.foundTaskTags = new char[5][];
4381 this.foundTaskMessages = new char[5][];
4382 this.foundTaskPriorities = new char[5][];
4383 this.foundTaskPositions = new int[5][];
4384 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4385 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4386 this.foundTaskCount);
4387 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4388 this.foundTaskCount);
4389 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4390 this.foundTaskCount);
4391 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4392 this.foundTaskCount);
4395 priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
// record the hit; the message is filled in by the second pass below
4397 this.foundTaskTags[this.foundTaskCount] = tag;
4398 this.foundTaskPriorities[this.foundTaskCount] = priority;
4399 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4400 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4401 this.foundTaskCount++;
4402 i += tagLength - 1; // will be incremented when looping
// second pass: determine the message text of every task found above
4408 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4409 // retrieve message start and end positions
4410 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4411 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4412 // at most beginning of next task
4413 if (max_value < msgStart) {
4414 max_value = msgStart; // would only occur if tag is before EOF.
// look for a line break after the tag
4418 for (int j = msgStart; j < max_value; j++) {
4419 if ((c = src[j]) == '\n' || c == '\r') {
// scan backwards for a '*'
4425 for (int j = max_value; j > msgStart; j--) {
4426 if ((c = src[j]) == '*') {
4434 if (msgStart == end)
// trim whitespace at both ends of the message
4437 while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4439 while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4441 // update the end position of the task
4442 this.foundTaskPositions[i][1] = end;
4443 // get the message source
4444 final int messageLength = end - msgStart + 1;
4445 char[] message = new char[messageLength];
4446 System.arraycopy(src, msgStart, message, 0, messageLength);
4447 this.foundTaskMessages[i] = message;
4451 // check presence of task: tags
4452 // public void checkTaskTag(int commentStart, int commentEnd) {
4453 // // only look for newer task: tags
4454 // if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount
4455 // - 1][0] >= commentStart) {
4458 // int foundTaskIndex = this.foundTaskCount;
4459 // nextChar: for (int i = commentStart; i < commentEnd && i <
4460 // this.eofPosition; i++) {
4461 // char[] tag = null;
4462 // char[] priority = null;
4463 // // check for tag occurrence
4464 // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4465 // tag = this.taskTags[itag];
4466 // priority = this.taskPriorities != null && itag < this.taskPriorities.length
4467 // ? this.taskPriorities[itag] : null;
4468 // int tagLength = tag.length;
4469 // for (int t = 0; t < tagLength; t++) {
4470 // if (this.source[i + t] != tag[t])
4471 // continue nextTag;
4473 // if (this.foundTaskTags == null) {
4474 // this.foundTaskTags = new char[5][];
4475 // this.foundTaskMessages = new char[5][];
4476 // this.foundTaskPriorities = new char[5][];
4477 // this.foundTaskPositions = new int[5][];
4478 // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4479 // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new
4480 // char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4481 // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new
4482 // char[this.foundTaskCount * 2][], 0,
4483 // this.foundTaskCount);
4484 // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities =
4485 // new char[this.foundTaskCount * 2][], 0,
4486 // this.foundTaskCount);
4487 // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new
4488 // int[this.foundTaskCount * 2][], 0,
4489 // this.foundTaskCount);
4491 // this.foundTaskTags[this.foundTaskCount] = tag;
4492 // this.foundTaskPriorities[this.foundTaskCount] = priority;
4493 // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength
4495 // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4496 // this.foundTaskCount++;
4497 // i += tagLength - 1; // will be incremented when looping
4500 // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4501 // // retrieve message start and end positions
4502 // int msgStart = this.foundTaskPositions[i][0] +
4503 // this.foundTaskTags[i].length;
4504 // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i +
4505 // 1][0] - 1 : commentEnd - 1;
4506 // // at most beginning of next task
4507 // if (max_value < msgStart)
4508 // max_value = msgStart; // would only occur if tag is before EOF.
4511 // for (int j = msgStart; j < max_value; j++) {
4512 // if ((c = this.source[j]) == '\n' || c == '\r') {
4518 // for (int j = max_value; j > msgStart; j--) {
4519 // if ((c = this.source[j]) == '*') {
4527 // if (msgStart == end)
4528 // continue; // empty
4529 // // trim the message
4530 // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4532 // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4534 // // update the end position of the task
4535 // this.foundTaskPositions[i][1] = end;
4536 // // get the message source
4537 // final int messageLength = end - msgStart + 1;
4538 // char[] message = new char[messageLength];
4539 // System.arraycopy(source, msgStart, message, 0, messageLength);
4540 // this.foundTaskMessages[i] = message;