1 /***********************************************************************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3 * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4 * and is available at http://www.eclipse.org/legal/cpl-v05.html
6 * Contributors: IBM Corporation - initial API and implementation
7 **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
14 import net.sourceforge.phpdt.core.compiler.CharOperation;
15 import net.sourceforge.phpdt.core.compiler.IScanner;
16 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
17 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
21 public class Scanner implements IScanner, ITerminalSymbols {
23 * APIs are - getNextToken() which returns the current type of the token (this
24 * value is not memorized by the scanner) - getCurrentTokenSource() which
25 * provides the token "REAL" source (aka all unicode have been
26 * transformed into a correct char) - sourceStart gives the position into the
27 * stream - currentPosition-1 gives the sourceEnd position into the stream
// ---- Scanner configuration flags and scanning state ----
30 // private boolean assertMode;
32 public boolean useAssertAsAnIndentifier = false;
34 // flag indicating if processed source contains occurrences of keyword assert
35 public boolean containsAssertKeyword = false;
// when true, consumeStringInterpolated() calls pushLineSeparator() for each CR/LF it meets
37 public boolean recordLineSeparator;
39 public boolean ignorePHPOneLiner = false;
41 public boolean phpMode = false;
43 public boolean phpExpressionTag = false;
45 // public Stack encapsedStringStack = null;
// character most recently read from the source by the getNextChar* helpers
47 public char currentCharacter;
// index of the first character of the current token (see getCurrentTokenStartPosition())
49 public int startPosition;
// index of the next character to read; currentPosition - 1 is the current token's end
// (see getCurrentTokenEndPosition())
51 public int currentPosition;
// initialPosition is what getLineStart(1) answers; eofPosition bounds getRawTokenSourceEnd()
53 public int initialPosition, eofPosition;
55 // after this position eof are generated instead of real token from the
57 public boolean tokenizeComments;
59 public boolean tokenizeWhiteSpace;
61 public boolean tokenizeStrings;
63 // source should be viewed as a window (aka a part)
64 // of a entire very large stream
// buffer holding the current token with escapes already resolved; filled starting at
// index 1 (see consumeStringInterpolated() and getCurrentTokenSourceString())
68 public char[] withoutUnicodeBuffer;
// index of the last character stored in withoutUnicodeBuffer
70 public int withoutUnicodePtr;
72 // when == 0 ==> no unicode in the current token
73 public boolean unicodeAsBackSlash = false;
75 public boolean scanningFloatLiteral = false;
77 // support for /** comments
78 public int[] commentStops = new int[10];
80 public int[] commentStarts = new int[10];
82 public int commentPtr = -1; // no comment test with commentPtr value -1
84 protected int lastCommentLinePosition = -1;
86 // diet parsing support - jump over some method body when requested
87 public boolean diet = false;
89 // support for the poor-line-debuggers ....
90 // remember the position of the cr/lf
// consulted by getLineEnd(int) and getLineStart(int)
91 public int[] lineEnds = new int[250];
// NOTE(review): presumably the index of the last used slot in lineEnds - confirm
93 public int linePtr = -1;
95 public boolean wasAcr = false;
// Error identifiers. Within this file UNTERMINATED_STRING is thrown as the message of an
// InvalidInputException and INVALID_ESCAPE is compared against such a message (see
// consumeStringInterpolated()); the remaining constants are presumably used the same way
// elsewhere - confirm before relying on it.
97 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
99 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
101 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
103 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
105 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
107 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
109 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
111 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
113 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
115 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
117 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
119 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
121 // ----------------optimized identifier management------------------
// Preallocated one-character arrays for the identifiers 'a' .. 'z'.
// NOTE(review): presumably handed out by the optimizedCurrentTokenSource* helpers so that
// very short identifiers need no fresh allocation - confirm against those helpers.
122 static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
123 charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
124 charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
125 charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
126 charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
127 charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
128 charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
129 charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
130 charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
// Preallocated two-character arrays for the PHP variable names '$a' .. '$z'.
132 static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
133 '$', 'c' }, charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' }, charArray_vf = new char[] { '$',
134 'f' }, charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' },
135 charArray_vi = new char[] { '$', 'i' }, charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' },
136 charArray_vl = new char[] { '$', 'l' }, charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' },
137 charArray_vo = new char[] { '$', 'o' }, charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' },
138 charArray_vr = new char[] { '$', 'r' }, charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' },
139 charArray_vu = new char[] { '$', 'u' }, charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' },
140 charArray_vx = new char[] { '$', 'x' }, charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
// Number of characters (code points 0 .. MAX_OBVIOUS - 1) covered by the lookup table below.
142 public final static int MAX_OBVIOUS = 256;
// Per-character classification, indexed by character value; holds one of the C_* codes
// below, or 0 for characters that were never classified.
144 static final int[] ObviousIdentCharNatures = new int[MAX_OBVIOUS];
// Classification codes. The isPHP*/isSQL* helpers compare them with ==, not as bit masks
// (C_DIGIT == 3 is not a distinct bit).
146 public final static int C_DOLLAR = 8;
148 public final static int C_LETTER = 4;
150 public final static int C_DIGIT = 3;
152 public final static int C_SEPARATOR = 2;
154 public final static int C_SPACE = 1;
156 for (int i = '0'; i <= '9'; i++)
157 ObviousIdentCharNatures[i] = C_DIGIT;
159 for (int i = 'a'; i <= 'z'; i++)
160 ObviousIdentCharNatures[i] = C_LETTER;
161 for (int i = 'A'; i <= 'Z'; i++)
162 ObviousIdentCharNatures[i] = C_LETTER;
163 ObviousIdentCharNatures['_'] = C_LETTER;
164 for (int i = 127; i <= 255; i++)
165 ObviousIdentCharNatures[i] = C_LETTER;
167 ObviousIdentCharNatures['$'] = C_DOLLAR;
169 ObviousIdentCharNatures[10] = C_SPACE; // \ u000a: LINE FEED
170 ObviousIdentCharNatures[12] = C_SPACE; // \ u000c: FORM FEED
171 ObviousIdentCharNatures[13] = C_SPACE; // \ u000d: CARRIAGE RETURN
172 ObviousIdentCharNatures[32] = C_SPACE; // \ u0020: SPACE
173 ObviousIdentCharNatures[9] = C_SPACE; // \ u0009: HORIZONTAL TABULATION
175 ObviousIdentCharNatures['.'] = C_SEPARATOR;
176 ObviousIdentCharNatures[':'] = C_SEPARATOR;
177 ObviousIdentCharNatures[';'] = C_SEPARATOR;
178 ObviousIdentCharNatures[','] = C_SEPARATOR;
179 ObviousIdentCharNatures['['] = C_SEPARATOR;
180 ObviousIdentCharNatures[']'] = C_SEPARATOR;
181 ObviousIdentCharNatures['('] = C_SEPARATOR;
182 ObviousIdentCharNatures[')'] = C_SEPARATOR;
183 ObviousIdentCharNatures['{'] = C_SEPARATOR;
184 ObviousIdentCharNatures['}'] = C_SEPARATOR;
185 ObviousIdentCharNatures['+'] = C_SEPARATOR;
186 ObviousIdentCharNatures['-'] = C_SEPARATOR;
187 ObviousIdentCharNatures['*'] = C_SEPARATOR;
188 ObviousIdentCharNatures['/'] = C_SEPARATOR;
189 ObviousIdentCharNatures['='] = C_SEPARATOR;
190 ObviousIdentCharNatures['&'] = C_SEPARATOR;
191 ObviousIdentCharNatures['|'] = C_SEPARATOR;
192 ObviousIdentCharNatures['?'] = C_SEPARATOR;
193 ObviousIdentCharNatures['<'] = C_SEPARATOR;
194 ObviousIdentCharNatures['>'] = C_SEPARATOR;
195 ObviousIdentCharNatures['!'] = C_SEPARATOR;
196 ObviousIdentCharNatures['%'] = C_SEPARATOR;
197 ObviousIdentCharNatures['^'] = C_SEPARATOR;
198 ObviousIdentCharNatures['~'] = C_SEPARATOR;
199 ObviousIdentCharNatures['"'] = C_SEPARATOR;
200 ObviousIdentCharNatures['\''] = C_SEPARATOR;
// Placeholder stored in every slot of charArray_length by the instance initializer;
// it consists of six NUL characters.
202 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
204 static final int TableSize = 30, InternalTableSize = 6;
206 // 30*6 = 180 entries
// first dimension of charArray_length; getCurrentIdentifierSource() special-cases token
// lengths 1..6 ("see OptimizedLength")
207 public static final int OptimizedLength = 6;
// NOTE(review): presumably a per-length cache of short identifiers maintained by the
// optimizedCurrentTokenSource* helpers - confirm
210 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
212 // support for detecting non-externalized string literals
213 int currentLineNr = -1;
215 int previousLineNr = -1;
// line currently collecting string literals; created on demand in consumeStringInterpolated()
217 NLSLine currentLine = null;
// every NLSLine created so far
219 List lines = new ArrayList();
221 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
223 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
225 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
227 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
229 public StringLiteral[] nonNLSStrings = null;
// when true, consumeStringInterpolated() records each scanned literal in currentLine
231 public boolean checkNonExternalizedStringLiterals = true;
233 public boolean wasNonExternalizedStringLiteral = false;
// Instance initializer: points every slot of charArray_length at the shared initCharArray
// placeholder so that no slot is ever null.
{
  for (int i = 0; i < OptimizedLength; i++) {
    for (int j = 0; j < TableSize; j++) {
      for (int k = 0; k < InternalTableSize; k++) {
        charArray_length[i][j][k] = initCharArray;
      }
    }
  }
}
// NOTE(review): presumably the next free slot per identifier length in charArray_length,
// shared by all scanner instances because the fields are static - confirm
245 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
// bracket kind codes; BracketKinds is the number of kinds
247 public static final int RoundBracket = 0;
249 public static final int SquareBracket = 1;
251 public static final int CurlyBracket = 2;
253 public static final int BracketKinds = 3;
// task tag support: tags found so far (foundTask*) and the tags to look for (taskTags,
// taskPriorities)
256 public char[][] foundTaskTags = null;
258 public char[][] foundTaskMessages;
260 public char[][] foundTaskPriorities = null;
262 public int[][] foundTaskPositions;
264 public int foundTaskCount = 0;
266 public char[][] taskTags = null;
268 public char[][] taskPriorities = null;
270 public boolean isTaskCaseSensitive = true;
// compile-time debugging switches
272 public static final boolean DEBUG = false;
274 public static final boolean TRACE = false;
276 public ICompilationUnit compilationUnit = null;
279 * Determines if the specified character is permissible as the first character
280 * in a PHP identifier or variable
282 * The '$' character for PHP variables is regarded as a correct first
286 public static boolean isPHPIdentOrVarStart(char ch) {
287 if (ch < MAX_OBVIOUS) {
288 return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DOLLAR;
291 //return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
295 * Determines if the specified character is permissible as the first character
296 * in a PHP identifier.
298 * The '$' character for PHP variables isn't regarded as the first character !
300 public static boolean isPHPIdentifierStart(char ch) {
301 if (ch < MAX_OBVIOUS) {
302 return ObviousIdentCharNatures[ch]==C_LETTER;
305 // return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
309 * Determines if the specified character may be part of a PHP identifier as
310 * other than the first character
312 public static boolean isPHPIdentifierPart(char ch) {
313 if (ch < MAX_OBVIOUS) {
314 return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DIGIT;
317 // return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
320 public static boolean isSQLIdentifierPart(char ch) {
321 if (ch < MAX_OBVIOUS) {
322 return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DIGIT;
327 public final boolean atEnd() {
328 // This code is not relevant if source is
329 // Only a part of the real stream input
330 return source.length == currentPosition;
333 public char[] getCurrentIdentifierSource() {
334 // return the token REAL source (aka unicodes are precomputed)
336 // if (withoutUnicodePtr != 0)
337 // //0 is used as a fast test flag so the real first char is in position 1
339 // withoutUnicodeBuffer,
341 // result = new char[withoutUnicodePtr],
343 // withoutUnicodePtr);
345 int length = currentPosition - startPosition;
346 switch (length) { // see OptimizedLength
348 return optimizedCurrentTokenSource1();
350 return optimizedCurrentTokenSource2();
352 return optimizedCurrentTokenSource3();
354 return optimizedCurrentTokenSource4();
356 return optimizedCurrentTokenSource5();
358 return optimizedCurrentTokenSource6();
361 System.arraycopy(source, startPosition, result = new char[length], 0, length);
366 public int getCurrentTokenEndPosition() {
367 return this.currentPosition - 1;
370 public final char[] getCurrentTokenSource() {
371 // Return the token REAL source (aka unicodes are precomputed)
373 // if (withoutUnicodePtr != 0)
374 // // 0 is used as a fast test flag so the real first char is in position 1
376 // withoutUnicodeBuffer,
378 // result = new char[withoutUnicodePtr],
380 // withoutUnicodePtr);
383 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
388 public final char[] getCurrentTokenSource(int startPos) {
389 // Return the token REAL source (aka unicodes are precomputed)
391 // if (withoutUnicodePtr != 0)
392 // // 0 is used as a fast test flag so the real first char is in position 1
394 // withoutUnicodeBuffer,
396 // result = new char[withoutUnicodePtr],
398 // withoutUnicodePtr);
401 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
406 public final char[] getCurrentTokenSourceString() {
407 // return the token REAL source (aka unicodes are precomputed).
408 // REMOVE the two " that are at the beginning and the end.
410 if (withoutUnicodePtr != 0)
411 // 0 is used as a fast test flag so the real first char is in position 1
412 System.arraycopy(withoutUnicodeBuffer, 2,
413 // 2 is 1 (real start) + 1 (to jump over the ")
414 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
417 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
422 public final boolean equalsCurrentTokenSource(char[] word) {
423 if (word.length != currentPosition - startPosition) {
426 for (int i = 0; i < word.length; i++) {
427 if (word[i]!=source[startPosition+i]){
434 public final char[] getRawTokenSourceEnd() {
435 int length = this.eofPosition - this.currentPosition - 1;
436 char[] sourceEnd = new char[length];
437 System.arraycopy(this.source, this.currentPosition, sourceEnd, 0, length);
441 public int getCurrentTokenStartPosition() {
442 return this.startPosition;
445 public final String getCurrentStringLiteral() {
446 char[] result = getCurrentStringLiteralSource();
447 return new String(result);
450 public final char[] getCurrentStringLiteralSource() {
451 // Return the token REAL source (aka unicodes are precomputed)
452 if (startPosition + 1 >= currentPosition) {
457 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
462 public final char[] getCurrentStringLiteralSource(int startPos) {
463 // Return the token REAL source (aka unicodes are precomputed)
466 System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
472 * Search the source position corresponding to the end of a given line number
474 * Line numbers are 1-based, and relative to the scanner initialPosition.
475 * Character positions are 0-based.
477 * In case the given line number is inconsistent, answers -1.
479 public final int getLineEnd(int lineNumber) {
480 if (lineEnds == null)
482 if (lineNumber >= lineEnds.length)
486 if (lineNumber == lineEnds.length - 1)
488 return lineEnds[lineNumber - 1];
489 // next line start one character behind the lineEnd of the previous line
493 * Search the source position corresponding to the beginning of a given line
496 * Line numbers are 1-based, and relative to the scanner initialPosition.
497 * Character positions are 0-based.
499 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
501 * In case the given line number is inconsistent, answers -1.
503 public final int getLineStart(int lineNumber) {
504 if (lineEnds == null)
506 if (lineNumber >= lineEnds.length)
511 return initialPosition;
512 return lineEnds[lineNumber - 2] + 1;
513 // next line start one character behind the lineEnd of the previous line
516 public final boolean getNextChar(char testedChar) {
518 // handle the case of unicode.
519 // when a unicode appears then we must use a buffer that holds char
521 // At the end of this method currentCharacter holds the new visited char
522 // and currentPosition points right next after it
523 // Both previous lines are true if the currentCharacter is == to the
525 // On false, no side effect has occured.
526 // ALL getNextChar.... ARE OPTIMIZED COPIES
527 int temp = currentPosition;
529 currentCharacter = source[currentPosition++];
530 // if (((currentCharacter = source[currentPosition++]) == '\\')
531 // && (source[currentPosition] == 'u')) {
532 // //-------------unicode traitement ------------
533 // int c1, c2, c3, c4;
534 // int unicodeSize = 6;
535 // currentPosition++;
536 // while (source[currentPosition] == 'u') {
537 // currentPosition++;
541 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
543 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
545 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
547 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
549 // currentPosition = temp;
553 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
554 // if (currentCharacter != testedChar) {
555 // currentPosition = temp;
558 // unicodeAsBackSlash = currentCharacter == '\\';
560 // //need the unicode buffer
561 // if (withoutUnicodePtr == 0) {
562 // //buffer all the entries that have been left aside....
563 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
567 // withoutUnicodeBuffer,
569 // withoutUnicodePtr);
571 // //fill the buffer with the char
572 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
575 // } //-------------end unicode traitement--------------
577 if (currentCharacter != testedChar) {
578 currentPosition = temp;
581 unicodeAsBackSlash = false;
582 // if (withoutUnicodePtr != 0)
583 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
586 } catch (IndexOutOfBoundsException e) {
587 unicodeAsBackSlash = false;
588 currentPosition = temp;
593 public final int getNextChar(char testedChar1, char testedChar2) {
594 // INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
595 // test can be done with (x==0) for the first and (x>0) for the second
596 // handle the case of unicode.
597 // when a unicode appears then we must use a buffer that holds char
599 // At the end of this method currentCharacter holds the new visited char
600 // and currentPosition points right next after it
601 // Both previous lines are true if the currentCharacter is == to the
603 // On false, no side effect has occured.
604 // ALL getNextChar.... ARE OPTIMIZED COPIES
605 int temp = currentPosition;
608 currentCharacter = source[currentPosition++];
609 // if (((currentCharacter = source[currentPosition++]) == '\\')
610 // && (source[currentPosition] == 'u')) {
611 // //-------------unicode traitement ------------
612 // int c1, c2, c3, c4;
613 // int unicodeSize = 6;
614 // currentPosition++;
615 // while (source[currentPosition] == 'u') {
616 // currentPosition++;
620 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
622 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
624 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
626 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
628 // currentPosition = temp;
632 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
633 // if (currentCharacter == testedChar1)
635 // else if (currentCharacter == testedChar2)
638 // currentPosition = temp;
642 // //need the unicode buffer
643 // if (withoutUnicodePtr == 0) {
644 // //buffer all the entries that have been left aside....
645 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
649 // withoutUnicodeBuffer,
651 // withoutUnicodePtr);
653 // //fill the buffer with the char
654 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
656 // } //-------------end unicode traitement--------------
658 if (currentCharacter == testedChar1)
660 else if (currentCharacter == testedChar2)
663 currentPosition = temp;
666 // if (withoutUnicodePtr != 0)
667 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
670 } catch (IndexOutOfBoundsException e) {
671 currentPosition = temp;
676 public final boolean getNextCharAsDigit() {
678 // handle the case of unicode.
679 // when a unicode appears then we must use a buffer that holds char
681 // At the end of this method currentCharacter holds the new visited char
682 // and currentPosition points right next after it
683 // Both previous lines are true if the currentCharacter is a digit
684 // On false, no side effect has occured.
685 // ALL getNextChar.... ARE OPTIMIZED COPIES
686 int temp = currentPosition;
688 currentCharacter = source[currentPosition++];
689 // if (((currentCharacter = source[currentPosition++]) == '\\')
690 // && (source[currentPosition] == 'u')) {
691 // //-------------unicode traitement ------------
692 // int c1, c2, c3, c4;
693 // int unicodeSize = 6;
694 // currentPosition++;
695 // while (source[currentPosition] == 'u') {
696 // currentPosition++;
700 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
702 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
704 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
706 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
708 // currentPosition = temp;
712 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
713 // if (!Character.isDigit(currentCharacter)) {
714 // currentPosition = temp;
718 // //need the unicode buffer
719 // if (withoutUnicodePtr == 0) {
720 // //buffer all the entries that have been left aside....
721 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
725 // withoutUnicodeBuffer,
727 // withoutUnicodePtr);
729 // //fill the buffer with the char
730 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
732 // } //-------------end unicode traitement--------------
734 if (!Character.isDigit(currentCharacter)) {
735 currentPosition = temp;
738 // if (withoutUnicodePtr != 0)
739 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
742 } catch (IndexOutOfBoundsException e) {
743 currentPosition = temp;
748 public final boolean getNextCharAsDigit(int radix) {
750 // handle the case of unicode.
751 // when a unicode appears then we must use a buffer that holds char
753 // At the end of this method currentCharacter holds the new visited char
754 // and currentPosition points right next after it
755 // Both previous lines are true if the currentCharacter is a digit base on
757 // On false, no side effect has occured.
758 // ALL getNextChar.... ARE OPTIMIZED COPIES
759 int temp = currentPosition;
761 currentCharacter = source[currentPosition++];
762 // if (((currentCharacter = source[currentPosition++]) == '\\')
763 // && (source[currentPosition] == 'u')) {
764 // //-------------unicode traitement ------------
765 // int c1, c2, c3, c4;
766 // int unicodeSize = 6;
767 // currentPosition++;
768 // while (source[currentPosition] == 'u') {
769 // currentPosition++;
773 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
775 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
777 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
779 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
781 // currentPosition = temp;
785 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
786 // if (Character.digit(currentCharacter, radix) == -1) {
787 // currentPosition = temp;
791 // //need the unicode buffer
792 // if (withoutUnicodePtr == 0) {
793 // //buffer all the entries that have been left aside....
794 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
798 // withoutUnicodeBuffer,
800 // withoutUnicodePtr);
802 // //fill the buffer with the char
803 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
805 // } //-------------end unicode traitement--------------
807 if (Character.digit(currentCharacter, radix) == -1) {
808 currentPosition = temp;
811 // if (withoutUnicodePtr != 0)
812 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
815 } catch (IndexOutOfBoundsException e) {
816 currentPosition = temp;
821 public boolean getNextCharAsJavaIdentifierPart() {
823 // handle the case of unicode.
824 // when a unicode appears then we must use a buffer that holds char
826 // At the end of this method currentCharacter holds the new visited char
827 // and currentPosition points right next after it
828 // Both previous lines are true if the currentCharacter is a
829 // JavaIdentifierPart
830 // On false, no side effect has occured.
831 // ALL getNextChar.... ARE OPTIMIZED COPIES
832 int temp = currentPosition;
834 currentCharacter = source[currentPosition++];
835 // if (((currentCharacter = source[currentPosition++]) == '\\')
836 // && (source[currentPosition] == 'u')) {
837 // //-------------unicode traitement ------------
838 // int c1, c2, c3, c4;
839 // int unicodeSize = 6;
840 // currentPosition++;
841 // while (source[currentPosition] == 'u') {
842 // currentPosition++;
846 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
848 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
850 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
852 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
854 // currentPosition = temp;
858 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
859 // if (!isPHPIdentifierPart(currentCharacter)) {
860 // currentPosition = temp;
864 // //need the unicode buffer
865 // if (withoutUnicodePtr == 0) {
866 // //buffer all the entries that have been left aside....
867 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
871 // withoutUnicodeBuffer,
873 // withoutUnicodePtr);
875 // //fill the buffer with the char
876 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
878 // } //-------------end unicode traitement--------------
880 if (!isPHPIdentifierPart(currentCharacter)) {
881 currentPosition = temp;
884 // if (withoutUnicodePtr != 0)
885 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
888 } catch (IndexOutOfBoundsException e) {
889 currentPosition = temp;
894 public int getCastOrParen() {
895 int tempPosition = currentPosition;
896 char tempCharacter = currentCharacter;
897 int tempToken = TokenNameLPAREN;
898 boolean found = false;
899 StringBuffer buf = new StringBuffer();
902 currentCharacter = source[currentPosition++];
903 } while (currentCharacter == ' ' || currentCharacter == '\t');
904 while (ObviousIdentCharNatures[currentCharacter]==C_LETTER) {
905 // while((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
906 buf.append(currentCharacter);
907 currentCharacter = source[currentPosition++];
909 if (buf.length() >= 3 && buf.length() <= 7) {
910 char[] data = buf.toString().toCharArray();
912 switch (data.length) {
915 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
917 tempToken = TokenNameintCAST;
922 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
924 tempToken = TokenNameboolCAST;
927 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
929 tempToken = TokenNamedoubleCAST;
935 if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
936 && (data[++index] == 'y')) {
938 tempToken = TokenNamearrayCAST;
941 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
942 && (data[++index] == 't')) {
944 tempToken = TokenNameunsetCAST;
947 if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
948 && (data[++index] == 't')) {
950 tempToken = TokenNamedoubleCAST;
956 // object string double
957 if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
958 && (data[++index] == 'c') && (data[++index] == 't')) {
960 tempToken = TokenNameobjectCAST;
963 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
964 && (data[++index] == 'n') && (data[++index] == 'g')) {
966 tempToken = TokenNamestringCAST;
969 if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
970 && (data[++index] == 'l') && (data[++index] == 'e')) {
972 tempToken = TokenNamedoubleCAST;
979 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
980 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
982 tempToken = TokenNameboolCAST;
985 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
986 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
988 tempToken = TokenNameintCAST;
994 while (currentCharacter == ' ' || currentCharacter == '\t') {
995 currentCharacter = source[currentPosition++];
997 if (currentCharacter == ')') {
1002 } catch (IndexOutOfBoundsException e) {
1004 currentCharacter = tempCharacter;
1005 currentPosition = tempPosition;
1006 return TokenNameLPAREN;
/**
 * Consumes the rest of a backtick-delimited string, reading {@code source} one
 * character at a time until the closing backtick is reached.
 * <p>
 * Backslash escapes are decoded through {@code scanDoubleQuotedEscapeCharacter()} and
 * the decoded text is accumulated in {@code withoutUnicodeBuffer}. Line breaks inside
 * the string are accepted and are reported through {@code pushLineSeparator()} when
 * {@code recordLineSeparator} is set. {@code getNextToken()} calls this method and then
 * returns {@code TokenNameStringInterpolated}.
 * <p>
 * NOTE(review): this listing has gaps in its embedded line numbers (brace-only and
 * other short lines appear to be missing), so the block nesting below should be
 * confirmed against the complete file.
 *
 * @throws InvalidInputException with {@code UNTERMINATED_STRING} if the end of
 *         {@code source} is hit before the closing backtick
 */
1009 public void consumeStringInterpolated() throws InvalidInputException {
1011 // consume next character
1012 unicodeAsBackSlash = false;
1013 currentCharacter = source[currentPosition++];
1014 // if (((currentCharacter = source[currentPosition++]) == '\\')
1015 // && (source[currentPosition] == 'u')) {
1016 // getNextUnicodeChar();
1018 // if (withoutUnicodePtr != 0) {
1019 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1020 // currentCharacter;
1023 while (currentCharacter != '`') {
1024 /** ** in PHP \r and \n are valid in string literals *** */
1025 // if ((currentCharacter == '\n')
1026 // || (currentCharacter == '\r')) {
1027 // // relocate if finding another quote fairly close: thus unicode
1028 // '/u000D' will be fully consumed
1029 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1030 // if (currentPosition + lookAhead == source.length)
1032 // if (source[currentPosition + lookAhead] == '\n')
1034 // if (source[currentPosition + lookAhead] == '\"') {
1035 // currentPosition += lookAhead + 1;
1039 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1041 if (currentCharacter == '\\') {
// escapeSize first holds the offset where the escape starts and, once the escape
// has been scanned, the number of characters the escape occupied
1042 int escapeSize = currentPosition;
1043 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1044 // scanEscapeCharacter make a side effect on this value and we need
1045 // the previous value few lines down this one
1046 scanDoubleQuotedEscapeCharacter();
1047 escapeSize = currentPosition - escapeSize;
1048 if (withoutUnicodePtr == 0) {
1049 // buffer all the entries that have been left aside....
// (the raw characters scanned so far are copied into the buffer starting at index 1,
// then the decoded escape character is appended)
1050 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1051 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1052 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1053 } else { // overwrite the / in the buffer
1054 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1055 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1056 // where only one is correct
1057 withoutUnicodePtr--;
1060 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1061 if (recordLineSeparator) {
1062 pushLineSeparator();
1065 // consume next character
1066 unicodeAsBackSlash = false;
1067 currentCharacter = source[currentPosition++];
1068 // if (((currentCharacter = source[currentPosition++]) == '\\')
1069 // && (source[currentPosition] == 'u')) {
1070 // getNextUnicodeChar();
// once buffering has started, every consumed character is mirrored into the buffer
1072 if (withoutUnicodePtr != 0) {
1073 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source surfaces here as an unterminated string
1077 } catch (IndexOutOfBoundsException e) {
1078 // reset end position for error reporting
1079 currentPosition -= 2;
1080 throw new InvalidInputException(UNTERMINATED_STRING);
1081 } catch (InvalidInputException e) {
1082 if (e.getMessage().equals(INVALID_ESCAPE)) {
1083 // relocate if finding another quote fairly close: thus unicode
1084 // '/u000D' will be fully consumed
// the search inspects at most 50 characters and tests for end of source, a newline
// and the closing backtick
// NOTE(review): the statements attached to the first two tests, and whatever follows
// the loop (presumably a rethrow of e), are not visible in this listing - confirm
1085 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1086 if (currentPosition + lookAhead == source.length)
1088 if (source[currentPosition + lookAhead] == '\n')
1090 if (source[currentPosition + lookAhead] == '`') {
1091 currentPosition += lookAhead + 1;
1098 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1099 // //$NON-NLS-?$ where ? is an
// the literal is registered on the current NLS line, which is created on first use
1101 if (currentLine == null) {
1102 currentLine = new NLSLine();
1103 lines.add(currentLine);
1105 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Consumes the rest of a single-quoted string, reading {@code source} one character at
 * a time until the closing single quote is reached.
 * <p>
 * Backslash escapes are decoded through {@code scanSingleQuotedEscapeCharacter()} and
 * the decoded text is accumulated in {@code withoutUnicodeBuffer}. Line breaks inside
 * the string are accepted and are reported through {@code pushLineSeparator()} when
 * {@code recordLineSeparator} is set. {@code getNextToken()} calls this method and then
 * returns {@code TokenNameStringSingleQuote}.
 * <p>
 * NOTE(review): this listing has gaps in its embedded line numbers (brace-only and
 * other short lines appear to be missing), so the block nesting below should be
 * confirmed against the complete file.
 *
 * @throws InvalidInputException with {@code UNTERMINATED_STRING} if the end of
 *         {@code source} is hit before the closing quote
 */
1109 public void consumeStringConstant() throws InvalidInputException {
1111 // consume next character
1112 unicodeAsBackSlash = false;
1113 currentCharacter = source[currentPosition++];
1114 // if (((currentCharacter = source[currentPosition++]) == '\\')
1115 // && (source[currentPosition] == 'u')) {
1116 // getNextUnicodeChar();
1118 // if (withoutUnicodePtr != 0) {
1119 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1120 // currentCharacter;
1123 while (currentCharacter != '\'') {
1124 /** ** in PHP \r and \n are valid in string literals *** */
1125 // if ((currentCharacter == '\n')
1126 // || (currentCharacter == '\r')) {
1127 // // relocate if finding another quote fairly close: thus unicode
1128 // '/u000D' will be fully consumed
1129 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1130 // if (currentPosition + lookAhead == source.length)
1132 // if (source[currentPosition + lookAhead] == '\n')
1134 // if (source[currentPosition + lookAhead] == '\"') {
1135 // currentPosition += lookAhead + 1;
1139 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1141 if (currentCharacter == '\\') {
// escapeSize first holds the offset where the escape starts and, once the escape
// has been scanned, the number of characters the escape occupied
1142 int escapeSize = currentPosition;
1143 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1144 // scanEscapeCharacter make a side effect on this value and we need
1145 // the previous value few lines down this one
1146 scanSingleQuotedEscapeCharacter();
1147 escapeSize = currentPosition - escapeSize;
1148 if (withoutUnicodePtr == 0) {
1149 // buffer all the entries that have been left aside....
// (the raw characters scanned so far are copied into the buffer starting at index 1,
// then the decoded escape character is appended)
1150 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1151 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1152 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1153 } else { // overwrite the / in the buffer
1154 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1155 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1156 // where only one is correct
1157 withoutUnicodePtr--;
1160 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1161 if (recordLineSeparator) {
1162 pushLineSeparator();
1165 // consume next character
1166 unicodeAsBackSlash = false;
1167 currentCharacter = source[currentPosition++];
1168 // if (((currentCharacter = source[currentPosition++]) == '\\')
1169 // && (source[currentPosition] == 'u')) {
1170 // getNextUnicodeChar();
// once buffering has started, every consumed character is mirrored into the buffer
1172 if (withoutUnicodePtr != 0) {
1173 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source surfaces here as an unterminated string
1177 } catch (IndexOutOfBoundsException e) {
1178 // reset end position for error reporting
1179 currentPosition -= 2;
1180 throw new InvalidInputException(UNTERMINATED_STRING);
1181 } catch (InvalidInputException e) {
1182 if (e.getMessage().equals(INVALID_ESCAPE)) {
1183 // relocate if finding another quote fairly close: thus unicode
1184 // '/u000D' will be fully consumed
// the search inspects at most 50 characters and tests for end of source, a newline
// and the closing single quote
// NOTE(review): the statements attached to the first two tests, and whatever follows
// the loop (presumably a rethrow of e), are not visible in this listing - confirm
1185 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1186 if (currentPosition + lookAhead == source.length)
1188 if (source[currentPosition + lookAhead] == '\n')
1190 if (source[currentPosition + lookAhead] == '\'') {
1191 currentPosition += lookAhead + 1;
1198 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1199 // //$NON-NLS-?$ where ? is an
// the literal is registered on the current NLS line, which is created on first use
1201 if (currentLine == null) {
1202 currentLine = new NLSLine();
1203 lines.add(currentLine);
1205 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Consumes the rest of a double-quoted string, reading {@code source} one character at
 * a time until a closing double quote is reached outside of a brace expression.
 * <p>
 * A dollar sign directly followed by an opening brace, or an opening brace directly
 * followed by a dollar sign, sets {@code openDollarBrace}; a closing brace clears it.
 * While the flag is set a double quote does not terminate the string. Backslash escapes
 * are decoded through {@code scanDoubleQuotedEscapeCharacter()} and line breaks are
 * reported through {@code pushLineSeparator()} when {@code recordLineSeparator} is set.
 * {@code getNextToken()} calls this method and then returns
 * {@code TokenNameStringDoubleQuote}.
 * <p>
 * NOTE(review): this listing has gaps in its embedded line numbers (brace-only and
 * other short lines appear to be missing), so the block nesting below should be
 * confirmed against the complete file.
 *
 * @throws InvalidInputException with {@code UNTERMINATED_STRING} if the end of
 *         {@code source} is hit before the string is closed
 */
1209 public void consumeStringLiteral() throws InvalidInputException {
1211 boolean openDollarBrace = false;
1212 // consume next character
1213 unicodeAsBackSlash = false;
1214 currentCharacter = source[currentPosition++];
1215 while (currentCharacter != '"' || openDollarBrace) {
1216 /** ** in PHP \r and \n are valid in string literals *** */
1217 if (currentCharacter == '\\') {
// escapeSize first holds the offset where the escape starts and, once the escape
// has been scanned, the number of characters the escape occupied
1218 int escapeSize = currentPosition;
1219 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1220 // scanEscapeCharacter make a side effect on this value and we need
1221 // the previous value few lines down this one
1222 scanDoubleQuotedEscapeCharacter();
1223 escapeSize = currentPosition - escapeSize;
1224 if (withoutUnicodePtr == 0) {
1225 // buffer all the entries that have been left aside....
1226 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1227 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1228 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1229 } else { // overwrite the / in the buffer
1230 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1231 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1232 // where only one is correct
1233 withoutUnicodePtr--;
// the two tests below peek one character ahead without consuming it
// NOTE(review): openDollarBrace is a plain flag, not a depth counter, so the first
// closing brace clears it even when braces are nested inside the expression
1236 } else if (currentCharacter == '$' && source[currentPosition] == '{') {
1237 openDollarBrace = true;
1238 } else if (currentCharacter == '{' && source[currentPosition] == '$') {
1239 openDollarBrace = true;
1240 } else if (currentCharacter == '}') {
1241 openDollarBrace = false;
1242 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1243 if (recordLineSeparator) {
1244 pushLineSeparator();
1247 // consume next character
1248 unicodeAsBackSlash = false;
1249 currentCharacter = source[currentPosition++];
// once buffering has started, every consumed character is mirrored into the buffer
1250 if (withoutUnicodePtr != 0) {
1251 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source (including the one-character peeks above) surfaces
// here as an unterminated string
1254 } catch (IndexOutOfBoundsException e) {
1255 // reset end position for error reporting
1256 currentPosition -= 2;
1257 throw new InvalidInputException(UNTERMINATED_STRING);
1258 } catch (InvalidInputException e) {
1259 if (e.getMessage().equals(INVALID_ESCAPE)) {
1260 // relocate if finding another quote fairly close: thus unicode
1261 // '/u000D' will be fully consumed
// the search inspects at most 50 characters and tests for end of source, a newline
// and the closing double quote
// NOTE(review): the statements attached to the first two tests, and whatever follows
// the loop (presumably a rethrow of e), are not visible in this listing - confirm
1262 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1263 if (currentPosition + lookAhead == source.length)
1265 if (source[currentPosition + lookAhead] == '\n')
1267 if (source[currentPosition + lookAhead] == '\"') {
1268 currentPosition += lookAhead + 1;
1275 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1276 // //$NON-NLS-?$ where ? is an
// the literal is registered on the current NLS line, which is created on first use
1278 if (currentLine == null) {
1279 currentLine = new NLSLine();
1280 lines.add(currentLine);
1282 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Scans {@code source} from {@code currentPosition} and returns the type of the next
 * token as one of the {@code TokenName...} constants.
 * <p>
 * What this listing shows, in order:
 * <ul>
 * <li>{@code phpExpressionTag} is reset, and the method can hand over to
 * {@code getInlinedHTMLToken(currentPosition)} or {@code jumpOverMethodBody()};</li>
 * <li>leading whitespace is skipped (line breaks are recorded when
 * {@code recordLineSeparator} is set) and {@code TokenNameWHITESPACE} is returned
 * instead when {@code tokenizeWhiteSpace} is set and whitespace was found;</li>
 * <li>{@code TokenNameEOF} is returned once {@code currentPosition > eofPosition};</li>
 * <li>a switch on {@code currentCharacter} recognises punctuation, operators, the three
 * string forms, heredocs, line and block comments, dollar-prefixed names, identifiers
 * and numbers, and falls back to {@code TokenNameERROR}.</li>
 * </ul>
 * NOTE(review): this listing has gaps in its embedded line numbers; in particular the
 * {@code case} labels, {@code try} openers, {@code break} statements and closing braces
 * are not visible, so which character selects which branch must be confirmed against
 * the complete file.
 *
 * @return the token type; {@code TokenNameEOF} when the end of input is reached
 * @throws InvalidInputException for malformed input, for example
 *         {@code UNTERMINATED_COMMENT} when a block comment is never closed
 */
1286 public int getNextToken() throws InvalidInputException {
1287 phpExpressionTag = false;
// NOTE(review): presumably guarded by a condition on an omitted line - confirm
1289 return getInlinedHTMLToken(currentPosition);
1292 this.wasAcr = false;
// NOTE(review): the condition selecting this method-body skipping path is not visible
1294 jumpOverMethodBody();
1296 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1300 withoutUnicodePtr = 0;
1301 // start with a new token
1302 char encapsedChar = ' ';
1303 // if (!encapsedStringStack.isEmpty()) {
1304 // encapsedChar = ((Character)
1305 // encapsedStringStack.peek()).charValue();
1307 // if (encapsedChar != '$' && encapsedChar != ' ') {
1308 // currentCharacter = source[currentPosition++];
1309 // if (currentCharacter == encapsedChar) {
1310 // switch (currentCharacter) {
1312 // return TokenNameEncapsedString0;
1314 // return TokenNameEncapsedString1;
1316 // return TokenNameEncapsedString2;
1319 // while (currentCharacter != encapsedChar) {
1320 // /** ** in PHP \r and \n are valid in string literals *** */
1321 // switch (currentCharacter) {
1323 // int escapeSize = currentPosition;
1324 // boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1325 // //scanEscapeCharacter make a side effect on this value and
1326 // // we need the previous value few lines down this one
1327 // scanDoubleQuotedEscapeCharacter();
1328 // escapeSize = currentPosition - escapeSize;
1329 // if (withoutUnicodePtr == 0) {
1330 // //buffer all the entries that have been left aside....
1331 // withoutUnicodePtr = currentPosition - escapeSize - 1 -
1333 // System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1334 // withoutUnicodePtr);
1335 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1336 // } else { //overwrite the / in the buffer
1337 // withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1338 // if (backSlashAsUnicodeInString) { //there are TWO \ in
1339 // withoutUnicodePtr--;
1345 // if (recordLineSeparator) {
1346 // pushLineSeparator();
1350 // if (isPHPIdentifierStart(source[currentPosition]) ||
1351 // source[currentPosition] == '{') {
1352 // currentPosition--;
1353 // encapsedStringStack.push(new Character('$'));
1354 // return TokenNameSTRING;
1358 // if (source[currentPosition] == '$') { // CURLY_OPEN
1359 // currentPosition--;
1360 // encapsedStringStack.push(new Character('$'));
1361 // return TokenNameSTRING;
1364 // // consume next character
1365 // unicodeAsBackSlash = false;
1366 // currentCharacter = source[currentPosition++];
1367 // if (withoutUnicodePtr != 0) {
1368 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1372 // currentPosition--;
1373 // return TokenNameSTRING;
1375 // ---------Consume white space and handles startPosition---------
// whiteStart remembers where the whitespace run began so it can become the token start
1376 int whiteStart = currentPosition;
1377 startPosition = currentPosition;
1378 currentCharacter = source[currentPosition++];
1379 // if (encapsedChar == '$') {
1380 // switch (currentCharacter) {
1382 // currentCharacter = source[currentPosition++];
1383 // return TokenNameSTRING;
1385 // if (encapsedChar == '$') {
1386 // if (getNextChar('$'))
1387 // return TokenNameLBRACE_DOLLAR;
1389 // return TokenNameLBRACE;
1391 // return TokenNameRBRACE;
1393 // return TokenNameLBRACKET;
1395 // return TokenNameRBRACKET;
1397 // if (tokenizeStrings) {
1398 // consumeStringConstant();
1399 // return TokenNameStringSingleQuote;
1401 // return TokenNameEncapsedString1;
1403 // return TokenNameEncapsedString2;
1405 // if (tokenizeStrings) {
1406 // consumeStringInterpolated();
1407 // return TokenNameStringInterpolated;
1409 // return TokenNameEncapsedString0;
1411 // if (getNextChar('>'))
1412 // return TokenNameMINUS_GREATER;
1413 // return TokenNameSTRING;
1415 // if (currentCharacter == '$') {
1416 // int oldPosition = currentPosition;
1418 // currentCharacter = source[currentPosition++];
1419 // if (currentCharacter == '{') {
1420 // return TokenNameDOLLAR_LBRACE;
1422 // if (isPHPIdentifierStart(currentCharacter)) {
1423 // return scanIdentifierOrKeyword(true);
1425 // currentPosition = oldPosition;
1426 // return TokenNameSTRING;
1428 // } catch (IndexOutOfBoundsException e) {
1429 // currentPosition = oldPosition;
1430 // return TokenNameSTRING;
1433 // if (isPHPIdentifierStart(currentCharacter))
1434 // return scanIdentifierOrKeyword(false);
1435 // if (Character.isDigit(currentCharacter))
1436 // return scanNumber(false);
1437 // return TokenNameERROR;
1440 // boolean isWhiteSpace;
1442 while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1443 startPosition = currentPosition;
1444 currentCharacter = source[currentPosition++];
1445 // if (((currentCharacter = source[currentPosition++]) == '\\')
1446 // && (source[currentPosition] == 'u')) {
1447 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1449 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1450 checkNonExternalizeString();
1451 if (recordLineSeparator) {
1452 pushLineSeparator();
1457 // isWhiteSpace = (currentCharacter == ' ')
1458 // || Character.isWhitespace(currentCharacter);
1461 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1462 // reposition scanner in case we are interested by spaces as tokens
1464 startPosition = whiteStart;
1465 return TokenNameWHITESPACE;
1467 // little trick to get out in the middle of a source computation
1468 if (currentPosition > eofPosition)
1469 return TokenNameEOF;
1470 // ---------Identify the next token-------------
// NOTE(review): the case labels of this switch are not visible in this listing; the
// branches are identified below only by the tokens they return
1471 switch (currentCharacter) {
1473 return getCastOrParen();
1475 return TokenNameRPAREN;
1477 return TokenNameLBRACE;
1479 return TokenNameRBRACE;
1481 return TokenNameLBRACKET;
1483 return TokenNameRBRACKET;
1485 return TokenNameSEMICOLON;
1487 return TokenNameCOMMA;
// dot branch: dot-equal, a number starting with a dot, or a plain dot
1489 if (getNextChar('='))
1490 return TokenNameDOT_EQUAL;
1491 if (getNextCharAsDigit())
1492 return scanNumber(true);
1493 return TokenNameDOT;
1496 if ((test = getNextChar('+', '=')) == 0)
1497 return TokenNamePLUS_PLUS;
1499 return TokenNamePLUS_EQUAL;
1500 return TokenNamePLUS;
1504 if ((test = getNextChar('-', '=')) == 0)
1505 return TokenNameMINUS_MINUS;
1507 return TokenNameMINUS_EQUAL;
1508 if (getNextChar('>'))
1509 return TokenNameMINUS_GREATER;
1510 return TokenNameMINUS;
1513 if (getNextChar('='))
1514 return TokenNameTWIDDLE_EQUAL;
1515 return TokenNameTWIDDLE;
1517 if (getNextChar('=')) {
1518 if (getNextChar('=')) {
1519 return TokenNameNOT_EQUAL_EQUAL;
1521 return TokenNameNOT_EQUAL;
1523 return TokenNameNOT;
1525 if (getNextChar('='))
1526 return TokenNameMULTIPLY_EQUAL;
1527 return TokenNameMULTIPLY;
1529 if (getNextChar('='))
1530 return TokenNameREMAINDER_EQUAL;
1531 return TokenNameREMAINDER;
// branch returning TokenNameLESS and related tokens: the following character is read
// eagerly and the position is restored from oldPosition when no longer token matches
1533 int oldPosition = currentPosition;
1535 currentCharacter = source[currentPosition++];
1536 } catch (IndexOutOfBoundsException e) {
1537 currentPosition = oldPosition;
1538 return TokenNameLESS;
1540 switch (currentCharacter) {
1542 return TokenNameLESS_EQUAL;
1544 return TokenNameNOT_EQUAL;
1546 if (getNextChar('='))
1547 return TokenNameLEFT_SHIFT_EQUAL;
1548 if (getNextChar('<')) {
// heredoc: skip whitespace, then read the label that follows the opener
1549 currentCharacter = source[currentPosition++];
1550 while (Character.isWhitespace(currentCharacter)) {
1551 currentCharacter = source[currentPosition++];
// heredocStart is the offset of the first label character; heredocLength is set once
// the whole label has been consumed
1553 int heredocStart = currentPosition - 1;
1554 int heredocLength = 0;
1555 if (isPHPIdentifierStart(currentCharacter)) {
1556 currentCharacter = source[currentPosition++];
1558 return TokenNameERROR;
1560 while (isPHPIdentifierPart(currentCharacter)) {
1561 currentCharacter = source[currentPosition++];
1563 heredocLength = currentPosition - heredocStart - 1;
1564 // heredoc end-tag determination
1565 boolean endTag = true;
1568 ch = source[currentPosition++];
1569 if (ch == '\r' || ch == '\n') {
1570 if (recordLineSeparator) {
1571 pushLineSeparator();
// compare the text after the line break with the label, character by character
1575 for (int i = 0; i < heredocLength; i++) {
1576 if (source[currentPosition + i] != source[heredocStart + i]) {
1582 currentPosition += heredocLength - 1;
1583 currentCharacter = source[currentPosition++];
1584 break; // do...while loop
1590 return TokenNameHEREDOC;
1592 return TokenNameLEFT_SHIFT;
1594 currentPosition = oldPosition;
1595 return TokenNameLESS;
1599 if ((test = getNextChar('=', '>')) == 0)
1600 return TokenNameGREATER_EQUAL;
1602 if ((test = getNextChar('=', '>')) == 0)
1603 return TokenNameRIGHT_SHIFT_EQUAL;
1604 return TokenNameRIGHT_SHIFT;
1606 return TokenNameGREATER;
1609 if (getNextChar('=')) {
1610 if (getNextChar('=')) {
1611 return TokenNameEQUAL_EQUAL_EQUAL;
1613 return TokenNameEQUAL_EQUAL;
1615 if (getNextChar('>'))
1616 return TokenNameEQUAL_GREATER;
1617 return TokenNameEQUAL;
1620 if ((test = getNextChar('&', '=')) == 0)
1621 return TokenNameAND_AND;
1623 return TokenNameAND_EQUAL;
1624 return TokenNameAND;
1628 if ((test = getNextChar('|', '=')) == 0)
1629 return TokenNameOR_OR;
1631 return TokenNameOR_EQUAL;
1635 if (getNextChar('='))
1636 return TokenNameXOR_EQUAL;
1637 return TokenNameXOR;
// branch returning TokenNameQUESTION: when a greater-than sign follows, scanning
// continues as inline HTML (or ends with INLINE_HTML at the very end of source)
1639 if (getNextChar('>')) {
1641 if (currentPosition == source.length) {
1643 return TokenNameINLINE_HTML;
1645 return getInlinedHTMLToken(currentPosition - 2);
1647 return TokenNameQUESTION;
1649 if (getNextChar(':'))
1650 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1651 return TokenNameCOLON;
// the three string forms each delegate to their own consume method
1655 consumeStringConstant();
1656 return TokenNameStringSingleQuote;
1658 // if (tokenizeStrings) {
1659 consumeStringLiteral();
1660 return TokenNameStringDoubleQuote;
1662 // return TokenNameEncapsedString2;
1664 // if (tokenizeStrings) {
1665 consumeStringInterpolated();
1666 return TokenNameStringInterpolated;
1668 // return TokenNameEncapsedString0;
// slash and hash branch: divide-equal, line comments (hash or double slash), block and
// doc comments, or a plain divide
1671 char startChar = currentCharacter;
1672 if (getNextChar('=') && startChar == '/') {
1673 return TokenNameDIVIDE_EQUAL;
1676 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1678 this.lastCommentLinePosition = this.currentPosition;
1679 int endPositionForLineComment = 0;
1680 try { // get the next char
1681 currentCharacter = source[currentPosition++];
1682 // if (((currentCharacter = source[currentPosition++])
1684 // && (source[currentPosition] == 'u')) {
1685 // //-------------unicode traitement ------------
1686 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1687 // currentPosition++;
1688 // while (source[currentPosition] == 'u') {
1689 // currentPosition++;
1692 // Character.getNumericValue(source[currentPosition++]))
1696 // Character.getNumericValue(source[currentPosition++]))
1700 // Character.getNumericValue(source[currentPosition++]))
1704 // Character.getNumericValue(source[currentPosition++]))
1708 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1710 // currentCharacter =
1711 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1714 // handle the \\u case manually into comment
1715 // if (currentCharacter == '\\') {
1716 // if (source[currentPosition] == '\\')
1717 // currentPosition++;
1718 // } //jump over the \\
1719 boolean isUnicode = false;
// the line comment runs until the next carriage return or line feed
1720 while (currentCharacter != '\r' && currentCharacter != '\n') {
1721 this.lastCommentLinePosition = this.currentPosition;
1722 if (currentCharacter == '?') {
1723 if (getNextChar('>')) {
1724 // ?> breaks line comments
1725 startPosition = currentPosition - 2;
1727 return TokenNameINLINE_HTML;
1730 // get the next char
1732 currentCharacter = source[currentPosition++];
1733 // if (((currentCharacter = source[currentPosition++])
1735 // && (source[currentPosition] == 'u')) {
1736 // isUnicode = true;
1737 // //-------------unicode traitement ------------
1738 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1739 // currentPosition++;
1740 // while (source[currentPosition] == 'u') {
1741 // currentPosition++;
1744 // Character.getNumericValue(source[currentPosition++]))
1748 // Character.getNumericValue(
1749 // source[currentPosition++]))
1753 // Character.getNumericValue(
1754 // source[currentPosition++]))
1758 // Character.getNumericValue(
1759 // source[currentPosition++]))
1763 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1765 // currentCharacter =
1766 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1769 // handle the \\u case manually into comment
1770 // if (currentCharacter == '\\') {
1771 // if (source[currentPosition] == '\\')
1772 // currentPosition++;
1773 // } //jump over the \\
// NOTE(review): the test choosing between these two assignments is not visible;
// presumably it is the isUnicode flag - confirm
1776 endPositionForLineComment = currentPosition - 6;
1778 endPositionForLineComment = currentPosition - 1;
1780 // recordComment(false);
1781 recordComment(TokenNameCOMMENT_LINE);
1782 if (this.taskTags != null)
1783 checkTaskTag(this.startPosition, this.currentPosition);
1784 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1785 checkNonExternalizeString();
1786 if (recordLineSeparator) {
1788 pushUnicodeLineSeparator();
1790 pushLineSeparator();
1796 if (tokenizeComments) {
1798 currentPosition = endPositionForLineComment;
1799 // reset one character behind
1801 return TokenNameCOMMENT_LINE;
1803 } catch (IndexOutOfBoundsException e) { // an eof will them
1805 if (tokenizeComments) {
1807 // reset one character behind
1808 return TokenNameCOMMENT_LINE;
1814 // traditional and annotation comment
1815 boolean isJavadoc = false, star = false;
1816 // consume next character
1817 unicodeAsBackSlash = false;
1818 currentCharacter = source[currentPosition++];
1819 // if (((currentCharacter = source[currentPosition++]) ==
1821 // && (source[currentPosition] == 'u')) {
1822 // getNextUnicodeChar();
1824 // if (withoutUnicodePtr != 0) {
1825 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1826 // currentCharacter;
1829 if (currentCharacter == '*') {
1833 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1834 checkNonExternalizeString();
1835 if (recordLineSeparator) {
1836 pushLineSeparator();
1841 try { // get the next char
1842 currentCharacter = source[currentPosition++];
1843 // if (((currentCharacter = source[currentPosition++])
1845 // && (source[currentPosition] == 'u')) {
1846 // //-------------unicode traitement ------------
1847 // getNextUnicodeChar();
1849 // handle the \\u case manually into comment
1850 // if (currentCharacter == '\\') {
1851 // if (source[currentPosition] == '\\')
1852 // currentPosition++;
1853 // //jump over the \\
1855 // empty comment is not a javadoc /**/
1856 if (currentCharacter == '/') {
1859 // loop until end of comment */
// star records whether the previously read character was an asterisk, so the loop
// ends only on a slash that directly follows one
1860 while ((currentCharacter != '/') || (!star)) {
1861 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1862 checkNonExternalizeString();
1863 if (recordLineSeparator) {
1864 pushLineSeparator();
1869 star = currentCharacter == '*';
1871 currentCharacter = source[currentPosition++];
1872 // if (((currentCharacter = source[currentPosition++])
1874 // && (source[currentPosition] == 'u')) {
1875 // //-------------unicode traitement ------------
1876 // getNextUnicodeChar();
1878 // handle the \\u case manually into comment
1879 // if (currentCharacter == '\\') {
1880 // if (source[currentPosition] == '\\')
1881 // currentPosition++;
1882 // } //jump over the \\
1884 // recordComment(isJavadoc);
// NOTE(review): the test choosing between the two recordComment calls (and the two
// returns below) is not visible; presumably it is the isJavadoc flag - confirm
1886 recordComment(TokenNameCOMMENT_PHPDOC);
1888 recordComment(TokenNameCOMMENT_BLOCK);
1891 if (tokenizeComments) {
1893 return TokenNameCOMMENT_PHPDOC;
1894 return TokenNameCOMMENT_BLOCK;
1897 if (this.taskTags != null) {
1898 checkTaskTag(this.startPosition, this.currentPosition);
1900 } catch (IndexOutOfBoundsException e) {
1901 // reset end position for error reporting
1902 currentPosition -= 2;
1903 throw new InvalidInputException(UNTERMINATED_COMMENT);
1907 return TokenNameDIVIDE;
1911 return TokenNameEOF;
1912 // the atEnd may not be <currentPosition == source.length> if
1913 // source is only some part of a real (external) stream
1914 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// a dollar sign followed by an identifier start is handed to
// scanIdentifierOrKeyword(true); otherwise the position is restored and a bare
// TokenNameDOLLAR is returned
1916 if (currentCharacter == '$') {
1917 int oldPosition = currentPosition;
1919 currentCharacter = source[currentPosition++];
1920 if (isPHPIdentifierStart(currentCharacter)) {
1921 return scanIdentifierOrKeyword(true);
1923 currentPosition = oldPosition;
1924 return TokenNameDOLLAR;
1926 } catch (IndexOutOfBoundsException e) {
1927 currentPosition = oldPosition;
1928 return TokenNameDOLLAR;
1931 if (isPHPIdentifierStart(currentCharacter))
1932 return scanIdentifierOrKeyword(false);
1933 if (Character.isDigit(currentCharacter))
1934 return scanNumber(false);
1935 return TokenNameERROR;
1938 } // -----------------end switch while try--------------------
// running off the end of source in code not covered by an inner handler ends the
// token stream (two lines of this handler are not visible in the listing)
1939 catch (IndexOutOfBoundsException e) {
1942 return TokenNameEOF;
1947 * @throws InvalidInputException
1949 private int getInlinedHTMLToken(int start) throws InvalidInputException {
// Scans text outside of PHP code, looking for an opening PHP tag, and reports the
// scanned text as TokenNameINLINE_HTML. The start argument becomes startPosition of
// the returned token.
// NOTE(review): this listing has gaps in its embedded line numbers (loop header, try
// opener, else branches and closing braces are not visible), so the nesting and the
// state changes made when a tag is found should be confirmed against the full file.
//
// past the end of source: clamp the position and report end of input
1950 if (currentPosition > source.length) {
1951 currentPosition = source.length;
1952 return TokenNameEOF;
1954 startPosition = start;
1957 currentCharacter = source[currentPosition++];
// a less-than sign followed by a question mark is a candidate opening tag
1958 if (currentCharacter == '<') {
1959 if (getNextChar('?')) {
1960 currentCharacter = source[currentPosition++];
// not followed by p or P: either the short echo form (equals sign), which sets
// phpExpressionTag, or some other character
1961 if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
1962 if (currentCharacter != '=') { // <?=
1965 phpExpressionTag = true;
// in one-liner mode the rest of the line is inspected first
1968 if (ignorePHPOneLiner) { // for CodeFormatter
1969 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1971 return TokenNameINLINE_HTML;
1975 return TokenNameINLINE_HTML;
1978 // boolean phpStart = (currentCharacter == 'P') ||
1979 // (currentCharacter == 'p');
// the remaining letters of the tag name are matched in either case
1981 int test = getNextChar('H', 'h');
1983 test = getNextChar('P', 'p');
1986 if (ignorePHPOneLiner) {
1987 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1989 return TokenNameINLINE_HTML;
1993 return TokenNameINLINE_HTML;
2001 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
2002 if (recordLineSeparator) {
2003 pushLineSeparator();
2008 } // -----------------while--------------------
2010 return TokenNameINLINE_HTML;
2011 } // -----------------try--------------------
// end of source reached while scanning: the remaining text is still reported as
// inline HTML starting at start
2012 catch (IndexOutOfBoundsException e) {
2013 startPosition = start;
2017 return TokenNameINLINE_HTML;
2023 private int lookAheadLinePHPTag() {
2024 // check if the PHP is only in this line (for CodeFormatter)
// Scans forward from currentPosition with a private cursor, tracking whether the
// cursor is inside a single- or double-quoted string. Returns TokenNameEOF (used only
// as a marker) after moving currentPosition past a closing tag found by the scan;
// every other visible exit returns TokenNameINLINE_HTML. Callers in
// getInlinedHTMLToken compare the result against TokenNameINLINE_HTML.
// NOTE(review): the loop header and all case labels are missing from this listing, so
// which character triggers each branch below must be confirmed against the full file.
2025 int currentPositionInLine = currentPosition;
2026 char previousCharInLine = ' ';
2027 char currentCharInLine = ' ';
2028 boolean singleQuotedStringActive = false;
2029 boolean doubleQuotedStringActive = false;
2032 // look ahead in this line
2034 previousCharInLine = currentCharInLine;
2035 currentCharInLine = source[currentPositionInLine++];
2036 switch (currentCharInLine) {
// closing tag: the previous character was a question mark (presumably this is the
// greater-than case - confirm)
2038 if (previousCharInLine == '?') {
2039 // update the scanner's current Position in the source
2040 currentPosition = currentPositionInLine;
2041 // use as "dummy" token
2042 return TokenNameEOF;
2046 if (doubleQuotedStringActive) {
2047 // ignore escaped characters in double quoted strings
2048 previousCharInLine = currentCharInLine;
2049 currentCharInLine = source[currentPositionInLine++];
// toggling of the double-quoted state; a double-quoted string is not opened while a
// single-quoted string is active
2052 if (doubleQuotedStringActive) {
2053 doubleQuotedStringActive = false;
2055 if (!singleQuotedStringActive) {
2056 doubleQuotedStringActive = true;
// toggling of the single-quoted state; a single-quoted string is closed only when the
// previous character was not a backslash
2061 if (singleQuotedStringActive) {
2062 if (previousCharInLine != '\\') {
2063 singleQuotedStringActive = false;
2066 if (!doubleQuotedStringActive) {
2067 singleQuotedStringActive = true;
2073 return TokenNameINLINE_HTML;
2075 if (!singleQuotedStringActive && !doubleQuotedStringActive) {
2077 return TokenNameINLINE_HTML;
// the two branches below fire when the previous character was a slash outside of any
// string (presumably the start of a comment - confirm)
2081 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2083 return TokenNameINLINE_HTML;
2087 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2089 return TokenNameINLINE_HTML;
// end of source reached during the scan: commit the cursor and report inline HTML
2094 } catch (IndexOutOfBoundsException e) {
2096 currentPosition = currentPositionInLine;
2097 return TokenNameINLINE_HTML;
2101 // public final void getNextUnicodeChar()
2102 // throws IndexOutOfBoundsException, InvalidInputException {
2104 // //handle the case of unicode.
2105 // //when a unicode appears then we must use a buffer that holds char
2107 // //At the end of this method currentCharacter holds the new visited char
2108 // //and currentPosition points right next after it
2110 // //ALL getNextChar.... ARE OPTIMIZED COPIES
2112 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2113 // currentPosition++;
2114 // while (source[currentPosition] == 'u') {
2115 // currentPosition++;
2119 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2121 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2123 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2125 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2127 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2129 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2130 // //need the unicode buffer
2131 // if (withoutUnicodePtr == 0) {
2132 // //buffer all the entries that have been left aside....
2133 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2134 // System.arraycopy(
2137 // withoutUnicodeBuffer,
2139 // withoutUnicodePtr);
2141 // //fill the buffer with the char
2142 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2144 // unicodeAsBackSlash = currentCharacter == '\\';
2147 * Tokenize a method body, assuming that curly brackets are properly balanced.
2149 public final void jumpOverMethodBody() {
2150 this.wasAcr = false;
2153 while (true) { // loop for jumping over comments
2154 // ---------Consume white space and handles startPosition---------
2155 boolean isWhiteSpace;
2157 startPosition = currentPosition;
2158 currentCharacter = source[currentPosition++];
2159 // if (((currentCharacter = source[currentPosition++]) == '\\')
2160 // && (source[currentPosition] == 'u')) {
2161 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2163 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2164 pushLineSeparator();
2165 isWhiteSpace = Character.isWhitespace(currentCharacter);
2167 } while (isWhiteSpace);
2168 // -------consume token until } is found---------
2169 switch (currentCharacter) {
2180 test = getNextChar('\\');
2183 scanDoubleQuotedEscapeCharacter();
2184 } catch (InvalidInputException ex) {
2188 // try { // consume next character
2189 unicodeAsBackSlash = false;
2190 currentCharacter = source[currentPosition++];
2191 // if (((currentCharacter = source[currentPosition++]) == '\\')
2192 // && (source[currentPosition] == 'u')) {
2193 // getNextUnicodeChar();
2195 if (withoutUnicodePtr != 0) {
2196 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2199 // } catch (InvalidInputException ex) {
2207 // try { // consume next character
2208 unicodeAsBackSlash = false;
2209 currentCharacter = source[currentPosition++];
2210 // if (((currentCharacter = source[currentPosition++]) == '\\')
2211 // && (source[currentPosition] == 'u')) {
2212 // getNextUnicodeChar();
2214 if (withoutUnicodePtr != 0) {
2215 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2218 // } catch (InvalidInputException ex) {
2220 while (currentCharacter != '"') {
2221 if (currentCharacter == '\r') {
2222 if (source[currentPosition] == '\n')
2225 // the string cannot go further that the line
2227 if (currentCharacter == '\n') {
2229 // the string cannot go further that the line
2231 if (currentCharacter == '\\') {
2233 scanDoubleQuotedEscapeCharacter();
2234 } catch (InvalidInputException ex) {
2238 // try { // consume next character
2239 unicodeAsBackSlash = false;
2240 currentCharacter = source[currentPosition++];
2241 // if (((currentCharacter = source[currentPosition++]) == '\\')
2242 // && (source[currentPosition] == 'u')) {
2243 // getNextUnicodeChar();
2245 if (withoutUnicodePtr != 0) {
2246 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2249 // } catch (InvalidInputException ex) {
2252 } catch (IndexOutOfBoundsException e) {
2258 if ((test = getNextChar('/', '*')) == 0) {
2261 // get the next char
2262 currentCharacter = source[currentPosition++];
2263 // if (((currentCharacter = source[currentPosition++]) ==
2265 // && (source[currentPosition] == 'u')) {
2266 // //-------------unicode traitement ------------
2267 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2268 // currentPosition++;
2269 // while (source[currentPosition] == 'u') {
2270 // currentPosition++;
2273 // Character.getNumericValue(source[currentPosition++]))
2277 // Character.getNumericValue(source[currentPosition++]))
2281 // Character.getNumericValue(source[currentPosition++]))
2285 // Character.getNumericValue(source[currentPosition++]))
2288 // //error don't care of the value
2289 // currentCharacter = 'A';
2290 // } //something different from \n and \r
2292 // currentCharacter =
2293 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2296 while (currentCharacter != '\r' && currentCharacter != '\n') {
2297 // get the next char
2298 currentCharacter = source[currentPosition++];
2299 // if (((currentCharacter = source[currentPosition++])
2301 // && (source[currentPosition] == 'u')) {
2302 // //-------------unicode traitement ------------
2303 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2304 // currentPosition++;
2305 // while (source[currentPosition] == 'u') {
2306 // currentPosition++;
2309 // Character.getNumericValue(source[currentPosition++]))
2313 // Character.getNumericValue(source[currentPosition++]))
2317 // Character.getNumericValue(source[currentPosition++]))
2321 // Character.getNumericValue(source[currentPosition++]))
2324 // //error don't care of the value
2325 // currentCharacter = 'A';
2326 // } //something different from \n and \r
2328 // currentCharacter =
2329 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2333 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2334 pushLineSeparator();
2335 } catch (IndexOutOfBoundsException e) {
2336 } // an eof will them be generated
2340 // traditional and annotation comment
2341 boolean star = false;
2342 // try { // consume next character
2343 unicodeAsBackSlash = false;
2344 currentCharacter = source[currentPosition++];
2345 // if (((currentCharacter = source[currentPosition++]) == '\\')
2346 // && (source[currentPosition] == 'u')) {
2347 // getNextUnicodeChar();
2349 if (withoutUnicodePtr != 0) {
2350 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2353 // } catch (InvalidInputException ex) {
2355 if (currentCharacter == '*') {
2358 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2359 pushLineSeparator();
2360 try { // get the next char
2361 currentCharacter = source[currentPosition++];
2362 // if (((currentCharacter = source[currentPosition++]) ==
2364 // && (source[currentPosition] == 'u')) {
2365 // //-------------unicode traitement ------------
2366 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2367 // currentPosition++;
2368 // while (source[currentPosition] == 'u') {
2369 // currentPosition++;
2372 // Character.getNumericValue(source[currentPosition++]))
2376 // Character.getNumericValue(source[currentPosition++]))
2380 // Character.getNumericValue(source[currentPosition++]))
2384 // Character.getNumericValue(source[currentPosition++]))
2387 // //error don't care of the value
2388 // currentCharacter = 'A';
2389 // } //something different from * and /
2391 // currentCharacter =
2392 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2395 // loop until end of comment */
2396 while ((currentCharacter != '/') || (!star)) {
2397 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2398 pushLineSeparator();
2399 star = currentCharacter == '*';
2401 currentCharacter = source[currentPosition++];
2402 // if (((currentCharacter = source[currentPosition++])
2404 // && (source[currentPosition] == 'u')) {
2405 // //-------------unicode traitement ------------
2406 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2407 // currentPosition++;
2408 // while (source[currentPosition] == 'u') {
2409 // currentPosition++;
2412 // Character.getNumericValue(source[currentPosition++]))
2416 // Character.getNumericValue(source[currentPosition++]))
2420 // Character.getNumericValue(source[currentPosition++]))
2424 // Character.getNumericValue(source[currentPosition++]))
2427 // //error don't care of the value
2428 // currentCharacter = 'A';
2429 // } //something different from * and /
2431 // currentCharacter =
2432 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2436 } catch (IndexOutOfBoundsException e) {
2444 if (isPHPIdentOrVarStart(currentCharacter) ) {
2446 scanIdentifierOrKeyword((currentCharacter == '$'));
2447 } catch (InvalidInputException ex) {
2452 if ( ObviousIdentCharNatures[currentCharacter]==C_DIGIT) {
2453 // if (Character.isDigit(currentCharacter)) {
2456 } catch (InvalidInputException ex) {
2463 // -----------------end switch while try--------------------
2464 } catch (IndexOutOfBoundsException e) {
2465 } catch (InvalidInputException e) {
2470 // public final boolean jumpOverUnicodeWhiteSpace()
2471 // throws InvalidInputException {
2473 // //handle the case of unicode. Jump over the next whiteSpace
2474 // //making startPosition pointing on the next available char
2475 // //On false, the currentCharacter is filled up with a potential
2479 // this.wasAcr = false;
2480 // int c1, c2, c3, c4;
2481 // int unicodeSize = 6;
2482 // currentPosition++;
2483 // while (source[currentPosition] == 'u') {
2484 // currentPosition++;
2488 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2490 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2492 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2494 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2496 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2499 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2500 // if (recordLineSeparator
2501 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2502 // pushLineSeparator();
2503 // if (Character.isWhitespace(currentCharacter))
2506 // //buffer the new char which is not a white space
2507 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2508 // //withoutUnicodePtr == 1 is true here
2510 // } catch (IndexOutOfBoundsException e) {
2511 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
/**
 * Builds a bounded copy of the recorded line-end positions: the first
 * linePtr + 1 entries of lineEnds are copied into a freshly allocated array.
 * NOTE(review): the declaration of copy and the return statement are not
 * visible in this listing; presumably the method returns copy - confirm.
 */
2514 public final int[] getLineEnds() {
2515 // return a bounded copy of this.lineEnds
// the destination array is allocated inside the arraycopy call itself
2517 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
/**
 * Accessor returning a char array.
 * NOTE(review): the body is not visible in this listing; presumably it
 * returns the source buffer that the scanner reads from - confirm.
 */
2521 public char[] getSource() {
/**
 * Classifies a token code as identifier-or-keyword.
 *
 * @param token the token code to test
 * @return true when token equals TokenNameIdentifier or is strictly greater
 *         than TokenNameKEYWORD
 */
2525 public static boolean isIdentifierOrKeyword(int token) {
// presumably every keyword token code is numbered above TokenNameKEYWORD - verify against ITerminalSymbols
2526 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
/**
 * Returns the source of a one-character token, read from source[startPosition].
 * NOTE(review): the statements between reading charOne and the final return
 * are not visible in this listing; per the comments below they presumably
 * hand back shared, pre-built arrays for common characters - confirm.
 *
 * @return a char array holding the single token character
 */
2529 final char[] optimizedCurrentTokenSource1() {
2530 // return always the same char[] build only once
2531 // optimization at no speed cost of 99.5 % of the singleCharIdentifier
2532 char charOne = source[startPosition];
// fallback visible here: allocate a new one-element array for this character
2587 return new char[] { charOne };
/**
 * Returns the source of a two-character token (characters at startPosition
 * and startPosition + 1), trying to reuse an existing array instead of
 * allocating a new one.
 *
 * Two reuse paths are visible: a run of shared arrays charArray_va ..
 * charArray_vz, and a lookup in the bucket charArray_length[0][hash].
 * NOTE(review): the declarations of c0, c1, i and r, the conditions selecting
 * the charArray_v* results, and the statements that follow each match test
 * are not visible in this listing - confirm against the full file.
 *
 * @return a char array holding the two token characters
 */
2591 final char[] optimizedCurrentTokenSource2() {
2593 c0 = source[startPosition];
2594 c1 = source[startPosition + 1];
2596 // return always the same char[] build only once
2597 // optimization at no speed cost of 99.5 % of the singleCharIdentifier
// shared arrays; presumably chosen by a switch on c1 for variable-like tokens - confirm
2600 return charArray_va;
2602 return charArray_vb;
2604 return charArray_vc;
2606 return charArray_vd;
2608 return charArray_ve;
2610 return charArray_vf;
2612 return charArray_vg;
2614 return charArray_vh;
2616 return charArray_vi;
2618 return charArray_vj;
2620 return charArray_vk;
2622 return charArray_vl;
2624 return charArray_vm;
2626 return charArray_vn;
2628 return charArray_vo;
2630 return charArray_vp;
2632 return charArray_vq;
2634 return charArray_vr;
2636 return charArray_vs;
2638 return charArray_vt;
2640 return charArray_vu;
2642 return charArray_vv;
2644 return charArray_vw;
2646 return charArray_vx;
2648 return charArray_vy;
2650 return charArray_vz;
2653 // try to return the same char[] build only once
// bucket index: both characters folded together, reduced modulo TableSize
2654 int hash = ((c0 << 6) + c1) % TableSize;
2655 char[][] table = charArray_length[0][hash];
// first scan: walks the bucket upwards until InternalTableSize
2657 while (++i < InternalTableSize) {
2658 char[] charArray = table[i];
2659 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2662 // ---------other side---------
// second scan: walks the bucket up to and including newEntry2
2664 int max = newEntry2;
2665 while (++i <= max) {
2666 char[] charArray = table[i];
2667 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2670 // --------add the entry-------
// no match: the slot index is advanced (handling of the overflow case is not visible)
2671 if (++max >= InternalTableSize)
// a newly allocated array is stored in the bucket and kept in r
2674 table[max] = (r = new char[] { c0, c1 });
/**
 * Returns the source of a three-character token starting at startPosition,
 * looking first for an existing array in the bucket charArray_length[1][hash]
 * and storing a newly allocated array there when none matches.
 * NOTE(review): the declarations of c0, c1, c2, i and r, the tail of the hash
 * expression and the statements after each match test are not visible in
 * this listing - confirm against the full file.
 *
 * @return a char array holding the three token characters
 */
2679 final char[] optimizedCurrentTokenSource3() {
2680 // try to return the same char[] build only once
// the three characters are read and folded into the hash in one expression (6 bits apart)
2682 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2684 char[][] table = charArray_length[1][hash];
// first scan: walks the bucket upwards until InternalTableSize
2686 while (++i < InternalTableSize) {
2687 char[] charArray = table[i];
2688 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2691 // ---------other side---------
// second scan: walks the bucket up to and including newEntry3
2693 int max = newEntry3;
2694 while (++i <= max) {
2695 char[] charArray = table[i];
2696 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2699 // --------add the entry-------
2700 if (++max >= InternalTableSize)
// a newly allocated array is stored in the bucket and kept in r
2703 table[max] = (r = new char[] { c0, c1, c2 });
/**
 * Returns the source of a four-character token starting at startPosition,
 * looking first for an existing array in the bucket
 * charArray_length[2][(int) hash] and storing a newly allocated array there
 * when none matches.
 * NOTE(review): the declarations of i and r, the tail of the hash expression
 * and the statements after each match test are not visible in this listing -
 * confirm against the full file.
 *
 * @return a char array holding the four token characters
 */
2708 final char[] optimizedCurrentTokenSource4() {
2709 // try to return the same char[] build only once
2710 char c0, c1, c2, c3;
// the hash is computed as a long; only c0 is widened before shifting
2711 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2712 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2714 char[][] table = charArray_length[2][(int) hash];
// first scan: walks the bucket upwards until InternalTableSize
2716 while (++i < InternalTableSize) {
2717 char[] charArray = table[i];
2718 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2721 // ---------other side---------
// second scan: walks the bucket up to and including newEntry4
2723 int max = newEntry4;
2724 while (++i <= max) {
2725 char[] charArray = table[i];
2726 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2729 // --------add the entry-------
2730 if (++max >= InternalTableSize)
// a newly allocated array is stored in the bucket and kept in r
2733 table[max] = (r = new char[] { c0, c1, c2, c3 });
/**
 * Returns the source of a five-character token starting at startPosition,
 * looking first for an existing array in the bucket
 * charArray_length[3][(int) hash] and storing a newly allocated array there
 * when none matches.
 * NOTE(review): the declarations of i and r, the tail of the hash expression
 * and the statements after each match test are not visible in this listing -
 * confirm against the full file.
 *
 * @return a char array holding the five token characters
 */
2738 final char[] optimizedCurrentTokenSource5() {
2739 // try to return the same char[] build only once
2740 char c0, c1, c2, c3, c4;
// the hash is computed as a long; c0 and c1 are widened before shifting
2741 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2742 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2744 char[][] table = charArray_length[3][(int) hash];
// first scan: walks the bucket upwards until InternalTableSize
2746 while (++i < InternalTableSize) {
2747 char[] charArray = table[i];
2748 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2751 // ---------other side---------
// second scan: walks the bucket up to and including newEntry5
2753 int max = newEntry5;
2754 while (++i <= max) {
2755 char[] charArray = table[i];
2756 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2759 // --------add the entry-------
2760 if (++max >= InternalTableSize)
// a newly allocated array is stored in the bucket and kept in r
2763 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
/**
 * Returns the source of a six-character token starting at startPosition,
 * looking first for an existing array in the bucket
 * charArray_length[4][(int) hash] and storing a newly allocated array there
 * when none matches.
 * NOTE(review): the declarations of i and r, the tail of the hash expression
 * and the statements after each match test are not visible in this listing -
 * confirm against the full file.
 *
 * @return a char array holding the six token characters
 */
2768 final char[] optimizedCurrentTokenSource6() {
2769 // try to return the same char[] build only once
2770 char c0, c1, c2, c3, c4, c5;
// NOTE(review): c0 is shifted by 32 while the other characters sit 6 bits apart
// (24, 18, 12, 6); this only affects bucket distribution, but confirm it is intended
2771 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2772 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2773 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2775 char[][] table = charArray_length[4][(int) hash];
// first scan: walks the bucket upwards until InternalTableSize
2777 while (++i < InternalTableSize) {
2778 char[] charArray = table[i];
2779 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2780 && (c5 == charArray[5]))
2783 // ---------other side---------
// second scan: walks the bucket up to and including newEntry6
2785 int max = newEntry6;
2786 while (++i <= max) {
2787 char[] charArray = table[i];
2788 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2789 && (c5 == charArray[5]))
2792 // --------add the entry-------
2793 if (++max >= InternalTableSize)
// a newly allocated array is stored in the bucket and kept in r
2796 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
/**
 * Records the position of the line separator held in currentCharacter
 * (carriage return or line feed) as a new entry of lineEnds.
 *
 * The separator position is currentPosition - 1. When lineEnds is full, the
 * resulting IndexOutOfBoundsException is caught and the array is replaced by
 * a copy that is INCREMENT (250) entries longer. A carriage return directly
 * followed by a line feed is stored as one entry pointing at the line feed.
 * NOTE(review): several short statements (the try openings, the statement
 * guarded by each already-recorded test, the wasAcr updates and the body of
 * the checkNonExternalizedStringLiterals branch) are not visible in this
 * listing - confirm against the full file.
 *
 * @throws InvalidInputException declared by the signature; no throw is visible in this listing
 */
2801 public final void pushLineSeparator() throws InvalidInputException {
2802 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2803 final int INCREMENT = 250;
2804 if (this.checkNonExternalizedStringLiterals) {
2805 // reinitialize the current line for non externalize strings purpose
2808 // currentCharacter is at position currentPosition-1
2810 if (currentCharacter == '\r') {
2811 int separatorPos = currentPosition - 1;
// position already covered by the last recorded line end (presumably returns early - confirm)
2812 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2814 // System.out.println("CR-" + separatorPos);
2816 lineEnds[++linePtr] = separatorPos;
2817 } catch (IndexOutOfBoundsException e) {
2818 // linePtr value is correct
// linePtr was already incremented before the failed store, so it is reused as is
2819 int oldLength = lineEnds.length;
2820 int[] old = lineEnds;
2821 lineEnds = new int[oldLength + INCREMENT];
2822 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2823 lineEnds[linePtr] = separatorPos;
2825 // look-ahead for merged cr+lf
2827 if (source[currentPosition] == '\n') {
2828 // System.out.println("look-ahead LF-" + currentPosition);
// the entry just recorded is moved onto the line feed
2829 lineEnds[linePtr] = currentPosition;
// reading past the end of source during the look-ahead is caught here
2835 } catch (IndexOutOfBoundsException e) {
2840 if (currentCharacter == '\n') {
2841 // must merge eventual cr followed by lf
// the previous entry is the carriage return immediately before this line feed
2842 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2843 // System.out.println("merge LF-" + (currentPosition - 1));
2844 lineEnds[linePtr] = currentPosition - 1;
2846 int separatorPos = currentPosition - 1;
2847 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2849 // System.out.println("LF-" + separatorPos);
2851 lineEnds[++linePtr] = separatorPos;
2852 } catch (IndexOutOfBoundsException e) {
2853 // linePtr value is correct
2854 int oldLength = lineEnds.length;
2855 int[] old = lineEnds;
2856 lineEnds = new int[oldLength + INCREMENT];
2857 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2858 lineEnds[linePtr] = separatorPos;
/**
 * Variant of line-separator recording for a carriage return or line feed that
 * was read as a unicode character (see the first comment in the body).
 *
 * The separator position is currentPosition - 6 instead of currentPosition - 1,
 * presumably because a unicode escape occupies six source characters - confirm.
 * When lineEnds is full, the resulting IndexOutOfBoundsException is caught and
 * the array is replaced by a copy that is INCREMENT (250) entries longer.
 * NOTE(review): several short statements (the try openings, the statement
 * guarded by each already-recorded test, the wasAcr updates and the body of
 * the checkNonExternalizedStringLiterals branch) are not visible in this
 * listing - confirm against the full file.
 */
2866 public final void pushUnicodeLineSeparator() {
2867 // isUnicode means that the \r or \n has been read as a unicode character
2868 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2869 final int INCREMENT = 250;
2870 // currentCharacter is at position currentPosition-1
2871 if (this.checkNonExternalizedStringLiterals) {
2872 // reinitialize the current line for non externalize strings purpose
2876 if (currentCharacter == '\r') {
2877 int separatorPos = currentPosition - 6;
// position already covered by the last recorded line end (presumably returns early - confirm)
2878 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2880 // System.out.println("CR-" + separatorPos);
2882 lineEnds[++linePtr] = separatorPos;
2883 } catch (IndexOutOfBoundsException e) {
2884 // linePtr value is correct
// linePtr was already incremented before the failed store, so it is reused as is
2885 int oldLength = lineEnds.length;
2886 int[] old = lineEnds;
2887 lineEnds = new int[oldLength + INCREMENT];
2888 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2889 lineEnds[linePtr] = separatorPos;
2891 // look-ahead for merged cr+lf
2892 if (source[currentPosition] == '\n') {
2893 // System.out.println("look-ahead LF-" + currentPosition);
// the entry just recorded is moved onto the line feed
2894 lineEnds[linePtr] = currentPosition;
2902 if (currentCharacter == '\n') {
2903 // must merge eventual cr followed by lf
// the previous entry sits seven characters back, i.e. right before this escaped line feed
2904 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2905 // System.out.println("merge LF-" + (currentPosition - 1));
2906 lineEnds[linePtr] = currentPosition - 6;
2908 int separatorPos = currentPosition - 6;
2909 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2911 // System.out.println("LF-" + separatorPos);
2913 lineEnds[++linePtr] = separatorPos;
2914 } catch (IndexOutOfBoundsException e) {
2915 // linePtr value is correct
2916 int oldLength = lineEnds.length;
2917 int[] old = lineEnds;
2918 lineEnds = new int[oldLength + INCREMENT];
2919 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2920 lineEnds[linePtr] = separatorPos;
/**
 * Records the start and stop positions of the comment that was just scanned.
 *
 * The stop position defaults to currentPosition. For TokenNameCOMMENT_LINE it
 * is the negated lastCommentLinePosition and for TokenNameCOMMENT_BLOCK the
 * negated currentPosition, so a negative stop marks these two comment kinds.
 * The start position stored is startPosition. Both position arrays grow by 30
 * entries when the comment stack is full.
 * NOTE(review): the switch header and the break statements are not visible in
 * this listing; presumably the switch is on token - confirm.
 *
 * @param token the token code of the comment being recorded
 */
2928 public void recordComment(int token) {
2930 int stopPosition = this.currentPosition;
2932 case TokenNameCOMMENT_LINE:
2933 stopPosition = -this.lastCommentLinePosition;
2935 case TokenNameCOMMENT_BLOCK:
2936 stopPosition = -this.currentPosition;
2940 // a new comment is recorded
2941 int length = this.commentStops.length;
// the comment pointer is advanced first; growth happens when it reaches the capacity
2942 if (++this.commentPtr >= length) {
2943 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2944 // grows the positions buffers too
2945 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2947 this.commentStops[this.commentPtr] = stopPosition;
2948 this.commentStarts[this.commentPtr] = this.startPosition;
2951 // public final void recordComment(boolean isJavadoc) {
2952 // // a new annotation comment is recorded
2954 // commentStops[++commentPtr] = isJavadoc
2955 // ? currentPosition
2956 // : -currentPosition;
2957 // } catch (IndexOutOfBoundsException e) {
2958 // int oldStackLength = commentStops.length;
2959 // int[] oldStack = commentStops;
2960 // commentStops = new int[oldStackLength + 30];
2961 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2962 // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2963 // //grows the positions buffers too
2964 // int[] old = commentStarts;
2965 // commentStarts = new int[oldStackLength + 30];
2966 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2968 // //the buffer is of a correct size here
2969 // commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning can restart within a given range.
 * NOTE(review): one statement between the first comment and the position
 * assignment is not visible in this listing - confirm against the full file.
 *
 * @param begin position assigned to initialPosition, startPosition and currentPosition
 * @param end   last position of the range; eofPosition becomes end + 1
 */
2971 public void resetTo(int begin, int end) {
2972 // reset the scanner to a given position where it may rescan again
2974 initialPosition = startPosition = currentPosition = begin;
// end + 1 is skipped for Integer.MAX_VALUE, where the addition would overflow
2975 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2976 commentPtr = -1; // reset comment stack
/**
 * Handles the character that follows a backslash inside a single-quoted
 * string: the next source character is read into currentCharacter and then
 * replaced according to a switch.
 * The visible assignments produce a single quote or a backslash.
 * NOTE(review): the case labels, the break statements and any position
 * adjustment in the last branch are not visible in this listing - confirm
 * which input maps to which result.
 *
 * @throws InvalidInputException declared by the signature; no throw is visible in this listing
 */
2979 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2980 // the string with "\\u" is a legal string of two chars \ and u
2981 // thus we use a direct access to the source (for regular cases).
2982 // if (unicodeAsBackSlash) {
2983 // // consume next character
2984 // unicodeAsBackSlash = false;
2985 // if (((currentCharacter = source[currentPosition++]) == '\\')
2986 // && (source[currentPosition] == 'u')) {
2987 // getNextUnicodeChar();
2989 // if (withoutUnicodePtr != 0) {
2990 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// read the escaped character directly from the source and advance
2994 currentCharacter = source[currentPosition++];
2995 switch (currentCharacter) {
2997 currentCharacter = '\'';
3000 currentCharacter = '\\';
// presumably the default branch: the result is a plain backslash - confirm
3003 currentCharacter = '\\';
/**
 * Handles the character that follows a backslash inside a double-quoted
 * string: the next source character is read into currentCharacter and then
 * replaced by the character the escape stands for.
 *
 * The visible assignments produce tab, line feed, carriage return, double
 * quote, single quote, backslash and dollar; the assignments for backspace and
 * form feed are commented out. Otherwise an octal escape of up to three digits
 * is accumulated in number and cast to char; a third digit is only consumed
 * when the first digit is at most 3.
 * NOTE(review): the case labels, the break statements and the statements that
 * undo the read of an ignored character are not visible in this listing, so
 * the exact condition guarding the throw below cannot be confirmed here.
 *
 * @throws InvalidInputException with INVALID_ESCAPE (see the throw near the end of the body)
 */
3008 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
// read the escaped character directly from the source and advance
3009 currentCharacter = source[currentPosition++];
3010 switch (currentCharacter) {
3012 // currentCharacter = '\b';
3015 currentCharacter = '\t';
3018 currentCharacter = '\n';
3021 // currentCharacter = '\f';
3024 currentCharacter = '\r';
3027 currentCharacter = '\"';
3030 currentCharacter = '\'';
3033 currentCharacter = '\\';
3036 currentCharacter = '$';
3039 // -----------octal escape--------------
3041 // OctalDigit OctalDigit
3042 // ZeroToThree OctalDigit OctalDigit
3043 int number = Character.getNumericValue(currentCharacter);
3044 if (number >= 0 && number <= 7) {
// a first digit above 3 rules out a third digit
3045 boolean zeroToThreeNot = number > 3;
3046 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3047 int digit = Character.getNumericValue(currentCharacter);
3048 if (digit >= 0 && digit <= 7) {
3049 number = (number * 8) + digit;
3050 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3051 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3052 // Digit --> ignore last character
3055 digit = Character.getNumericValue(currentCharacter);
3056 if (digit >= 0 && digit <= 7) {
3057 // has read \ZeroToThree OctalDigit OctalDigit
3058 number = (number * 8) + digit;
3059 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3060 // --> ignore last character
3064 } else { // has read \OctalDigit NonDigit--> ignore last
3068 } else { // has read \OctalDigit NonOctalDigit--> ignore last
3072 } else { // has read \OctalDigit --> ignore last character
3076 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the resulting character
3077 currentCharacter = (char) number;
3080 // throw new InvalidInputException(INVALID_ESCAPE);
3084 // public int scanIdentifierOrKeyword() throws InvalidInputException {
3085 // return scanIdentifierOrKeyword( false );
3087 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
3089 // first dispatch on the first char.
3090 // then the length. If there are several
3091 // keywords with the same length AND the same first char, then do another
3092 // dispatch on the second char :-)...cool....but fast !
3093 useAssertAsAnIndentifier = false;
3094 while (getNextCharAsJavaIdentifierPart()) {
3098 // if (new String(getCurrentTokenSource()).equals("$this")) {
3099 // return TokenNamethis;
3101 return TokenNameVariable;
3106 // if (withoutUnicodePtr == 0)
3107 // quick test on length == 1 but not on length > 12 while most identifier
3108 // have a length which is <= 12...but there are lots of identifier with
3109 // only one char....
3111 if ((length = currentPosition - startPosition) == 1)
3112 return TokenNameIdentifier;
3114 data = new char[length];
3115 index = startPosition;
3116 for (int i = 0; i < length; i++) {
3117 data[i] = Character.toLowerCase(source[index + i]);
3121 // if ((length = withoutUnicodePtr) == 1)
3122 // return TokenNameIdentifier;
3123 // // data = withoutUnicodeBuffer;
3124 // data = new char[withoutUnicodeBuffer.length];
3125 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3126 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3130 firstLetter = data[index];
3131 switch (firstLetter) {
3136 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3137 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3138 return TokenNameFILE;
3139 index = 0; // __LINE__
3140 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3141 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3142 return TokenNameLINE;
3146 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3147 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3148 return TokenNameCLASS_C;
3152 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3153 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3154 && (data[++index] == '_'))
3155 return TokenNameMETHOD_C;
3159 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3160 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3161 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3162 return TokenNameFUNC_C;
3165 return TokenNameIdentifier;
3167 // as and array abstract
3171 if ((data[++index] == 's')) {
3174 return TokenNameIdentifier;
3178 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3179 return TokenNameand;
3181 return TokenNameIdentifier;
3185 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3186 return TokenNamearray;
3188 return TokenNameIdentifier;
3190 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3191 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3192 return TokenNameabstract;
3194 return TokenNameIdentifier;
3196 return TokenNameIdentifier;
3202 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3203 return TokenNamebreak;
3205 return TokenNameIdentifier;
3207 return TokenNameIdentifier;
3210 // case catch class clone const continue
3213 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3214 return TokenNamecase;
3216 return TokenNameIdentifier;
3218 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3219 return TokenNamecatch;
3221 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3222 return TokenNameclass;
3224 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3225 return TokenNameclone;
3227 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3228 return TokenNameconst;
3230 return TokenNameIdentifier;
3232 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3233 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3234 return TokenNamecontinue;
3236 return TokenNameIdentifier;
3238 return TokenNameIdentifier;
3241 // declare default do die
3242 // TODO delete define ==> no keyword !
3245 if ((data[++index] == 'o'))
3248 return TokenNameIdentifier;
3250 // if ((data[++index] == 'e')
3251 // && (data[++index] == 'f')
3252 // && (data[++index] == 'i')
3253 // && (data[++index] == 'n')
3254 // && (data[++index] == 'e'))
3255 // return TokenNamedefine;
3257 // return TokenNameIdentifier;
3259 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3260 && (data[++index] == 'r') && (data[++index] == 'e'))
3261 return TokenNamedeclare;
3263 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3264 && (data[++index] == 'l') && (data[++index] == 't'))
3265 return TokenNamedefault;
3267 return TokenNameIdentifier;
3269 return TokenNameIdentifier;
3272 // echo else exit elseif extends eval
3275 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3276 return TokenNameecho;
3277 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3278 return TokenNameelse;
3279 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3280 return TokenNameexit;
3281 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3282 return TokenNameeval;
3284 return TokenNameIdentifier;
3287 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3288 return TokenNameendif;
3289 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3290 return TokenNameempty;
3292 return TokenNameIdentifier;
3295 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3296 && (data[++index] == 'r'))
3297 return TokenNameendfor;
3298 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3299 && (data[++index] == 'f'))
3300 return TokenNameelseif;
3302 return TokenNameIdentifier;
3304 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3305 && (data[++index] == 'd') && (data[++index] == 's'))
3306 return TokenNameextends;
3308 return TokenNameIdentifier;
3311 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3312 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3313 return TokenNameendwhile;
3315 return TokenNameIdentifier;
3318 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3319 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3320 return TokenNameendswitch;
3322 return TokenNameIdentifier;
3325 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3326 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3327 && (data[++index] == 'e'))
3328 return TokenNameenddeclare;
3330 if ((data[++index] == 'n') // endforeach
3331 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3332 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3333 return TokenNameendforeach;
3335 return TokenNameIdentifier;
3337 return TokenNameIdentifier;
3340 // for false final function
3343 if ((data[++index] == 'o') && (data[++index] == 'r'))
3344 return TokenNamefor;
3346 return TokenNameIdentifier;
3348 // if ((data[++index] == 'a') && (data[++index] == 'l')
3349 // && (data[++index] == 's') && (data[++index] == 'e'))
3350 // return TokenNamefalse;
3351 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3352 return TokenNamefinal;
3354 return TokenNameIdentifier;
3357 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3358 && (data[++index] == 'c') && (data[++index] == 'h'))
3359 return TokenNameforeach;
3361 return TokenNameIdentifier;
3364 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3365 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3366 return TokenNamefunction;
3368 return TokenNameIdentifier;
3370 return TokenNameIdentifier;
3375 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3376 && (data[++index] == 'l')) {
3377 return TokenNameglobal;
3380 return TokenNameIdentifier;
3382 // if int isset include include_once instanceof interface implements
3385 if (data[++index] == 'f')
3388 return TokenNameIdentifier;
3390 // if ((data[++index] == 'n') && (data[++index] == 't'))
3391 // return TokenNameint;
3393 // return TokenNameIdentifier;
3395 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3396 return TokenNameisset;
3398 return TokenNameIdentifier;
3400 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3401 && (data[++index] == 'd') && (data[++index] == 'e'))
3402 return TokenNameinclude;
3404 return TokenNameIdentifier;
3407 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3408 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3409 return TokenNameinterface;
3411 return TokenNameIdentifier;
3414 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3415 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3416 && (data[++index] == 'f'))
3417 return TokenNameinstanceof;
3418 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3419 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3420 && (data[++index] == 's'))
3421 return TokenNameimplements;
3423 return TokenNameIdentifier;
3425 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3426 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3427 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3428 return TokenNameinclude_once;
3430 return TokenNameIdentifier;
3432 return TokenNameIdentifier;
3437 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3438 return TokenNamelist;
3441 return TokenNameIdentifier;
3446 if ((data[++index] == 'e') && (data[++index] == 'w'))
3447 return TokenNamenew;
3449 return TokenNameIdentifier;
3451 // if ((data[++index] == 'u') && (data[++index] == 'l')
3452 // && (data[++index] == 'l'))
3453 // return TokenNamenull;
3455 // return TokenNameIdentifier;
3457 return TokenNameIdentifier;
3462 if (data[++index] == 'r') {
3466 // if (length == 12) {
3467 // if ((data[++index] == 'l')
3468 // && (data[++index] == 'd')
3469 // && (data[++index] == '_')
3470 // && (data[++index] == 'f')
3471 // && (data[++index] == 'u')
3472 // && (data[++index] == 'n')
3473 // && (data[++index] == 'c')
3474 // && (data[++index] == 't')
3475 // && (data[++index] == 'i')
3476 // && (data[++index] == 'o')
3477 // && (data[++index] == 'n')) {
3478 // return TokenNameold_function;
3481 return TokenNameIdentifier;
3483 // print public private protected
3486 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3487 return TokenNameprint;
3489 return TokenNameIdentifier;
3491 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3492 && (data[++index] == 'c')) {
3493 return TokenNamepublic;
3495 return TokenNameIdentifier;
3497 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3498 && (data[++index] == 't') && (data[++index] == 'e')) {
3499 return TokenNameprivate;
3501 return TokenNameIdentifier;
3503 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3504 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3505 return TokenNameprotected;
3507 return TokenNameIdentifier;
3509 return TokenNameIdentifier;
3511 // return require require_once
3513 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3514 && (data[++index] == 'n')) {
3515 return TokenNamereturn;
3517 } else if (length == 7) {
3518 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3519 && (data[++index] == 'r') && (data[++index] == 'e')) {
3520 return TokenNamerequire;
3522 } else if (length == 12) {
3523 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3524 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3525 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3526 return TokenNamerequire_once;
3529 return TokenNameIdentifier;
3534 if (data[++index] == 't')
3535 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3536 return TokenNamestatic;
3538 return TokenNameIdentifier;
3539 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3540 && (data[++index] == 'h'))
3541 return TokenNameswitch;
3543 return TokenNameIdentifier;
3545 return TokenNameIdentifier;
3551 if ((data[++index] == 'r') && (data[++index] == 'y'))
3552 return TokenNametry;
3554 return TokenNameIdentifier;
3556 // if ((data[++index] == 'r') && (data[++index] == 'u')
3557 // && (data[++index] == 'e'))
3558 // return TokenNametrue;
3560 // return TokenNameIdentifier;
3562 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3563 return TokenNamethrow;
3565 return TokenNameIdentifier;
3567 return TokenNameIdentifier;
3573 if ((data[++index] == 's') && (data[++index] == 'e'))
3574 return TokenNameuse;
3576 return TokenNameIdentifier;
3578 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3579 return TokenNameunset;
3581 return TokenNameIdentifier;
3583 return TokenNameIdentifier;
3589 if ((data[++index] == 'a') && (data[++index] == 'r'))
3590 return TokenNamevar;
3592 return TokenNameIdentifier;
3594 return TokenNameIdentifier;
3600 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3601 return TokenNamewhile;
3603 return TokenNameIdentifier;
3604 // case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3605 // (data[++index]=='e') && (data[++index]=='f')&&
3606 // (data[++index]=='p'))
3607 // return TokenNamewidefp ;
3609 // return TokenNameIdentifier;
3611 return TokenNameIdentifier;
3617 if ((data[++index] == 'o') && (data[++index] == 'r'))
3618 return TokenNamexor;
3620 return TokenNameIdentifier;
3622 return TokenNameIdentifier;
3625 return TokenNameIdentifier;
/**
 * Scans the remainder of a numeric literal and classifies it.
 * <p>
 * NOTE(review): this listing omits short source lines (closing braces, bare
 * <code>else</code>, one-word statements), so the branch boundaries and any assignments
 * to <code>floating</code> other than its initialisation are not visible here - confirm
 * against the complete file.
 * <p>
 * Visible behaviour: after a leading <code>0</code>, an <code>x</code>/<code>X</code> must be followed by at
 * least one hexadecimal digit and the literal is reported as an integer; a number
 * with further digits after the leading zero is reported as a double when a
 * <code>d</code>/<code>D</code> suffix or a <code>.</code> follows, otherwise as an integer. In the general
 * case the final result depends on the <code>floating</code> flag.
 *
 * @param dotPrefix true when the number was introduced by a '.'; it is also the
 *          initial value of the floating flag
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException INVALID_HEXA when no hexadecimal digit follows the
 *           x/X, INVALID_FLOAT when an exponent marker is not followed by a digit
 */
3629 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3630 // when entering this method the currentCharacter is the first
3631 // digit of the number, i.e. it may be preceded by a '.' when
3632 // dotPrefix is true
3633 boolean floating = dotPrefix;
3634 if ((!dotPrefix) && (currentCharacter == '0')) {
3635 if (getNextChar('x', 'X') >= 0) { // ----------hexa-----------------
3636 // force the first char of the hexa number to exist...
3637 // consume next character
3638 unicodeAsBackSlash = false;
3639 currentCharacter = source[currentPosition++];
3640 // if (((currentCharacter = source[currentPosition++]) == '\\')
3641 // && (source[currentPosition] == 'u')) {
3642 // getNextUnicodeChar();
3644 // if (withoutUnicodePtr != 0) {
3645 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3648 if (Character.digit(currentCharacter, 16) == -1)
3649 throw new InvalidInputException(INVALID_HEXA);
3651 while (getNextCharAsDigit(16)) {
3654 // if (getNextChar('l', 'L') >= 0)
3655 // return TokenNameLongLiteral;
3657 return TokenNameIntegerLiteral;
3659 // there is no x or X in the number (the hexadecimal branch above has returned)
3660 // potential octal ! ... some one may write 000099.0 ! thus 00100 <
3661 // 00078.0 is true !!!!! crazy language
3662 if (getNextCharAsDigit()) {
3663 // -------------potential octal-----------------
3664 while (getNextCharAsDigit()) {
3667 // if (getNextChar('l', 'L') >= 0) {
3668 // return TokenNameLongLiteral;
3671 // if (getNextChar('f', 'F') >= 0) {
3672 // return TokenNameFloatingPointLiteral;
3674 if (getNextChar('d', 'D') >= 0) {
3675 return TokenNameDoubleLiteral;
3676 } else { // make the distinction between octal and float ....
3677 if (getNextChar('.')) { // bingo ! ....
3678 while (getNextCharAsDigit()) {
3681 if (getNextChar('e', 'E') >= 0) {
3682 // consume next character
3683 unicodeAsBackSlash = false;
3684 currentCharacter = source[currentPosition++];
3685 // if (((currentCharacter = source[currentPosition++]) == '\\')
3686 // && (source[currentPosition] == 'u')) {
3687 // getNextUnicodeChar();
3689 // if (withoutUnicodePtr != 0) {
3690 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3693 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3694 // consume next character
3695 unicodeAsBackSlash = false;
3696 currentCharacter = source[currentPosition++];
3697 // if (((currentCharacter = source[currentPosition++]) == '\\')
3698 // && (source[currentPosition] == 'u')) {
3699 // getNextUnicodeChar();
3701 // if (withoutUnicodePtr != 0) {
3702 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3703 // currentCharacter;
// an exponent marker (and optional sign) must be followed by at least one digit
3707 if (!Character.isDigit(currentCharacter))
3708 throw new InvalidInputException(INVALID_FLOAT);
3709 while (getNextCharAsDigit()) {
3713 // if (getNextChar('f', 'F') >= 0)
3714 // return TokenNameFloatingPointLiteral;
3715 getNextChar('d', 'D'); // jump over potential d or D
3716 return TokenNameDoubleLiteral;
3718 return TokenNameIntegerLiteral;
3725 while (getNextCharAsDigit()) {
3728 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3729 // return TokenNameLongLiteral;
3730 if ((!dotPrefix) && (getNextChar('.'))) { // decimal part that can be empty
3731 while (getNextCharAsDigit()) {
3736 // if floating is true both exponent and suffix may be optional
3737 if (getNextChar('e', 'E') >= 0) {
3739 // consume next character
3740 unicodeAsBackSlash = false;
3741 currentCharacter = source[currentPosition++];
3742 // if (((currentCharacter = source[currentPosition++]) == '\\')
3743 // && (source[currentPosition] == 'u')) {
3744 // getNextUnicodeChar();
3746 // if (withoutUnicodePtr != 0) {
3747 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3750 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3753 unicodeAsBackSlash = false;
3754 currentCharacter = source[currentPosition++];
3755 // if (((currentCharacter = source[currentPosition++]) == '\\')
3756 // && (source[currentPosition] == 'u')) {
3757 // getNextUnicodeChar();
3759 // if (withoutUnicodePtr != 0) {
3760 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3764 if (!Character.isDigit(currentCharacter))
3765 throw new InvalidInputException(INVALID_FLOAT);
3766 while (getNextCharAsDigit()) {
3770 if (getNextChar('d', 'D') >= 0)
3771 return TokenNameDoubleLiteral;
3772 // if (getNextChar('f', 'F') >= 0)
3773 // return TokenNameFloatingPointLiteral;
3774 // the long flag has been tested before
3775 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3779 * Search the line number corresponding to a specific position
/**
 * Looks up the line number corresponding to a source position using the
 * <code>lineEnds</code> table, of which the first <code>linePtr + 1</code> entries are considered.
 * <p>
 * NOTE(review): the loop header, the computation of <code>m</code> and every return
 * statement are missing from this listing. The visible comparisons of
 * <code>position</code> against <code>lineEnds[m]</code> between the bounds <code>g</code> and <code>d</code> look
 * like a binary search - confirm against the complete file, including what is
 * returned when <code>lineEnds</code> is null.
 *
 * @param position the source position to look up
 * @return the line number found for <code>position</code>
 */
3782 public final int getLineNumber(int position) {
3783 if (lineEnds == null)
3785 int length = linePtr + 1;
// g and d are the lower and upper search bounds into lineEnds
3788 int g = 0, d = length - 1;
3792 if (position < lineEnds[m]) {
3794 } else if (position > lineEnds[m]) {
3800 if (position < lineEnds[m]) {
/**
 * Switches the scanner's PHP mode on or off.
 * <p>
 * NOTE(review): the method body is not visible in this listing; presumably it
 * stores <code>mode</code> in the <code>phpMode</code> field - confirm against the complete file.
 *
 * @param mode the new PHP mode
 */
3806 public void setPHPMode(boolean mode) {
3810 public final void setSource(char[] source) {
3811 setSource(null, source);
/**
 * Installs a new source buffer and resets the scanning state that depends on it.
 * <p>
 * The compilation unit is remembered, a null <code>source</code> is replaced by an empty
 * array, the initial and current positions are reset to 0, the assert-keyword
 * flag is cleared and <code>withoutUnicodeBuffer</code> is reallocated to the length of
 * the new source.
 * <p>
 * NOTE(review): short lines are missing from this listing between the null check
 * and the position reset; further state may be reset there - confirm against the
 * complete file.
 *
 * @param compilationUnit the unit the source belongs to; stored as given
 * @param source the characters to scan, or null for an empty source
 */
3814 public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3815 // the source buffer is set to the given source (an empty array replaces null)
3816 this.compilationUnit = compilationUnit;
3817 if (source == null) {
3818 this.source = new char[0];
3820 this.source = source;
3823 initialPosition = currentPosition = 0;
3824 containsAssertKeyword = false;
3825 withoutUnicodeBuffer = new char[this.source.length];
3826 // encapsedStringStack = new Stack();
/**
 * Builds a debugging view of the source with the current token marked by
 * "Starts here --&gt;" and "&lt;-- Ends here".
 * <p>
 * Two special cases return early: when <code>startPosition</code> equals the source
 * length the text is prefixed with "EOF", and when <code>currentPosition</code> is past
 * the source length it is prefixed with "behind the EOF".
 * <p>
 * NOTE(review): the declaration of <code>middle</code> and the end of the final return
 * expression are missing from this listing; presumably the <code>end</code> array is
 * appended last - confirm against the complete file.
 *
 * @return the annotated source text
 */
3829 public String toString() {
3830 if (startPosition == source.length)
3831 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3832 if (currentPosition > source.length)
3833 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
// front = everything before the current token
3834 char front[] = new char[startPosition];
3835 System.arraycopy(source, 0, front, 0, startPosition);
// middle = the current token, startPosition .. currentPosition - 1
3836 int middleLength = (currentPosition - 1) - startPosition + 1;
3838 if (middleLength > -1) {
3839 middle = new char[middleLength];
3840 System.arraycopy(source, startPosition, middle, 0, middleLength);
3842 middle = new char[0];
// end = everything from currentPosition onwards
// NOTE(review): the array is allocated one element longer than the number of
// characters copied into it, so its last element keeps the default '\0' - confirm
// whether that trailing character is intended.
3844 char end[] = new char[source.length - (currentPosition - 1)];
3845 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3846 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3847 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable description of a token kind, for debugging output.
 * <p>
 * Keywords and operators map to their fixed spelling; token kinds that carry text
 * (identifiers, variables, literals, comments, whitespace, inline HTML, heredoc)
 * are rendered as <code>Kind(source)</code> using the current token source. Any other
 * value falls through to the "not-a-token(...)" form on the last line.
 * <p>
 * NOTE(review): the switch header and every case label shorter than about 20
 * characters are missing from this listing, so several return statements below
 * appear without their label - confirm against the complete file.
 *
 * @param act the token kind, one of the TokenName constants
 * @return the textual description of <code>act</code>
 */
3851 public final String toStringAction(int act) {
3853 case TokenNameERROR:
3854 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3856 case TokenNameINLINE_HTML:
3857 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3858 case TokenNameIdentifier:
3859 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3860 case TokenNameVariable:
3861 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3862 case TokenNameabstract:
3863 return "abstract"; //$NON-NLS-1$
3865 return "AND"; //$NON-NLS-1$
3866 case TokenNamearray:
3867 return "array"; //$NON-NLS-1$
3869 return "as"; //$NON-NLS-1$
3870 case TokenNamebreak:
3871 return "break"; //$NON-NLS-1$
3873 return "case"; //$NON-NLS-1$
3874 case TokenNameclass:
3875 return "class"; //$NON-NLS-1$
3876 case TokenNamecatch:
3877 return "catch"; //$NON-NLS-1$
3878 case TokenNameclone:
3881 case TokenNameconst:
3884 case TokenNamecontinue:
3885 return "continue"; //$NON-NLS-1$
3886 case TokenNamedefault:
3887 return "default"; //$NON-NLS-1$
3888 // case TokenNamedefine :
3889 // return "define"; //$NON-NLS-1$
3891 return "do"; //$NON-NLS-1$
3893 return "echo"; //$NON-NLS-1$
3895 return "else"; //$NON-NLS-1$
3896 case TokenNameelseif:
3897 return "elseif"; //$NON-NLS-1$
3898 case TokenNameendfor:
3899 return "endfor"; //$NON-NLS-1$
3900 case TokenNameendforeach:
3901 return "endforeach"; //$NON-NLS-1$
3902 case TokenNameendif:
3903 return "endif"; //$NON-NLS-1$
3904 case TokenNameendswitch:
3905 return "endswitch"; //$NON-NLS-1$
3906 case TokenNameendwhile:
3907 return "endwhile"; //$NON-NLS-1$
3910 case TokenNameextends:
3911 return "extends"; //$NON-NLS-1$
3912 // case TokenNamefalse :
3913 // return "false"; //$NON-NLS-1$
3914 case TokenNamefinal:
3915 return "final"; //$NON-NLS-1$
3917 return "for"; //$NON-NLS-1$
3918 case TokenNameforeach:
3919 return "foreach"; //$NON-NLS-1$
3920 case TokenNamefunction:
3921 return "function"; //$NON-NLS-1$
3922 case TokenNameglobal:
3923 return "global"; //$NON-NLS-1$
3925 return "if"; //$NON-NLS-1$
3926 case TokenNameimplements:
3927 return "implements"; //$NON-NLS-1$
3928 case TokenNameinclude:
3929 return "include"; //$NON-NLS-1$
3930 case TokenNameinclude_once:
3931 return "include_once"; //$NON-NLS-1$
3932 case TokenNameinstanceof:
3933 return "instanceof"; //$NON-NLS-1$
3934 case TokenNameinterface:
3935 return "interface"; //$NON-NLS-1$
3936 case TokenNameisset:
3937 return "isset"; //$NON-NLS-1$
3939 return "list"; //$NON-NLS-1$
3941 return "new"; //$NON-NLS-1$
3942 // case TokenNamenull :
3943 // return "null"; //$NON-NLS-1$
3945 return "OR"; //$NON-NLS-1$
3946 case TokenNameprint:
3947 return "print"; //$NON-NLS-1$
3948 case TokenNameprivate:
3949 return "private"; //$NON-NLS-1$
3950 case TokenNameprotected:
3951 return "protected"; //$NON-NLS-1$
3952 case TokenNamepublic:
3953 return "public"; //$NON-NLS-1$
3954 case TokenNamerequire:
3955 return "require"; //$NON-NLS-1$
3956 case TokenNamerequire_once:
3957 return "require_once"; //$NON-NLS-1$
3958 case TokenNamereturn:
3959 return "return"; //$NON-NLS-1$
3960 case TokenNamestatic:
3961 return "static"; //$NON-NLS-1$
3962 case TokenNameswitch:
3963 return "switch"; //$NON-NLS-1$
3964 // case TokenNametrue :
3965 // return "true"; //$NON-NLS-1$
3966 case TokenNameunset:
3967 return "unset"; //$NON-NLS-1$
3969 return "var"; //$NON-NLS-1$
3970 case TokenNamewhile:
3971 return "while"; //$NON-NLS-1$
3973 return "XOR"; //$NON-NLS-1$
3974 // case TokenNamethis :
3975 // return "$this"; //$NON-NLS-1$
3976 case TokenNameIntegerLiteral:
3977 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3978 case TokenNameDoubleLiteral:
3979 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3980 case TokenNameStringDoubleQuote:
3981 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3982 case TokenNameStringSingleQuote:
3983 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3984 case TokenNameStringInterpolated:
3985 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3986 case TokenNameEncapsedString0:
3987 return "`"; //$NON-NLS-1$
3988 // case TokenNameEncapsedString1:
3989 // return "\'"; //$NON-NLS-1$
3990 // case TokenNameEncapsedString2:
3991 // return "\""; //$NON-NLS-1$
3992 case TokenNameSTRING:
3993 return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3994 case TokenNameHEREDOC:
3995 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3996 case TokenNamePLUS_PLUS:
3997 return "++"; //$NON-NLS-1$
3998 case TokenNameMINUS_MINUS:
3999 return "--"; //$NON-NLS-1$
4000 case TokenNameEQUAL_EQUAL:
4001 return "=="; //$NON-NLS-1$
4002 case TokenNameEQUAL_EQUAL_EQUAL:
4003 return "==="; //$NON-NLS-1$
4004 case TokenNameEQUAL_GREATER:
4005 return "=>"; //$NON-NLS-1$
4006 case TokenNameLESS_EQUAL:
4007 return "<="; //$NON-NLS-1$
4008 case TokenNameGREATER_EQUAL:
4009 return ">="; //$NON-NLS-1$
4010 case TokenNameNOT_EQUAL:
4011 return "!="; //$NON-NLS-1$
4012 case TokenNameNOT_EQUAL_EQUAL:
4013 return "!=="; //$NON-NLS-1$
4014 case TokenNameLEFT_SHIFT:
4015 return "<<"; //$NON-NLS-1$
4016 case TokenNameRIGHT_SHIFT:
4017 return ">>"; //$NON-NLS-1$
4018 case TokenNamePLUS_EQUAL:
4019 return "+="; //$NON-NLS-1$
4020 case TokenNameMINUS_EQUAL:
4021 return "-="; //$NON-NLS-1$
4022 case TokenNameMULTIPLY_EQUAL:
4023 return "*="; //$NON-NLS-1$
4024 case TokenNameDIVIDE_EQUAL:
4025 return "/="; //$NON-NLS-1$
4026 case TokenNameAND_EQUAL:
4027 return "&="; //$NON-NLS-1$
4028 case TokenNameOR_EQUAL:
4029 return "|="; //$NON-NLS-1$
4030 case TokenNameXOR_EQUAL:
4031 return "^="; //$NON-NLS-1$
4032 case TokenNameREMAINDER_EQUAL:
4033 return "%="; //$NON-NLS-1$
4034 case TokenNameDOT_EQUAL:
4035 return ".="; //$NON-NLS-1$
4036 case TokenNameLEFT_SHIFT_EQUAL:
4037 return "<<="; //$NON-NLS-1$
4038 case TokenNameRIGHT_SHIFT_EQUAL:
4039 return ">>="; //$NON-NLS-1$
4040 case TokenNameOR_OR:
4041 return "||"; //$NON-NLS-1$
4042 case TokenNameAND_AND:
4043 return "&&"; //$NON-NLS-1$
4045 return "+"; //$NON-NLS-1$
4046 case TokenNameMINUS:
4047 return "-"; //$NON-NLS-1$
4048 case TokenNameMINUS_GREATER:
4051 return "!"; //$NON-NLS-1$
4052 case TokenNameREMAINDER:
4053 return "%"; //$NON-NLS-1$
4055 return "^"; //$NON-NLS-1$
4057 return "&"; //$NON-NLS-1$
4058 case TokenNameMULTIPLY:
4059 return "*"; //$NON-NLS-1$
4061 return "|"; //$NON-NLS-1$
4062 case TokenNameTWIDDLE:
4063 return "~"; //$NON-NLS-1$
4064 case TokenNameTWIDDLE_EQUAL:
4065 return "~="; //$NON-NLS-1$
4066 case TokenNameDIVIDE:
4067 return "/"; //$NON-NLS-1$
4068 case TokenNameGREATER:
4069 return ">"; //$NON-NLS-1$
4071 return "<"; //$NON-NLS-1$
4072 case TokenNameLPAREN:
4073 return "("; //$NON-NLS-1$
4074 case TokenNameRPAREN:
4075 return ")"; //$NON-NLS-1$
4076 case TokenNameLBRACE:
4077 return "{"; //$NON-NLS-1$
4078 case TokenNameRBRACE:
4079 return "}"; //$NON-NLS-1$
4080 case TokenNameLBRACKET:
4081 return "["; //$NON-NLS-1$
4082 case TokenNameRBRACKET:
4083 return "]"; //$NON-NLS-1$
4084 case TokenNameSEMICOLON:
4085 return ";"; //$NON-NLS-1$
4086 case TokenNameQUESTION:
4087 return "?"; //$NON-NLS-1$
4088 case TokenNameCOLON:
4089 return ":"; //$NON-NLS-1$
4090 case TokenNameCOMMA:
4091 return ","; //$NON-NLS-1$
4093 return "."; //$NON-NLS-1$
4094 case TokenNameEQUAL:
4095 return "="; //$NON-NLS-1$
4098 case TokenNameDOLLAR:
4100 case TokenNameDOLLAR_LBRACE:
4102 case TokenNameLBRACE_DOLLAR:
4105 return "EOF"; //$NON-NLS-1$
4106 case TokenNameWHITESPACE:
4107 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4108 case TokenNameCOMMENT_LINE:
4109 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4110 case TokenNameCOMMENT_BLOCK:
4111 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4112 case TokenNameCOMMENT_PHPDOC:
4113 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4114 // case TokenNameHTML :
4115 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
4118 return "__FILE__"; //$NON-NLS-1$
4120 return "__LINE__"; //$NON-NLS-1$
4121 case TokenNameCLASS_C:
4122 return "__CLASS__"; //$NON-NLS-1$
4123 case TokenNameMETHOD_C:
4124 return "__METHOD__"; //$NON-NLS-1$
4125 case TokenNameFUNC_C:
4126 return "__FUNCTION__"; //$NON-NLS-1$
4127 case TokenNameboolCAST:
4128 return "( bool )"; //$NON-NLS-1$
4129 case TokenNameintCAST:
4130 return "( int )"; //$NON-NLS-1$
4131 case TokenNamedoubleCAST:
4132 return "( double )"; //$NON-NLS-1$
4133 case TokenNameobjectCAST:
4134 return "( object )"; //$NON-NLS-1$
4135 case TokenNamestringCAST:
4136 return "( string )"; //$NON-NLS-1$
// fallback for values without a dedicated description
// NOTE(review): the Integer(int) constructor is deprecated in current JDKs;
// concatenating act directly would produce the same text.
4138 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner that does not check for non-externalized string literals.
 * <p>
 * Delegates to the three-argument constructor.
 *
 * @param tokenizeComments forwarded unchanged
 * @param tokenizeWhiteSpace forwarded unchanged
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(
      tokenizeComments,
      tokenizeWhiteSpace,
      false /* checkNonExternalizedStringLiterals */);
}
/**
 * Creates a scanner with assert mode switched off.
 * <p>
 * Delegates to the four-argument constructor.
 *
 * @param tokenizeComments forwarded unchanged
 * @param tokenizeWhiteSpace forwarded unchanged
 * @param checkNonExternalizedStringLiterals forwarded unchanged
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
  this(
      tokenizeComments,
      tokenizeWhiteSpace,
      checkNonExternalizedStringLiterals,
      false /* assertMode */);
}
/**
 * Creates a scanner without string tokenizing and without task tags.
 * <p>
 * Delegates to the eight-argument constructor with no task tags, no task
 * priorities and case-sensitive task matching.
 *
 * @param tokenizeComments forwarded unchanged
 * @param tokenizeWhiteSpace forwarded unchanged
 * @param checkNonExternalizedStringLiterals forwarded unchanged
 * @param assertMode forwarded unchanged
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode) {
  this(
      tokenizeComments,
      tokenizeWhiteSpace,
      checkNonExternalizedStringLiterals,
      assertMode,
      false /* tokenizeStrings */,
      null /* taskTags */,
      null /* taskPriorities */,
      true /* isTaskCaseSensitive */);
}
/**
 * Creates a scanner with every option given explicitly.
 * <p>
 * The end-of-file position starts at <code>Integer.MAX_VALUE</code>; the remaining
 * arguments are stored in the fields of the same name.
 *
 * @param tokenizeComments stored in the field of the same name
 * @param tokenizeWhiteSpace stored in the field of the same name
 * @param checkNonExternalizedStringLiterals stored in the field of the same name
 * @param assertMode currently unused; its assignment is commented out
 * @param tokenizeStrings stored in the field of the same name
 * @param taskTags the task tags that checkTaskTag searches for; may be null
 * @param taskPriorities priorities matching <code>taskTags</code> by index; may be null
 * @param isTaskCaseSensitive whether task tags are compared case-sensitively
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
  this.eofPosition = Integer.MAX_VALUE;
  this.tokenizeComments = tokenizeComments;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeStrings = tokenizeStrings;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  // this.assertMode = assertMode;
  // this.encapsedStringStack = null;
  this.taskTags = taskTags;
  this.taskPriorities = taskPriorities;
  // This argument used to be dropped, so checkTaskTag always saw the field's
  // initial value regardless of what the caller asked for.
  this.isTaskCaseSensitive = isTaskCaseSensitive;
}
/**
 * Runs the NLS tag check for the current line by handing <code>currentLine</code> to
 * <code>parseTags</code>.
 * <p>
 * NOTE(review): the statement guarded by the null check is missing from this
 * listing; presumably it returns early when there is no current line - confirm
 * against the complete file.
 *
 * @throws InvalidInputException declared for, and propagated from, parseTags
 */
4172 private void checkNonExternalizeString() throws InvalidInputException {
4173 if (currentLine == null)
4175 parseTags(currentLine);
/**
 * Reads the NLS tags found in the current token source and collects the string
 * literals of <code>line</code> that remain non-null afterwards into
 * <code>nonNLSStrings</code>.
 * <p>
 * Each tag is located via TAG_PREFIX / TAG_POSTFIX and its text is parsed as a
 * one-based index. When no literal remains, <code>nonNLSStrings</code> is set to null;
 * otherwise <code>wasNonExternalizedStringLiteral</code> is set and the array is trimmed
 * to the number of literals found.
 * <p>
 * NOTE(review): the loop header around the tag search, the statement executed
 * when <code>line.exists(i)</code> holds and the statements that follow the two result
 * branches are missing from this listing - confirm against the complete file.
 *
 * @param line the string literals recorded for the current line
 * @throws InvalidInputException declared by the method; no throw site is visible
 *           in this listing
 */
4178 private void parseTags(NLSLine line) throws InvalidInputException {
4179 String s = new String(getCurrentTokenSource());
4180 int pos = s.indexOf(TAG_PREFIX);
4181 int lineLength = line.size();
4183 int start = pos + TAG_PREFIX_LENGTH;
// NOTE(review): indexOf returns -1 when TAG_POSTFIX is absent, and
// substring(start, -1) then throws StringIndexOutOfBoundsException - confirm
// whether a closing postfix is guaranteed here.
4184 int end = s.indexOf(TAG_POSTFIX, start);
4185 String index = s.substring(start, end);
4188 i = Integer.parseInt(index) - 1;
4189 // Tags are one based not zero based.
4190 } catch (NumberFormatException e) {
4191 i = -1; // we don't want to consider this as a valid NLS tag
4193 if (line.exists(i)) {
// continue the search behind the tag just handled
4196 pos = s.indexOf(TAG_PREFIX, start);
// gather the literals that are still present in the line
4198 this.nonNLSStrings = new StringLiteral[lineLength];
4199 int nonNLSCounter = 0;
4200 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4201 StringLiteral literal = (StringLiteral) iterator.next();
4202 if (literal != null) {
4203 this.nonNLSStrings[nonNLSCounter++] = literal;
4206 if (nonNLSCounter == 0) {
4207 this.nonNLSStrings = null;
4211 this.wasNonExternalizedStringLiteral = true;
4212 if (nonNLSCounter != lineLength) {
// shrink the array to the number of literals actually collected
4213 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
/**
 * Translates the character following a backslash into the character it denotes
 * and stores the result in <code>currentCharacter</code>.
 * <p>
 * The visible assignments cover backspace, tab, newline, form feed, carriage
 * return, double quote, single quote and backslash. Otherwise an octal escape of
 * up to three digits is accepted, where a third digit is only used when the first
 * digit is in the range 0-3. Anything else raises INVALID_ESCAPE.
 * <p>
 * NOTE(review): the case labels, break statements and the adjustments made when
 * a character is "ignored" are missing from this listing - confirm against the
 * complete file.
 *
 * @throws InvalidInputException INVALID_ESCAPE when the escape is not recognised
 */
4218 public final void scanEscapeCharacter() throws InvalidInputException {
4219 // the string with "\\u" is a legal string of two chars \ and u
4220 // thus we use a direct access to the source (for regular cases).
4221 if (unicodeAsBackSlash) {
4222 // consume next character
4223 unicodeAsBackSlash = false;
// NOTE(review): the read of the next character is commented out below, so no
// visible line in this branch fetches a new character - confirm this is intended.
4224 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4225 // (source[currentPosition] == 'u')) {
4226 // getNextUnicodeChar();
4228 if (withoutUnicodePtr != 0) {
4229 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
4233 currentCharacter = source[currentPosition++];
4234 switch (currentCharacter) {
4236 currentCharacter = '\b';
4239 currentCharacter = '\t';
4242 currentCharacter = '\n';
4245 currentCharacter = '\f';
4248 currentCharacter = '\r';
4251 currentCharacter = '\"';
4254 currentCharacter = '\'';
4257 currentCharacter = '\\';
4260 // -----------octal escape--------------
4262 // OctalDigit OctalDigit
4263 // ZeroToThree OctalDigit OctalDigit
4264 int number = Character.getNumericValue(currentCharacter);
4265 if (number >= 0 && number <= 7) {
// a first digit above 3 rules out a third octal digit
4266 boolean zeroToThreeNot = number > 3;
4267 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4268 int digit = Character.getNumericValue(currentCharacter);
4269 if (digit >= 0 && digit <= 7) {
4270 number = (number * 8) + digit;
4271 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4272 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4273 // Digit --> ignore last character
4276 digit = Character.getNumericValue(currentCharacter);
4277 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4278 // OctalDigit OctalDigit
4279 number = (number * 8) + digit;
4280 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4281 // --> ignore last character
4285 } else { // has read \OctalDigit NonDigit--> ignore last
4289 } else { // has read \OctalDigit NonOctalDigit--> ignore last
4293 } else { // has read \OctalDigit --> ignore last character
4297 throw new InvalidInputException(INVALID_ESCAPE);
4298 currentCharacter = (char) number;
4300 throw new InvalidInputException(INVALID_ESCAPE);
4304 // check presence of task: tags
4305 // TODO (frederic) see if we need to take unicode characters into account...
/**
 * Searches a comment for the configured task tags and records every match.
 * <p>
 * For each match the tag, its priority, its start/end positions and an initially
 * empty message are appended to the <code>foundTask*</code> arrays, which start with
 * room for 5 entries and are doubled when full. A second pass then extracts the
 * message text that follows each newly found tag and updates the recorded end
 * position.
 * <p>
 * NOTE(review): the <code>continue</code>/<code>break</code>/<code>return</code> statements and several
 * short declarations are missing from this listing, so the exact control flow
 * has to be confirmed against the complete file.
 *
 * @param commentStart position where the comment starts; scanning begins two
 *          characters later
 * @param commentEnd position that bounds the scan (exclusive in the loop condition)
 */
4306 public void checkTaskTag(int commentStart, int commentEnd) {
4307 char[] src = this.source;
4309 // only look for newer task: tags
4310 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// remember where the tasks found by this call start
4313 int foundTaskIndex = this.foundTaskCount;
4314 char previous = src[commentStart + 1]; // should be '*' or '/'
4315 nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4317 char[] priority = null;
4318 // check for tag occurrence only if not ambiguous with javadoc tag
4319 if (previous != '@') {
// NOTE(review): taskTags is dereferenced without a null check, while the shorter
// constructors pass null for it - presumably callers guard this; confirm.
4320 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4321 tag = this.taskTags[itag];
4322 int tagLength = tag.length;
4326 // ensure tag is not preceded by a letter if tag starts with a letter
4327 if (Scanner.isPHPIdentifierStart(tag[0])) {
4328 if (Scanner.isPHPIdentifierPart(previous)) {
4333 for (int t = 0; t < tagLength; t++) {
4336 if (x >= this.eofPosition || x >= commentEnd)
4338 if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4339 if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case insensitive check
4346 // ensure tag is not followed by a letter if tag finishes with a letter
4348 if (i + tagLength < commentEnd && Scanner.isPHPIdentifierPart(src[i + tagLength - 1])) {
4349 if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
// allocate the result arrays on first use, double them when full
4352 if (this.foundTaskTags == null) {
4353 this.foundTaskTags = new char[5][];
4354 this.foundTaskMessages = new char[5][];
4355 this.foundTaskPriorities = new char[5][];
4356 this.foundTaskPositions = new int[5][];
4357 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4358 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4359 this.foundTaskCount);
4360 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4361 this.foundTaskCount);
4362 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4363 this.foundTaskCount);
4364 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4365 this.foundTaskCount);
// a tag without a matching priority entry gets a null priority
4368 priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4370 this.foundTaskTags[this.foundTaskCount] = tag;
4371 this.foundTaskPriorities[this.foundTaskCount] = priority;
4372 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4373 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4374 this.foundTaskCount++;
4375 i += tagLength - 1; // will be incremented when looping
// second pass: extract the message of every task found by this call
4381 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4382 // retrieve message start and end positions
4383 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4384 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4385 // at most beginning of next task
4386 if (max_value < msgStart) {
4387 max_value = msgStart; // would only occur if tag is before EOF.
// the message ends at the first line break ...
4391 for (int j = msgStart; j < max_value; j++) {
4392 if ((c = src[j]) == '\n' || c == '\r') {
// ... or, scanning backwards, at a '*'
4398 for (int j = max_value; j > msgStart; j--) {
4399 if ((c = src[j]) == '*') {
4407 if (msgStart == end)
// NOTE(review): both loops below index src before testing msgStart <= end, so
// the bound cannot prevent an out-of-range access - consider swapping the operands.
4410 while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4412 while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4414 // update the end position of the task
4415 this.foundTaskPositions[i][1] = end;
4416 // get the message source
4417 final int messageLength = end - msgStart + 1;
4418 char[] message = new char[messageLength];
4419 System.arraycopy(src, msgStart, message, 0, messageLength);
4420 this.foundTaskMessages[i] = message;
4424 // check presence of task: tags (older implementation, kept commented out)
4425 // public void checkTaskTag(int commentStart, int commentEnd) {
4426 // // only look for newer task: tags
4427 // if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount
4428 // - 1][0] >= commentStart) {
4431 // int foundTaskIndex = this.foundTaskCount;
4432 // nextChar: for (int i = commentStart; i < commentEnd && i <
4433 // this.eofPosition; i++) {
4434 // char[] tag = null;
4435 // char[] priority = null;
4436 // // check for tag occurrence
4437 // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4438 // tag = this.taskTags[itag];
4439 // priority = this.taskPriorities != null && itag < this.taskPriorities.length
4440 // ? this.taskPriorities[itag] : null;
4441 // int tagLength = tag.length;
4442 // for (int t = 0; t < tagLength; t++) {
4443 // if (this.source[i + t] != tag[t])
4444 // continue nextTag;
4446 // if (this.foundTaskTags == null) {
4447 // this.foundTaskTags = new char[5][];
4448 // this.foundTaskMessages = new char[5][];
4449 // this.foundTaskPriorities = new char[5][];
4450 // this.foundTaskPositions = new int[5][];
4451 // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4452 // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new
4453 // char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4454 // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new
4455 // char[this.foundTaskCount * 2][], 0,
4456 // this.foundTaskCount);
4457 // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities =
4458 // new char[this.foundTaskCount * 2][], 0,
4459 // this.foundTaskCount);
4460 // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new
4461 // int[this.foundTaskCount * 2][], 0,
4462 // this.foundTaskCount);
4464 // this.foundTaskTags[this.foundTaskCount] = tag;
4465 // this.foundTaskPriorities[this.foundTaskCount] = priority;
4466 // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength
4468 // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4469 // this.foundTaskCount++;
4470 // i += tagLength - 1; // will be incremented when looping
4473 // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4474 // // retrieve message start and end positions
4475 // int msgStart = this.foundTaskPositions[i][0] +
4476 // this.foundTaskTags[i].length;
4477 // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i +
4478 // 1][0] - 1 : commentEnd - 1;
4479 // // at most beginning of next task
4480 // if (max_value < msgStart)
4481 // max_value = msgStart; // would only occur if tag is before EOF.
4484 // for (int j = msgStart; j < max_value; j++) {
4485 // if ((c = this.source[j]) == '\n' || c == '\r') {
4491 // for (int j = max_value; j > msgStart; j--) {
4492 // if ((c = this.source[j]) == '*') {
4500 // if (msgStart == end)
4501 // continue; // empty
4502 // // trim the message
4503 // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4505 // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4507 // // update the end position of the task
4508 // this.foundTaskPositions[i][1] = end;
4509 // // get the message source
4510 // final int messageLength = end - msgStart + 1;
4511 // char[] message = new char[messageLength];
4512 // System.arraycopy(source, msgStart, message, 0, messageLength);
4513 // this.foundTaskMessages[i] = message;