1 /***********************************************************************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3 * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4 * and is available at http://www.eclipse.org/legal/cpl-v05.html
6 * Contributors: IBM Corporation - initial API and implementation
7 **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
14 import net.sourceforge.phpdt.core.compiler.CharOperation;
15 import net.sourceforge.phpdt.core.compiler.IScanner;
16 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
17 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
18 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
19 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
21 public class Scanner implements IScanner, ITerminalSymbols {
/*
 * The scanner API consists of:
 * - getNextToken(), which returns the type of the current token (this value is
 *   not memorized by the scanner);
 * - getCurrentTokenSource(), which provides the token's "REAL" source (i.e. all
 *   unicode escapes have been transformed into the corresponding char);
 * - sourceStart, which gives the position into the stream;
 * - currentPosition-1, which gives the sourceEnd position into the stream.
 */
// Mutable scanner state. All of it is public and is read and written directly
// by the scanning methods of this class.

// private boolean assertMode;
// NOTE(review): presumably lets "assert" be scanned as an ordinary identifier;
// nothing in this part of the file reads it - confirm.
public boolean useAssertAsAnIndentifier = false;
// flag indicating if processed source contains occurrences of keyword assert
public boolean containsAssertKeyword = false;
// When set, consumeStringInterpolated() calls pushLineSeparator() for every
// '\r' or '\n' it reads.
public boolean recordLineSeparator;
// NOTE(review): PHP-specific scanning switches; this part of the file does not
// read them - confirm their exact meaning against getNextToken().
public boolean ignorePHPOneLiner = false;
public boolean phpMode = false;
public boolean phpExpressionTag = false;
// public Stack encapsedStringStack = null;
// Character most recently read from the source by the getNextChar...() methods.
public char currentCharacter;
// Index of the first character of the current token
// (see getCurrentTokenStartPosition()).
public int startPosition;
// Index of the next character to read; currentPosition - 1 is the end of the
// current token (see getCurrentTokenEndPosition()).
public int currentPosition;
public int initialPosition, eofPosition;
// after this position eof are generated instead of real token from the source
// NOTE(review): presumably select which lexical elements are reported as
// tokens; not read in this part of the file - confirm.
public boolean tokenizeComments;
public boolean tokenizeWhiteSpace;
public boolean tokenizeStrings;
// source should be viewed as a window (aka a part)
// of a entire very large stream
// Decoded characters of the current token, stored from index 1 upwards; it is
// filled once an escape sequence has been met (see consumeStringInterpolated()).
public char[] withoutUnicodeBuffer;
// Index of the last slot used in withoutUnicodeBuffer.
public int withoutUnicodePtr;
// when == 0 ==> no unicode in the current token
public boolean unicodeAsBackSlash = false;
public boolean scanningFloatLiteral = false;
// support for /** comments
public int[] commentStops = new int[10];
public int[] commentStarts = new int[10];
public int commentPtr = -1; // no comment test with commentPtr value -1
protected int lastCommentLinePosition = -1;
// diet parsing support - jump over some method body when requested
public boolean diet = false;
// support for the poor-line-debuggers ....
// remember the position of the cr/lf
// lineEnds[n - 1] is what getLineEnd(n) answers for line n.
public int[] lineEnds = new int[250];
public int linePtr = -1;
public boolean wasAcr = false;
// Message keys carried by InvalidInputException. For example,
// consumeStringInterpolated() throws UNTERMINATED_STRING when the source ends
// inside a literal and compares a caught exception's message with INVALID_ESCAPE.
public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
// ----------------optimized identifier management------------------
// Shared one-character arrays, one per lower-case letter 'a'..'z'.
// NOTE(review): presumably returned by the optimizedCurrentTokenSource...()
// helpers so that short identifiers need no fresh allocation - confirm.
static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
    charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
    charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
    charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
    charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
    charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
    charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
    charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
    charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
// Shared two-character arrays '$' + lower-case letter, i.e. the PHP variable
// names $a..$z.
static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
    '$', 'c' }, charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' }, charArray_vf = new char[] { '$',
    'f' }, charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' },
    charArray_vi = new char[] { '$', 'i' }, charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' },
    charArray_vl = new char[] { '$', 'l' }, charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' },
    charArray_vo = new char[] { '$', 'o' }, charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' },
    charArray_vr = new char[] { '$', 'r' }, charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' },
    charArray_vu = new char[] { '$', 'u' }, charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' },
    charArray_vx = new char[] { '$', 'x' }, charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
/** Number of character codes (0..255) covered by the classification table. */
public static final int MAX_OBVIOUS = 256;

/**
 * Classification of every character below {@link #MAX_OBVIOUS}; each entry is
 * one of the C_... constants, or 0 for a character that belongs to no class.
 */
static final int[] ObviousIdentCharNatures = new int[MAX_OBVIOUS];

/** Class of '$'. */
public static final int C_DOLLAR = 8;

/** Class of the ASCII letters, '_' and the characters 127..255. */
public static final int C_LETTER = 4;

/** Class of the digits '0'..'9'. */
public static final int C_DIGIT = 3;

/** Class of punctuation, operator and quote characters. */
public static final int C_SEPARATOR = 2;

/** Class of blank, tab, line feed, form feed and carriage return. */
public static final int C_SPACE = 1;

static {
  for (char digit = '0'; digit <= '9'; digit++) {
    ObviousIdentCharNatures[digit] = C_DIGIT;
  }
  for (char lower = 'a', upper = 'A'; lower <= 'z'; lower++, upper++) {
    ObviousIdentCharNatures[lower] = C_LETTER;
    ObviousIdentCharNatures[upper] = C_LETTER;
  }
  ObviousIdentCharNatures['_'] = C_LETTER;
  // PHP accepts the bytes 127..255 inside identifiers
  for (int code = 127; code < MAX_OBVIOUS; code++) {
    ObviousIdentCharNatures[code] = C_LETTER;
  }

  ObviousIdentCharNatures['$'] = C_DOLLAR;

  // line feed, form feed, carriage return, space, horizontal tabulation
  final char[] blanks = { '\n', '\f', '\r', ' ', '\t' };
  for (int i = 0; i < blanks.length; i++) {
    ObviousIdentCharNatures[blanks[i]] = C_SPACE;
  }

  final char[] separators = { '.', ':', ';', ',', '[', ']', '(', ')', '{', '}', '+', '-', '*', '/', '=', '&', '|', '?',
      '<', '>', '!', '%', '^', '~', '"', '\'' };
  for (int i = 0; i < separators.length; i++) {
    ObviousIdentCharNatures[separators[i]] = C_SEPARATOR;
  }
}
// Placeholder stored in every slot of charArray_length by the instance
// initializer of this class.
static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
static final int TableSize = 30, InternalTableSize = 6;
// 30*6 = 180 entries
// Tokens of 1..OptimizedLength characters are served by the
// optimizedCurrentTokenSourceN() helpers (see getCurrentIdentifierSource()).
public static final int OptimizedLength = 6;
// NOTE(review): presumably the per-length cache of short token sources, indexed
// [length - 1][bucket][entry] - confirm against optimizedCurrentTokenSourceN().
final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
// support for detecting non-externalized string literals
int currentLineNr = -1;
int previousLineNr = -1;
// Line currently collecting string literals; created on demand by
// consumeStringInterpolated() and appended to 'lines'.
NLSLine currentLine = null;
List lines = new ArrayList();
public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
public StringLiteral[] nonNLSStrings = null;
// When set, consumeStringInterpolated() records every literal it scans in
// currentLine.
public boolean checkNonExternalizedStringLiterals = true;
public boolean wasNonExternalizedStringLiteral = false;
// Instance initializer: every slot of the short-token cache starts out as the
// shared placeholder initCharArray. The outer bound is OptimizedLength, the
// constant charArray_length is dimensioned with, instead of a repeated literal 6.
{
  for (int i = 0; i < OptimizedLength; i++) {
    for (int j = 0; j < TableSize; j++) {
      for (int k = 0; k < InternalTableSize; k++) {
        charArray_length[i][j][k] = initCharArray;
      }
    }
  }
}
// NOTE(review): presumably the next slot to overwrite in the short-token cache
// for tokens of length 2..6 - not read in this part of the file; confirm.
static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
// Bracket kinds; BracketKinds is the number of kinds.
public static final int RoundBracket = 0;
public static final int SquareBracket = 1;
public static final int CurlyBracket = 2;
public static final int BracketKinds = 3;
// Task tag support.
// NOTE(review): the foundTask... arrays presumably describe the tasks found in
// comments and taskTags/taskPriorities the configured tags - confirm.
public char[][] foundTaskTags = null;
public char[][] foundTaskMessages;
public char[][] foundTaskPriorities = null;
public int[][] foundTaskPositions;
public int foundTaskCount = 0;
public char[][] taskTags = null;
public char[][] taskPriorities = null;
public boolean isTaskCaseSensitive = true;
// Compile-time switches, both off.
public static final boolean DEBUG = false;
public static final boolean TRACE = false;
public ICompilationUnit compilationUnit = null;
279 * Determines if the specified character is permissible as the first character
280 * in a PHP identifier or variable
282 * The '$' character for PHP variables is regarded as a correct first
286 public static boolean isPHPIdentOrVarStart(char ch) {
287 if (ch < MAX_OBVIOUS) {
288 return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DOLLAR;
291 //return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
295 * Determines if the specified character is permissible as the first character
296 * in a PHP identifier.
298 * The '$' character for PHP variables isn't regarded as the first character !
300 public static boolean isPHPIdentifierStart(char ch) {
301 if (ch < MAX_OBVIOUS) {
302 return ObviousIdentCharNatures[ch]==C_LETTER;
305 // return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
309 * Determines if the specified character may be part of a PHP identifier as
310 * other than the first character
312 public static boolean isPHPIdentifierPart(char ch) {
313 if (ch < MAX_OBVIOUS) {
314 return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DIGIT;
317 // return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
320 public static boolean isSQLIdentifierPart(char ch) {
321 if (ch < MAX_OBVIOUS) {
322 return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DIGIT;
327 public final boolean atEnd() {
328 // This code is not relevant if source is
329 // Only a part of the real stream input
330 return source.length == currentPosition;
333 public char[] getCurrentIdentifierSource() {
334 // return the token REAL source (aka unicodes are precomputed)
336 // if (withoutUnicodePtr != 0)
337 // //0 is used as a fast test flag so the real first char is in position 1
339 // withoutUnicodeBuffer,
341 // result = new char[withoutUnicodePtr],
343 // withoutUnicodePtr);
345 int length = currentPosition - startPosition;
346 switch (length) { // see OptimizedLength
348 return optimizedCurrentTokenSource1();
350 return optimizedCurrentTokenSource2();
352 return optimizedCurrentTokenSource3();
354 return optimizedCurrentTokenSource4();
356 return optimizedCurrentTokenSource5();
358 return optimizedCurrentTokenSource6();
361 System.arraycopy(source, startPosition, result = new char[length], 0, length);
366 public int getCurrentTokenEndPosition() {
367 return this.currentPosition - 1;
370 public final char[] getCurrentTokenSource() {
371 // Return the token REAL source (aka unicodes are precomputed)
373 // if (withoutUnicodePtr != 0)
374 // // 0 is used as a fast test flag so the real first char is in position 1
376 // withoutUnicodeBuffer,
378 // result = new char[withoutUnicodePtr],
380 // withoutUnicodePtr);
383 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
388 public final char[] getCurrentTokenSource(int startPos) {
389 // Return the token REAL source (aka unicodes are precomputed)
391 // if (withoutUnicodePtr != 0)
392 // // 0 is used as a fast test flag so the real first char is in position 1
394 // withoutUnicodeBuffer,
396 // result = new char[withoutUnicodePtr],
398 // withoutUnicodePtr);
401 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
406 public final char[] getCurrentTokenSourceString() {
407 // return the token REAL source (aka unicodes are precomputed).
408 // REMOVE the two " that are at the beginning and the end.
410 if (withoutUnicodePtr != 0)
411 // 0 is used as a fast test flag so the real first char is in position 1
412 System.arraycopy(withoutUnicodeBuffer, 2,
413 // 2 is 1 (real start) + 1 (to jump over the ")
414 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
417 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
422 public final char[] getRawTokenSourceEnd() {
423 int length = this.eofPosition - this.currentPosition - 1;
424 char[] sourceEnd = new char[length];
425 System.arraycopy(this.source, this.currentPosition, sourceEnd, 0, length);
429 public int getCurrentTokenStartPosition() {
430 return this.startPosition;
433 public final char[] getCurrentStringLiteralSource() {
434 // Return the token REAL source (aka unicodes are precomputed)
435 if (startPosition + 1 >= currentPosition) {
440 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
445 public final char[] getCurrentStringLiteralSource(int startPos) {
446 // Return the token REAL source (aka unicodes are precomputed)
449 System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
455 * Search the source position corresponding to the end of a given line number
457 * Line numbers are 1-based, and relative to the scanner initialPosition.
458 * Character positions are 0-based.
460 * In case the given line number is inconsistent, answers -1.
462 public final int getLineEnd(int lineNumber) {
463 if (lineEnds == null)
465 if (lineNumber >= lineEnds.length)
469 if (lineNumber == lineEnds.length - 1)
471 return lineEnds[lineNumber - 1];
472 // next line start one character behind the lineEnd of the previous line
476 * Search the source position corresponding to the beginning of a given line
479 * Line numbers are 1-based, and relative to the scanner initialPosition.
480 * Character positions are 0-based.
482 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
484 * In case the given line number is inconsistent, answers -1.
486 public final int getLineStart(int lineNumber) {
487 if (lineEnds == null)
489 if (lineNumber >= lineEnds.length)
494 return initialPosition;
495 return lineEnds[lineNumber - 2] + 1;
496 // next line start one character behind the lineEnd of the previous line
499 public final boolean getNextChar(char testedChar) {
501 // handle the case of unicode.
502 // when a unicode appears then we must use a buffer that holds char
504 // At the end of this method currentCharacter holds the new visited char
505 // and currentPosition points right next after it
506 // Both previous lines are true if the currentCharacter is == to the
508 // On false, no side effect has occured.
509 // ALL getNextChar.... ARE OPTIMIZED COPIES
510 int temp = currentPosition;
512 currentCharacter = source[currentPosition++];
513 // if (((currentCharacter = source[currentPosition++]) == '\\')
514 // && (source[currentPosition] == 'u')) {
515 // //-------------unicode traitement ------------
516 // int c1, c2, c3, c4;
517 // int unicodeSize = 6;
518 // currentPosition++;
519 // while (source[currentPosition] == 'u') {
520 // currentPosition++;
524 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
526 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
528 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
530 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
532 // currentPosition = temp;
536 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
537 // if (currentCharacter != testedChar) {
538 // currentPosition = temp;
541 // unicodeAsBackSlash = currentCharacter == '\\';
543 // //need the unicode buffer
544 // if (withoutUnicodePtr == 0) {
545 // //buffer all the entries that have been left aside....
546 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
550 // withoutUnicodeBuffer,
552 // withoutUnicodePtr);
554 // //fill the buffer with the char
555 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
558 // } //-------------end unicode traitement--------------
560 if (currentCharacter != testedChar) {
561 currentPosition = temp;
564 unicodeAsBackSlash = false;
565 // if (withoutUnicodePtr != 0)
566 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
569 } catch (IndexOutOfBoundsException e) {
570 unicodeAsBackSlash = false;
571 currentPosition = temp;
576 public final int getNextChar(char testedChar1, char testedChar2) {
577 // INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
578 // test can be done with (x==0) for the first and (x>0) for the second
579 // handle the case of unicode.
580 // when a unicode appears then we must use a buffer that holds char
582 // At the end of this method currentCharacter holds the new visited char
583 // and currentPosition points right next after it
584 // Both previous lines are true if the currentCharacter is == to the
586 // On false, no side effect has occured.
587 // ALL getNextChar.... ARE OPTIMIZED COPIES
588 int temp = currentPosition;
591 currentCharacter = source[currentPosition++];
592 // if (((currentCharacter = source[currentPosition++]) == '\\')
593 // && (source[currentPosition] == 'u')) {
594 // //-------------unicode traitement ------------
595 // int c1, c2, c3, c4;
596 // int unicodeSize = 6;
597 // currentPosition++;
598 // while (source[currentPosition] == 'u') {
599 // currentPosition++;
603 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
605 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
607 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
609 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
611 // currentPosition = temp;
615 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
616 // if (currentCharacter == testedChar1)
618 // else if (currentCharacter == testedChar2)
621 // currentPosition = temp;
625 // //need the unicode buffer
626 // if (withoutUnicodePtr == 0) {
627 // //buffer all the entries that have been left aside....
628 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
632 // withoutUnicodeBuffer,
634 // withoutUnicodePtr);
636 // //fill the buffer with the char
637 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
639 // } //-------------end unicode traitement--------------
641 if (currentCharacter == testedChar1)
643 else if (currentCharacter == testedChar2)
646 currentPosition = temp;
649 // if (withoutUnicodePtr != 0)
650 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
653 } catch (IndexOutOfBoundsException e) {
654 currentPosition = temp;
659 public final boolean getNextCharAsDigit() {
661 // handle the case of unicode.
662 // when a unicode appears then we must use a buffer that holds char
664 // At the end of this method currentCharacter holds the new visited char
665 // and currentPosition points right next after it
666 // Both previous lines are true if the currentCharacter is a digit
667 // On false, no side effect has occured.
668 // ALL getNextChar.... ARE OPTIMIZED COPIES
669 int temp = currentPosition;
671 currentCharacter = source[currentPosition++];
672 // if (((currentCharacter = source[currentPosition++]) == '\\')
673 // && (source[currentPosition] == 'u')) {
674 // //-------------unicode traitement ------------
675 // int c1, c2, c3, c4;
676 // int unicodeSize = 6;
677 // currentPosition++;
678 // while (source[currentPosition] == 'u') {
679 // currentPosition++;
683 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
685 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
687 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
689 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
691 // currentPosition = temp;
695 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
696 // if (!Character.isDigit(currentCharacter)) {
697 // currentPosition = temp;
701 // //need the unicode buffer
702 // if (withoutUnicodePtr == 0) {
703 // //buffer all the entries that have been left aside....
704 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
708 // withoutUnicodeBuffer,
710 // withoutUnicodePtr);
712 // //fill the buffer with the char
713 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
715 // } //-------------end unicode traitement--------------
717 if (!Character.isDigit(currentCharacter)) {
718 currentPosition = temp;
721 // if (withoutUnicodePtr != 0)
722 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
725 } catch (IndexOutOfBoundsException e) {
726 currentPosition = temp;
731 public final boolean getNextCharAsDigit(int radix) {
733 // handle the case of unicode.
734 // when a unicode appears then we must use a buffer that holds char
736 // At the end of this method currentCharacter holds the new visited char
737 // and currentPosition points right next after it
738 // Both previous lines are true if the currentCharacter is a digit base on
740 // On false, no side effect has occured.
741 // ALL getNextChar.... ARE OPTIMIZED COPIES
742 int temp = currentPosition;
744 currentCharacter = source[currentPosition++];
745 // if (((currentCharacter = source[currentPosition++]) == '\\')
746 // && (source[currentPosition] == 'u')) {
747 // //-------------unicode traitement ------------
748 // int c1, c2, c3, c4;
749 // int unicodeSize = 6;
750 // currentPosition++;
751 // while (source[currentPosition] == 'u') {
752 // currentPosition++;
756 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
758 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
760 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
762 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
764 // currentPosition = temp;
768 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
769 // if (Character.digit(currentCharacter, radix) == -1) {
770 // currentPosition = temp;
774 // //need the unicode buffer
775 // if (withoutUnicodePtr == 0) {
776 // //buffer all the entries that have been left aside....
777 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
781 // withoutUnicodeBuffer,
783 // withoutUnicodePtr);
785 // //fill the buffer with the char
786 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
788 // } //-------------end unicode traitement--------------
790 if (Character.digit(currentCharacter, radix) == -1) {
791 currentPosition = temp;
794 // if (withoutUnicodePtr != 0)
795 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
798 } catch (IndexOutOfBoundsException e) {
799 currentPosition = temp;
804 public boolean getNextCharAsJavaIdentifierPart() {
806 // handle the case of unicode.
807 // when a unicode appears then we must use a buffer that holds char
809 // At the end of this method currentCharacter holds the new visited char
810 // and currentPosition points right next after it
811 // Both previous lines are true if the currentCharacter is a
812 // JavaIdentifierPart
813 // On false, no side effect has occured.
814 // ALL getNextChar.... ARE OPTIMIZED COPIES
815 int temp = currentPosition;
817 currentCharacter = source[currentPosition++];
818 // if (((currentCharacter = source[currentPosition++]) == '\\')
819 // && (source[currentPosition] == 'u')) {
820 // //-------------unicode traitement ------------
821 // int c1, c2, c3, c4;
822 // int unicodeSize = 6;
823 // currentPosition++;
824 // while (source[currentPosition] == 'u') {
825 // currentPosition++;
829 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
831 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
833 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
835 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
837 // currentPosition = temp;
841 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
842 // if (!isPHPIdentifierPart(currentCharacter)) {
843 // currentPosition = temp;
847 // //need the unicode buffer
848 // if (withoutUnicodePtr == 0) {
849 // //buffer all the entries that have been left aside....
850 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
854 // withoutUnicodeBuffer,
856 // withoutUnicodePtr);
858 // //fill the buffer with the char
859 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
861 // } //-------------end unicode traitement--------------
863 if (!isPHPIdentifierPart(currentCharacter)) {
864 currentPosition = temp;
867 // if (withoutUnicodePtr != 0)
868 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
871 } catch (IndexOutOfBoundsException e) {
872 currentPosition = temp;
877 public int getCastOrParen() {
878 int tempPosition = currentPosition;
879 char tempCharacter = currentCharacter;
880 int tempToken = TokenNameLPAREN;
881 boolean found = false;
882 StringBuffer buf = new StringBuffer();
885 currentCharacter = source[currentPosition++];
886 } while (currentCharacter == ' ' || currentCharacter == '\t');
887 while (ObviousIdentCharNatures[currentCharacter]==C_LETTER) {
888 // while((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
889 buf.append(currentCharacter);
890 currentCharacter = source[currentPosition++];
892 if (buf.length() >= 3 && buf.length() <= 7) {
893 char[] data = buf.toString().toCharArray();
895 switch (data.length) {
898 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
900 tempToken = TokenNameintCAST;
905 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
907 tempToken = TokenNameboolCAST;
910 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
912 tempToken = TokenNamedoubleCAST;
918 if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
919 && (data[++index] == 'y')) {
921 tempToken = TokenNamearrayCAST;
924 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
925 && (data[++index] == 't')) {
927 tempToken = TokenNameunsetCAST;
930 if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
931 && (data[++index] == 't')) {
933 tempToken = TokenNamedoubleCAST;
939 // object string double
940 if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
941 && (data[++index] == 'c') && (data[++index] == 't')) {
943 tempToken = TokenNameobjectCAST;
946 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
947 && (data[++index] == 'n') && (data[++index] == 'g')) {
949 tempToken = TokenNamestringCAST;
952 if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
953 && (data[++index] == 'l') && (data[++index] == 'e')) {
955 tempToken = TokenNamedoubleCAST;
962 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
963 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
965 tempToken = TokenNameboolCAST;
968 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
969 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
971 tempToken = TokenNameintCAST;
977 while (currentCharacter == ' ' || currentCharacter == '\t') {
978 currentCharacter = source[currentPosition++];
980 if (currentCharacter == ')') {
985 } catch (IndexOutOfBoundsException e) {
987 currentCharacter = tempCharacter;
988 currentPosition = tempPosition;
989 return TokenNameLPAREN;
/**
 * Consumes the remainder of a backtick-delimited string, reading characters from {@code source} at
 * {@code currentPosition} until a closing backtick has been read.
 * <p>
 * A backslash is handed to {@code scanDoubleQuotedEscapeCharacter()} and the resulting character is stored in
 * {@code withoutUnicodeBuffer}; a carriage return or line feed is reported through {@code pushLineSeparator()}
 * when {@code recordLineSeparator} is set. When {@code checkNonExternalizedStringLiterals} is set, a
 * {@code StringLiteral} covering {@code startPosition .. currentPosition - 1} is added to {@code currentLine}
 * (which is created and appended to {@code lines} first if it is {@code null}).
 *
 * @throws InvalidInputException
 *           with UNTERMINATED_STRING when the end of {@code source} is reached before the closing backtick
 *           (currentPosition is moved back by two beforehand); an InvalidInputException raised while scanning
 *           an escape is caught for position recovery -- NOTE(review): the statement that follows the recovery
 *           loop is not visible in this listing, presumably the exception is rethrown; confirm
 */
992 public void consumeStringInterpolated() throws InvalidInputException {
994 // consume next character
995 unicodeAsBackSlash = false;
996 currentCharacter = source[currentPosition++];
997 // if (((currentCharacter = source[currentPosition++]) == '\\')
998 // && (source[currentPosition] == 'u')) {
999 // getNextUnicodeChar();
1001 // if (withoutUnicodePtr != 0) {
1002 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1003 // currentCharacter;
// scan until the closing backtick has been read into currentCharacter
1006 while (currentCharacter != '`') {
1007 /** ** in PHP \r and \n are valid in string literals *** */
1008 // if ((currentCharacter == '\n')
1009 // || (currentCharacter == '\r')) {
1010 // // relocate if finding another quote fairly close: thus unicode
1011 // '/u000D' will be fully consumed
1012 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1013 // if (currentPosition + lookAhead == source.length)
1015 // if (source[currentPosition + lookAhead] == '\n')
1017 // if (source[currentPosition + lookAhead] == '\"') {
1018 // currentPosition += lookAhead + 1;
1022 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1024 if (currentCharacter == '\\') {
// remember where the escape starts so that its length in the source can be computed afterwards
1025 int escapeSize = currentPosition;
1026 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1027 // scanEscapeCharacter make a side effect on this value and we need
1028 // the previous value few lines down this one
1029 scanDoubleQuotedEscapeCharacter();
1030 escapeSize = currentPosition - escapeSize;
1031 if (withoutUnicodePtr == 0) {
1032 // buffer all the entries that have been left aside....
1033 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1034 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1035 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1036 } else { // overwrite the / in the buffer
1037 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1038 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1039 // where only one is correct
1040 withoutUnicodePtr--;
1043 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1044 if (recordLineSeparator) {
1045 pushLineSeparator();
1048 // consume next character
1049 unicodeAsBackSlash = false;
1050 currentCharacter = source[currentPosition++];
1051 // if (((currentCharacter = source[currentPosition++]) == '\\')
1052 // && (source[currentPosition] == 'u')) {
1053 // getNextUnicodeChar();
// once buffering has started, every consumed character is mirrored into the buffer
1055 if (withoutUnicodePtr != 0) {
1056 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1060 } catch (IndexOutOfBoundsException e) {
1061 // reset end position for error reporting
1062 currentPosition -= 2;
1063 throw new InvalidInputException(UNTERMINATED_STRING);
1064 } catch (InvalidInputException e) {
// NOTE(review): e.getMessage() is dereferenced directly; this would fail with a NullPointerException for an
// exception created without a message -- confirm every thrower supplies one
1065 if (e.getMessage().equals(INVALID_ESCAPE)) {
1066 // relocate if finding another quote fairly close: thus unicode
1067 // '/u000D' will be fully consumed
1068 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1069 if (currentPosition + lookAhead == source.length)
1071 if (source[currentPosition + lookAhead] == '\n')
1073 if (source[currentPosition + lookAhead] == '`') {
1074 currentPosition += lookAhead + 1;
1081 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1082 // //$NON-NLS-?$ where ? is an
1084 if (currentLine == null) {
1085 currentLine = new NLSLine();
1086 lines.add(currentLine);
1088 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Consumes the remainder of a single-quoted string, reading characters from {@code source} at
 * {@code currentPosition} until a closing single quote has been read.
 * <p>
 * A backslash is handed to {@code scanSingleQuotedEscapeCharacter()} and the resulting character is stored in
 * {@code withoutUnicodeBuffer}; a carriage return or line feed is reported through {@code pushLineSeparator()}
 * when {@code recordLineSeparator} is set. When {@code checkNonExternalizedStringLiterals} is set, a
 * {@code StringLiteral} covering {@code startPosition .. currentPosition - 1} is added to {@code currentLine}
 * (which is created and appended to {@code lines} first if it is {@code null}).
 *
 * @throws InvalidInputException
 *           with UNTERMINATED_STRING when the end of {@code source} is reached before the closing quote
 *           (currentPosition is moved back by two beforehand); an InvalidInputException raised while scanning
 *           an escape is caught for position recovery -- NOTE(review): the statement that follows the recovery
 *           loop is not visible in this listing, presumably the exception is rethrown; confirm
 */
1092 public void consumeStringConstant() throws InvalidInputException {
1094 // consume next character
1095 unicodeAsBackSlash = false;
1096 currentCharacter = source[currentPosition++];
1097 // if (((currentCharacter = source[currentPosition++]) == '\\')
1098 // && (source[currentPosition] == 'u')) {
1099 // getNextUnicodeChar();
1101 // if (withoutUnicodePtr != 0) {
1102 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1103 // currentCharacter;
// scan until the closing single quote has been read into currentCharacter
1106 while (currentCharacter != '\'') {
1107 /** ** in PHP \r and \n are valid in string literals *** */
1108 // if ((currentCharacter == '\n')
1109 // || (currentCharacter == '\r')) {
1110 // // relocate if finding another quote fairly close: thus unicode
1111 // '/u000D' will be fully consumed
1112 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1113 // if (currentPosition + lookAhead == source.length)
1115 // if (source[currentPosition + lookAhead] == '\n')
1117 // if (source[currentPosition + lookAhead] == '\"') {
1118 // currentPosition += lookAhead + 1;
1122 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1124 if (currentCharacter == '\\') {
// remember where the escape starts so that its length in the source can be computed afterwards
1125 int escapeSize = currentPosition;
1126 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1127 // scanEscapeCharacter make a side effect on this value and we need
1128 // the previous value few lines down this one
1129 scanSingleQuotedEscapeCharacter();
1130 escapeSize = currentPosition - escapeSize;
1131 if (withoutUnicodePtr == 0) {
1132 // buffer all the entries that have been left aside....
1133 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1134 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1135 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1136 } else { // overwrite the / in the buffer
1137 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1138 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1139 // where only one is correct
1140 withoutUnicodePtr--;
1143 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1144 if (recordLineSeparator) {
1145 pushLineSeparator();
1148 // consume next character
1149 unicodeAsBackSlash = false;
1150 currentCharacter = source[currentPosition++];
1151 // if (((currentCharacter = source[currentPosition++]) == '\\')
1152 // && (source[currentPosition] == 'u')) {
1153 // getNextUnicodeChar();
// once buffering has started, every consumed character is mirrored into the buffer
1155 if (withoutUnicodePtr != 0) {
1156 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1160 } catch (IndexOutOfBoundsException e) {
1161 // reset end position for error reporting
1162 currentPosition -= 2;
1163 throw new InvalidInputException(UNTERMINATED_STRING);
1164 } catch (InvalidInputException e) {
// NOTE(review): e.getMessage() is dereferenced directly; this would fail with a NullPointerException for an
// exception created without a message -- confirm every thrower supplies one
1165 if (e.getMessage().equals(INVALID_ESCAPE)) {
1166 // relocate if finding another quote fairly close: thus unicode
1167 // '/u000D' will be fully consumed
1168 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1169 if (currentPosition + lookAhead == source.length)
1171 if (source[currentPosition + lookAhead] == '\n')
1173 if (source[currentPosition + lookAhead] == '\'') {
1174 currentPosition += lookAhead + 1;
1181 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1182 // //$NON-NLS-?$ where ? is an
1184 if (currentLine == null) {
1185 currentLine = new NLSLine();
1186 lines.add(currentLine);
1188 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Consumes the remainder of a double-quoted string, reading characters from {@code source} at
 * {@code currentPosition} until a closing double quote has been read outside of a brace expression.
 * <p>
 * The local flag {@code openDollarBrace} is raised when a dollar sign is directly followed by an opening brace
 * or an opening brace is directly followed by a dollar sign, and lowered again on a closing brace; while it is
 * raised a double quote does not end the string. A backslash is handed to
 * {@code scanDoubleQuotedEscapeCharacter()}; a carriage return or line feed is reported through
 * {@code pushLineSeparator()} when {@code recordLineSeparator} is set. When
 * {@code checkNonExternalizedStringLiterals} is set, a {@code StringLiteral} covering
 * {@code startPosition .. currentPosition - 1} is added to {@code currentLine} (which is created and appended
 * to {@code lines} first if it is {@code null}).
 *
 * @throws InvalidInputException
 *           with UNTERMINATED_STRING when the end of {@code source} is reached before the closing quote
 *           (currentPosition is moved back by two beforehand); an InvalidInputException raised while scanning
 *           an escape is caught for position recovery -- NOTE(review): the statement that follows the recovery
 *           loop is not visible in this listing, presumably the exception is rethrown; confirm
 */
1192 public void consumeStringLiteral() throws InvalidInputException {
1194 boolean openDollarBrace = false;
1195 // consume next character
1196 unicodeAsBackSlash = false;
1197 currentCharacter = source[currentPosition++];
// a double quote only terminates the string when no brace expression is currently open
1198 while (currentCharacter != '"' || openDollarBrace) {
1199 /** ** in PHP \r and \n are valid in string literals *** */
1200 if (currentCharacter == '\\') {
// remember where the escape starts so that its length in the source can be computed afterwards
1201 int escapeSize = currentPosition;
1202 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1203 // scanEscapeCharacter make a side effect on this value and we need
1204 // the previous value few lines down this one
1205 scanDoubleQuotedEscapeCharacter();
1206 escapeSize = currentPosition - escapeSize;
1207 if (withoutUnicodePtr == 0) {
1208 // buffer all the entries that have been left aside....
1209 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1210 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1211 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1212 } else { // overwrite the / in the buffer
1213 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1214 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1215 // where only one is correct
1216 withoutUnicodePtr--;
// the peek at source[currentPosition] may run past the end; the IndexOutOfBoundsException handler below covers it
1219 } else if (currentCharacter == '$' && source[currentPosition] == '{') {
1220 openDollarBrace = true;
1221 } else if (currentCharacter == '{' && source[currentPosition] == '$') {
1222 openDollarBrace = true;
1223 } else if (currentCharacter == '}') {
1224 openDollarBrace = false;
1225 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1226 if (recordLineSeparator) {
1227 pushLineSeparator();
1230 // consume next character
1231 unicodeAsBackSlash = false;
1232 currentCharacter = source[currentPosition++];
// once buffering has started, every consumed character is mirrored into the buffer
1233 if (withoutUnicodePtr != 0) {
1234 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1237 } catch (IndexOutOfBoundsException e) {
1238 // reset end position for error reporting
1239 currentPosition -= 2;
1240 throw new InvalidInputException(UNTERMINATED_STRING);
1241 } catch (InvalidInputException e) {
// NOTE(review): e.getMessage() is dereferenced directly; this would fail with a NullPointerException for an
// exception created without a message -- confirm every thrower supplies one
1242 if (e.getMessage().equals(INVALID_ESCAPE)) {
1243 // relocate if finding another quote fairly close: thus unicode
1244 // '/u000D' will be fully consumed
1245 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1246 if (currentPosition + lookAhead == source.length)
1248 if (source[currentPosition + lookAhead] == '\n')
1250 if (source[currentPosition + lookAhead] == '\"') {
1251 currentPosition += lookAhead + 1;
1258 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1259 // //$NON-NLS-?$ where ? is an
1261 if (currentLine == null) {
1262 currentLine = new NLSLine();
1263 lines.add(currentLine);
1265 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Scans forward from {@code currentPosition} and returns the type of the next token.
 * <p>
 * As far as this listing shows, the method clears {@code phpExpressionTag}, may delegate to
 * {@code getInlinedHTMLToken(currentPosition)} or to {@code jumpOverMethodBody()} (the guarding conditions are
 * not visible here -- TODO confirm), resets {@code withoutUnicodePtr}, and skips whitespace while moving
 * {@code startPosition} forward. It returns TokenNameWHITESPACE when {@code tokenizeWhiteSpace} is set and
 * whitespace was skipped, TokenNameEOF once {@code currentPosition} is past {@code eofPosition}, and otherwise
 * dispatches on {@code currentCharacter} to recognise punctuation, operators, heredocs, the three string
 * forms, comments, variables, identifiers/keywords and numbers.
 *
 * @return one of the TokenName* constants; TokenNameEOF when the source is exhausted (an
 *         IndexOutOfBoundsException raised while scanning is caught at the end of the method) and
 *         TokenNameERROR when the character is neither an identifier start nor a digit in the final branch
 * @throws InvalidInputException
 *           with UNTERMINATED_COMMENT when a block comment is not closed, with the message "Ctrl-Z" in the
 *           branch near the end of the switch, or as declared by the consumeString* helpers called for string
 *           tokens
 */
1269 public int getNextToken() throws InvalidInputException {
1270 phpExpressionTag = false;
// NOTE(review): the condition guarding this delegation is not visible in this listing
1272 return getInlinedHTMLToken(currentPosition);
1275 this.wasAcr = false;
// NOTE(review): the condition guarding the jump over a method body is not visible in this listing
1277 jumpOverMethodBody();
1279 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1283 withoutUnicodePtr = 0;
1284 // start with a new token
1285 char encapsedChar = ' ';
1286 // if (!encapsedStringStack.isEmpty()) {
1287 // encapsedChar = ((Character)
1288 // encapsedStringStack.peek()).charValue();
1290 // if (encapsedChar != '$' && encapsedChar != ' ') {
1291 // currentCharacter = source[currentPosition++];
1292 // if (currentCharacter == encapsedChar) {
1293 // switch (currentCharacter) {
1295 // return TokenNameEncapsedString0;
1297 // return TokenNameEncapsedString1;
1299 // return TokenNameEncapsedString2;
1302 // while (currentCharacter != encapsedChar) {
1303 // /** ** in PHP \r and \n are valid in string literals *** */
1304 // switch (currentCharacter) {
1306 // int escapeSize = currentPosition;
1307 // boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1308 // //scanEscapeCharacter make a side effect on this value and
1309 // // we need the previous value few lines down this one
1310 // scanDoubleQuotedEscapeCharacter();
1311 // escapeSize = currentPosition - escapeSize;
1312 // if (withoutUnicodePtr == 0) {
1313 // //buffer all the entries that have been left aside....
1314 // withoutUnicodePtr = currentPosition - escapeSize - 1 -
1316 // System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1317 // withoutUnicodePtr);
1318 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1319 // } else { //overwrite the / in the buffer
1320 // withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1321 // if (backSlashAsUnicodeInString) { //there are TWO \ in
1322 // withoutUnicodePtr--;
1328 // if (recordLineSeparator) {
1329 // pushLineSeparator();
1333 // if (isPHPIdentifierStart(source[currentPosition]) ||
1334 // source[currentPosition] == '{') {
1335 // currentPosition--;
1336 // encapsedStringStack.push(new Character('$'));
1337 // return TokenNameSTRING;
1341 // if (source[currentPosition] == '$') { // CURLY_OPEN
1342 // currentPosition--;
1343 // encapsedStringStack.push(new Character('$'));
1344 // return TokenNameSTRING;
1347 // // consume next character
1348 // unicodeAsBackSlash = false;
1349 // currentCharacter = source[currentPosition++];
1350 // if (withoutUnicodePtr != 0) {
1351 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1355 // currentPosition--;
1356 // return TokenNameSTRING;
1358 // ---------Consume white space and handles startPosition---------
// whiteStart remembers where the whitespace run began so it can be reported as a WHITESPACE token below
1359 int whiteStart = currentPosition;
1360 startPosition = currentPosition;
1361 currentCharacter = source[currentPosition++];
1362 // if (encapsedChar == '$') {
1363 // switch (currentCharacter) {
1365 // currentCharacter = source[currentPosition++];
1366 // return TokenNameSTRING;
1368 // if (encapsedChar == '$') {
1369 // if (getNextChar('$'))
1370 // return TokenNameLBRACE_DOLLAR;
1372 // return TokenNameLBRACE;
1374 // return TokenNameRBRACE;
1376 // return TokenNameLBRACKET;
1378 // return TokenNameRBRACKET;
1380 // if (tokenizeStrings) {
1381 // consumeStringConstant();
1382 // return TokenNameStringSingleQuote;
1384 // return TokenNameEncapsedString1;
1386 // return TokenNameEncapsedString2;
1388 // if (tokenizeStrings) {
1389 // consumeStringInterpolated();
1390 // return TokenNameStringInterpolated;
1392 // return TokenNameEncapsedString0;
1394 // if (getNextChar('>'))
1395 // return TokenNameMINUS_GREATER;
1396 // return TokenNameSTRING;
1398 // if (currentCharacter == '$') {
1399 // int oldPosition = currentPosition;
1401 // currentCharacter = source[currentPosition++];
1402 // if (isPHPIdentifierStart(currentCharacter)) {
1403 // return TokenNameDOLLAR_LBRACE;
1405 // if (isPHPIdentifierStart(currentCharacter)) {
1406 // return scanIdentifierOrKeyword(true);
1408 // currentPosition = oldPosition;
1409 // return TokenNameSTRING;
1411 // } catch (IndexOutOfBoundsException e) {
1412 // currentPosition = oldPosition;
1413 // return TokenNameSTRING;
1416 // if (isPHPIdentifierStart(currentCharacter))
1417 // return scanIdentifierOrKeyword(false);
1418 // if (Character.isDigit(currentCharacter))
1419 // return scanNumber(false);
1420 // return TokenNameERROR;
1423 // boolean isWhiteSpace;
1425 while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1426 startPosition = currentPosition;
1427 currentCharacter = source[currentPosition++];
1428 // if (((currentCharacter = source[currentPosition++]) == '\\')
1429 // && (source[currentPosition] == 'u')) {
1430 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1432 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1433 checkNonExternalizeString();
1434 if (recordLineSeparator) {
1435 pushLineSeparator();
1440 // isWhiteSpace = (currentCharacter == ' ')
1441 // || Character.isWhitespace(currentCharacter);
1444 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1445 // reposition scanner in case we are interested by spaces as tokens
1447 startPosition = whiteStart;
1448 return TokenNameWHITESPACE;
1450 // little trick to get out in the middle of a source computation
1451 if (currentPosition > eofPosition)
1452 return TokenNameEOF;
1453 // ---------Identify the next token-------------
// NOTE(review): the case labels of this switch are not visible in this listing; each branch is identified
// here only by the token constants it returns
1454 switch (currentCharacter) {
1456 return getCastOrParen();
1458 return TokenNameRPAREN;
1460 return TokenNameLBRACE;
1462 return TokenNameRBRACE;
1464 return TokenNameLBRACKET;
1466 return TokenNameRBRACKET;
1468 return TokenNameSEMICOLON;
1470 return TokenNameCOMMA;
1472 if (getNextChar('='))
1473 return TokenNameDOT_EQUAL;
1474 if (getNextCharAsDigit())
1475 return scanNumber(true);
1476 return TokenNameDOT;
1479 if ((test = getNextChar('+', '=')) == 0)
1480 return TokenNamePLUS_PLUS;
1482 return TokenNamePLUS_EQUAL;
1483 return TokenNamePLUS;
1487 if ((test = getNextChar('-', '=')) == 0)
1488 return TokenNameMINUS_MINUS;
1490 return TokenNameMINUS_EQUAL;
1491 if (getNextChar('>'))
1492 return TokenNameMINUS_GREATER;
1493 return TokenNameMINUS;
1496 if (getNextChar('='))
1497 return TokenNameTWIDDLE_EQUAL;
1498 return TokenNameTWIDDLE;
1500 if (getNextChar('=')) {
1501 if (getNextChar('=')) {
1502 return TokenNameNOT_EQUAL_EQUAL;
1504 return TokenNameNOT_EQUAL;
1506 return TokenNameNOT;
1508 if (getNextChar('='))
1509 return TokenNameMULTIPLY_EQUAL;
1510 return TokenNameMULTIPLY;
1512 if (getNextChar('='))
1513 return TokenNameREMAINDER_EQUAL;
1514 return TokenNameREMAINDER;
// remember the position so the scanner can be rewound when only a plain LESS token is found
1516 int oldPosition = currentPosition;
1518 currentCharacter = source[currentPosition++];
1519 } catch (IndexOutOfBoundsException e) {
1520 currentPosition = oldPosition;
1521 return TokenNameLESS;
1523 switch (currentCharacter) {
1525 return TokenNameLESS_EQUAL;
1527 return TokenNameNOT_EQUAL;
1529 if (getNextChar('='))
1530 return TokenNameLEFT_SHIFT_EQUAL;
// heredoc: skip whitespace, then read the identifier that names the end tag
1531 if (getNextChar('<')) {
1532 currentCharacter = source[currentPosition++];
1533 while (Character.isWhitespace(currentCharacter)) {
1534 currentCharacter = source[currentPosition++];
1536 int heredocStart = currentPosition - 1;
1537 int heredocLength = 0;
1538 if (isPHPIdentifierStart(currentCharacter)) {
1539 currentCharacter = source[currentPosition++];
1541 return TokenNameERROR;
1543 while (isPHPIdentifierPart(currentCharacter)) {
1544 currentCharacter = source[currentPosition++];
1546 heredocLength = currentPosition - heredocStart - 1;
1547 // heredoc end-tag determination
1548 boolean endTag = true;
1551 ch = source[currentPosition++];
1552 if (ch == '\r' || ch == '\n') {
1553 if (recordLineSeparator) {
1554 pushLineSeparator();
// compare the text at the current position with the heredoc identifier, character by character
1558 for (int i = 0; i < heredocLength; i++) {
1559 if (source[currentPosition + i] != source[heredocStart + i]) {
1565 currentPosition += heredocLength - 1;
1566 currentCharacter = source[currentPosition++];
1567 break; // do...while loop
1573 return TokenNameHEREDOC;
1575 return TokenNameLEFT_SHIFT;
1577 currentPosition = oldPosition;
1578 return TokenNameLESS;
1582 if ((test = getNextChar('=', '>')) == 0)
1583 return TokenNameGREATER_EQUAL;
1585 if ((test = getNextChar('=', '>')) == 0)
1586 return TokenNameRIGHT_SHIFT_EQUAL;
1587 return TokenNameRIGHT_SHIFT;
1589 return TokenNameGREATER;
1592 if (getNextChar('=')) {
1593 if (getNextChar('=')) {
1594 return TokenNameEQUAL_EQUAL_EQUAL;
1596 return TokenNameEQUAL_EQUAL;
1598 if (getNextChar('>'))
1599 return TokenNameEQUAL_GREATER;
1600 return TokenNameEQUAL;
1603 if ((test = getNextChar('&', '=')) == 0)
1604 return TokenNameAND_AND;
1606 return TokenNameAND_EQUAL;
1607 return TokenNameAND;
1611 if ((test = getNextChar('|', '=')) == 0)
1612 return TokenNameOR_OR;
1614 return TokenNameOR_EQUAL;
1618 if (getNextChar('='))
1619 return TokenNameXOR_EQUAL;
1620 return TokenNameXOR;
// a '>' here leads to an INLINE_HTML token; otherwise the branch yields TokenNameQUESTION
1622 if (getNextChar('>')) {
1624 if (currentPosition == source.length) {
1626 return TokenNameINLINE_HTML;
1628 return getInlinedHTMLToken(currentPosition - 2);
1630 return TokenNameQUESTION;
1632 if (getNextChar(':'))
1633 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1634 return TokenNameCOLON;
1638 consumeStringConstant();
1639 return TokenNameStringSingleQuote;
1641 // if (tokenizeStrings) {
1642 consumeStringLiteral();
1643 return TokenNameStringDoubleQuote;
1645 // return TokenNameEncapsedString2;
1647 // if (tokenizeStrings) {
1648 consumeStringInterpolated();
1649 return TokenNameStringInterpolated;
1651 // return TokenNameEncapsedString0;
// startChar keeps the character that opened this branch because getNextChar may advance the scanner
1654 char startChar = currentCharacter;
1655 if (getNextChar('=') && startChar == '/') {
1656 return TokenNameDIVIDE_EQUAL;
// '#' always selects this comment path; otherwise the result of getNextChar('/', '*') decides
1659 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1661 this.lastCommentLinePosition = this.currentPosition;
1662 int endPositionForLineComment = 0;
1663 try { // get the next char
1664 currentCharacter = source[currentPosition++];
1665 // if (((currentCharacter = source[currentPosition++])
1667 // && (source[currentPosition] == 'u')) {
1668 // //-------------unicode handling ------------
1669 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1670 // currentPosition++;
1671 // while (source[currentPosition] == 'u') {
1672 // currentPosition++;
1675 // Character.getNumericValue(source[currentPosition++]))
1679 // Character.getNumericValue(source[currentPosition++]))
1683 // Character.getNumericValue(source[currentPosition++]))
1687 // Character.getNumericValue(source[currentPosition++]))
1691 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1693 // currentCharacter =
1694 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1697 // handle the \\u case manually into comment
1698 // if (currentCharacter == '\\') {
1699 // if (source[currentPosition] == '\\')
1700 // currentPosition++;
1701 // } //jump over the \\
1702 boolean isUnicode = false;
1703 while (currentCharacter != '\r' && currentCharacter != '\n') {
1704 this.lastCommentLinePosition = this.currentPosition;
1705 if (currentCharacter == '?') {
1706 if (getNextChar('>')) {
1707 // ?> breaks line comments
1708 startPosition = currentPosition - 2;
1710 return TokenNameINLINE_HTML;
1713 // get the next char
1715 currentCharacter = source[currentPosition++];
1716 // if (((currentCharacter = source[currentPosition++])
1718 // && (source[currentPosition] == 'u')) {
1719 // isUnicode = true;
1720 // //-------------unicode handling ------------
1721 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1722 // currentPosition++;
1723 // while (source[currentPosition] == 'u') {
1724 // currentPosition++;
1727 // Character.getNumericValue(source[currentPosition++]))
1731 // Character.getNumericValue(
1732 // source[currentPosition++]))
1736 // Character.getNumericValue(
1737 // source[currentPosition++]))
1741 // Character.getNumericValue(
1742 // source[currentPosition++]))
1746 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1748 // currentCharacter =
1749 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1752 // handle the \\u case manually into comment
1753 // if (currentCharacter == '\\') {
1754 // if (source[currentPosition] == '\\')
1755 // currentPosition++;
1756 // } //jump over the \\
1759 endPositionForLineComment = currentPosition - 6;
1761 endPositionForLineComment = currentPosition - 1;
1763 // recordComment(false);
1764 recordComment(TokenNameCOMMENT_LINE);
1765 if (this.taskTags != null)
1766 checkTaskTag(this.startPosition, this.currentPosition);
1767 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1768 checkNonExternalizeString();
1769 if (recordLineSeparator) {
1771 pushUnicodeLineSeparator();
1773 pushLineSeparator();
1779 if (tokenizeComments) {
1781 currentPosition = endPositionForLineComment;
1782 // reset one character behind
1784 return TokenNameCOMMENT_LINE;
1786 } catch (IndexOutOfBoundsException e) { // an eof will then
1788 if (tokenizeComments) {
1790 // reset one character behind
1791 return TokenNameCOMMENT_LINE;
1797 // traditional and annotation comment
1798 boolean isJavadoc = false, star = false;
1799 // consume next character
1800 unicodeAsBackSlash = false;
1801 currentCharacter = source[currentPosition++];
1802 // if (((currentCharacter = source[currentPosition++]) ==
1804 // && (source[currentPosition] == 'u')) {
1805 // getNextUnicodeChar();
1807 // if (withoutUnicodePtr != 0) {
1808 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1809 // currentCharacter;
1812 if (currentCharacter == '*') {
1816 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1817 checkNonExternalizeString();
1818 if (recordLineSeparator) {
1819 pushLineSeparator();
1824 try { // get the next char
1825 currentCharacter = source[currentPosition++];
1826 // if (((currentCharacter = source[currentPosition++])
1828 // && (source[currentPosition] == 'u')) {
1829 // //-------------unicode handling ------------
1830 // getNextUnicodeChar();
1832 // handle the \\u case manually into comment
1833 // if (currentCharacter == '\\') {
1834 // if (source[currentPosition] == '\\')
1835 // currentPosition++;
1836 // //jump over the \\
1838 // empty comment is not a javadoc /**/
1839 if (currentCharacter == '/') {
1842 // loop until end of comment */
1843 while ((currentCharacter != '/') || (!star)) {
1844 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1845 checkNonExternalizeString();
1846 if (recordLineSeparator) {
1847 pushLineSeparator();
// star records whether the character just examined was '*', so that a following '/' closes the comment
1852 star = currentCharacter == '*';
1854 currentCharacter = source[currentPosition++];
1855 // if (((currentCharacter = source[currentPosition++])
1857 // && (source[currentPosition] == 'u')) {
1858 // //-------------unicode handling ------------
1859 // getNextUnicodeChar();
1861 // handle the \\u case manually into comment
1862 // if (currentCharacter == '\\') {
1863 // if (source[currentPosition] == '\\')
1864 // currentPosition++;
1865 // } //jump over the \\
1867 // recordComment(isJavadoc);
1869 recordComment(TokenNameCOMMENT_PHPDOC);
1871 recordComment(TokenNameCOMMENT_BLOCK);
1874 if (tokenizeComments) {
1876 return TokenNameCOMMENT_PHPDOC;
1877 return TokenNameCOMMENT_BLOCK;
1880 if (this.taskTags != null) {
1881 checkTaskTag(this.startPosition, this.currentPosition);
1883 } catch (IndexOutOfBoundsException e) {
1884 // reset end position for error reporting
1885 currentPosition -= 2;
1886 throw new InvalidInputException(UNTERMINATED_COMMENT);
1890 return TokenNameDIVIDE;
1894 return TokenNameEOF;
1895 // the atEnd may not be <currentPosition == source.length> if
1896 // source is only some part of a real (external) stream
1897 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// a '$' directly followed by an identifier start is scanned as a variable; otherwise it is a bare DOLLAR token
1899 if (currentCharacter == '$') {
1900 int oldPosition = currentPosition;
1902 currentCharacter = source[currentPosition++];
1903 if (isPHPIdentifierStart(currentCharacter)) {
1904 return scanIdentifierOrKeyword(true);
1906 currentPosition = oldPosition;
1907 return TokenNameDOLLAR;
1909 } catch (IndexOutOfBoundsException e) {
1910 currentPosition = oldPosition;
1911 return TokenNameDOLLAR;
1914 if (isPHPIdentifierStart(currentCharacter))
1915 return scanIdentifierOrKeyword(false);
1916 if (Character.isDigit(currentCharacter))
1917 return scanNumber(false);
1918 return TokenNameERROR;
1921 } // -----------------end switch while try--------------------
// running off the end of source while scanning a token is reported as end of file
1922 catch (IndexOutOfBoundsException e) {
1925 return TokenNameEOF;
1930 * @throws InvalidInputException
/**
 * Scans text outside of PHP code, starting at {@code currentPosition}, while looking for an opening PHP tag.
 * <p>
 * If {@code currentPosition} is already beyond {@code source.length} it is clamped to {@code source.length}
 * and TokenNameEOF is returned. Otherwise {@code startPosition} is set to {@code start} and characters are
 * read one by one: a '<' followed by '?' is examined for the short form, the "<?=" expression form (for which
 * {@code phpExpressionTag} is set) and the form spelled with 'P'/'p', 'H'/'h', 'P'/'p'. When
 * {@code ignorePHPOneLiner} is set, {@code lookAheadLinePHPTag()} is consulted before a tag is accepted.
 * Line separators met on the way are recorded when {@code recordLineSeparator} is set.
 * NOTE(review): the statements that switch the scanner into PHP mode are not visible in this listing.
 *
 * @param start
 *          the value assigned to {@code startPosition} for the returned token
 * @return TokenNameINLINE_HTML, or TokenNameEOF when the position is already past the end of the source
 * @throws InvalidInputException
 *           declared by the signature; no throw statement is visible in this listing
 */
1932 private int getInlinedHTMLToken(int start) throws InvalidInputException {
1933 if (currentPosition > source.length) {
1934 currentPosition = source.length;
1935 return TokenNameEOF;
1937 startPosition = start;
1940 currentCharacter = source[currentPosition++];
1941 if (currentCharacter == '<') {
1942 if (getNextChar('?')) {
1943 currentCharacter = source[currentPosition++];
// anything other than 'P'/'p' after "<?" is handled as a short tag
1944 if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
1945 if (currentCharacter != '=') { // <?=
1948 phpExpressionTag = true;
1951 if (ignorePHPOneLiner) { // for CodeFormatter
1952 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1954 return TokenNameINLINE_HTML;
1958 return TokenNameINLINE_HTML;
1961 // boolean phpStart = (currentCharacter == 'P') ||
1962 // (currentCharacter == 'p');
// long tag: after the first 'P'/'p', expect 'H'/'h' and then 'P'/'p'
1964 int test = getNextChar('H', 'h');
1966 test = getNextChar('P', 'p');
1969 if (ignorePHPOneLiner) {
1970 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1972 return TokenNameINLINE_HTML;
1976 return TokenNameINLINE_HTML;
1984 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1985 if (recordLineSeparator) {
1986 pushLineSeparator();
1991 } // -----------------while--------------------
1993 return TokenNameINLINE_HTML;
1994 } // -----------------try--------------------
// end of source reached without finding a PHP tag: the rest is reported as inline HTML
1995 catch (IndexOutOfBoundsException e) {
1996 startPosition = start;
2000 return TokenNameINLINE_HTML;
/**
 * Looks ahead from {@code currentPosition} to decide whether the PHP section that has just been opened also
 * ends on the same line (used for the CodeFormatter, see {@code ignorePHPOneLiner}).
 * <p>
 * The look-ahead uses its own cursor ({@code currentPositionInLine}) and tracks whether it is inside a
 * single- or double-quoted string. When a character preceded by '?' is found in the branch that returns
 * TokenNameEOF, {@code currentPosition} is moved to the look-ahead cursor. If the end of {@code source} is
 * reached, {@code currentPosition} is likewise moved to the look-ahead cursor.
 * NOTE(review): the case labels of the switch are not visible in this listing, so which characters trigger
 * each branch cannot be confirmed from here.
 *
 * @return TokenNameEOF as a "dummy" token when the closing tag was found on the line, otherwise
 *         TokenNameINLINE_HTML
 */
2006 private int lookAheadLinePHPTag() {
2007 // check if the PHP is only in this line (for CodeFormatter)
2008 int currentPositionInLine = currentPosition;
2009 char previousCharInLine = ' ';
2010 char currentCharInLine = ' ';
2011 boolean singleQuotedStringActive = false;
2012 boolean doubleQuotedStringActive = false;
2015 // look ahead in this line
2017 previousCharInLine = currentCharInLine;
2018 currentCharInLine = source[currentPositionInLine++];
2019 switch (currentCharInLine) {
2021 if (previousCharInLine == '?') {
2022 // update the scanner's current Position in the source
2023 currentPosition = currentPositionInLine;
2024 // use as "dummy" token
2025 return TokenNameEOF;
2029 if (doubleQuotedStringActive) {
2030 // ignore escaped characters in double quoted strings
2031 previousCharInLine = currentCharInLine;
2032 currentCharInLine = source[currentPositionInLine++];
// toggle the double-quoted state; a double quote inside a single-quoted string does not open one
2035 if (doubleQuotedStringActive) {
2036 doubleQuotedStringActive = false;
2038 if (!singleQuotedStringActive) {
2039 doubleQuotedStringActive = true;
// a single-quoted string is only closed when the previous character is not a backslash
2044 if (singleQuotedStringActive) {
2045 if (previousCharInLine != '\\') {
2046 singleQuotedStringActive = false;
2049 if (!doubleQuotedStringActive) {
2050 singleQuotedStringActive = true;
2056 return TokenNameINLINE_HTML;
2058 if (!singleQuotedStringActive && !doubleQuotedStringActive) {
2060 return TokenNameINLINE_HTML;
2064 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2066 return TokenNameINLINE_HTML;
2070 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2072 return TokenNameINLINE_HTML;
// ran past the end of source while looking ahead
2077 } catch (IndexOutOfBoundsException e) {
2079 currentPosition = currentPositionInLine;
2080 return TokenNameINLINE_HTML;
2084 // public final void getNextUnicodeChar()
2085 // throws IndexOutOfBoundsException, InvalidInputException {
2087 // //handle the case of unicode.
2088 // //when a unicode appears then we must use a buffer that holds char
2090 // //At the end of this method currentCharacter holds the new visited char
2091 // //and currentPosition points right next after it
2093 // //ALL getNextChar.... ARE OPTIMIZED COPIES
2095 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2096 // currentPosition++;
2097 // while (source[currentPosition] == 'u') {
2098 // currentPosition++;
2102 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2104 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2106 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2108 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2110 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2112 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2113 // //need the unicode buffer
2114 // if (withoutUnicodePtr == 0) {
2115 // //buffer all the entries that have been left aside....
2116 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2117 // System.arraycopy(
2120 // withoutUnicodeBuffer,
2122 // withoutUnicodePtr);
2124 // //fill the buffer with the char
2125 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2127 // unicodeAsBackSlash = currentCharacter == '\\';
2130 * Tokenize a method body, assuming that curly brackets are properly balanced.
2132 public final void jumpOverMethodBody() {
2133 this.wasAcr = false;
2136 while (true) { // loop for jumping over comments
2137 // ---------Consume white space and handles startPosition---------
2138 boolean isWhiteSpace;
2140 startPosition = currentPosition;
2141 currentCharacter = source[currentPosition++];
2142 // if (((currentCharacter = source[currentPosition++]) == '\\')
2143 // && (source[currentPosition] == 'u')) {
2144 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2146 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2147 pushLineSeparator();
2148 isWhiteSpace = Character.isWhitespace(currentCharacter);
2150 } while (isWhiteSpace);
2151 // -------consume token until } is found---------
2152 switch (currentCharacter) {
2163 test = getNextChar('\\');
2166 scanDoubleQuotedEscapeCharacter();
2167 } catch (InvalidInputException ex) {
2171 // try { // consume next character
2172 unicodeAsBackSlash = false;
2173 currentCharacter = source[currentPosition++];
2174 // if (((currentCharacter = source[currentPosition++]) == '\\')
2175 // && (source[currentPosition] == 'u')) {
2176 // getNextUnicodeChar();
2178 if (withoutUnicodePtr != 0) {
2179 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2182 // } catch (InvalidInputException ex) {
2190 // try { // consume next character
2191 unicodeAsBackSlash = false;
2192 currentCharacter = source[currentPosition++];
2193 // if (((currentCharacter = source[currentPosition++]) == '\\')
2194 // && (source[currentPosition] == 'u')) {
2195 // getNextUnicodeChar();
2197 if (withoutUnicodePtr != 0) {
2198 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2201 // } catch (InvalidInputException ex) {
2203 while (currentCharacter != '"') {
2204 if (currentCharacter == '\r') {
2205 if (source[currentPosition] == '\n')
2208 // the string cannot go further that the line
2210 if (currentCharacter == '\n') {
2212 // the string cannot go further that the line
2214 if (currentCharacter == '\\') {
2216 scanDoubleQuotedEscapeCharacter();
2217 } catch (InvalidInputException ex) {
2221 // try { // consume next character
2222 unicodeAsBackSlash = false;
2223 currentCharacter = source[currentPosition++];
2224 // if (((currentCharacter = source[currentPosition++]) == '\\')
2225 // && (source[currentPosition] == 'u')) {
2226 // getNextUnicodeChar();
2228 if (withoutUnicodePtr != 0) {
2229 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2232 // } catch (InvalidInputException ex) {
2235 } catch (IndexOutOfBoundsException e) {
2241 if ((test = getNextChar('/', '*')) == 0) {
2244 // get the next char
2245 currentCharacter = source[currentPosition++];
2246 // if (((currentCharacter = source[currentPosition++]) ==
2248 // && (source[currentPosition] == 'u')) {
2249 // //-------------unicode traitement ------------
2250 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2251 // currentPosition++;
2252 // while (source[currentPosition] == 'u') {
2253 // currentPosition++;
2256 // Character.getNumericValue(source[currentPosition++]))
2260 // Character.getNumericValue(source[currentPosition++]))
2264 // Character.getNumericValue(source[currentPosition++]))
2268 // Character.getNumericValue(source[currentPosition++]))
2271 // //error don't care of the value
2272 // currentCharacter = 'A';
2273 // } //something different from \n and \r
2275 // currentCharacter =
2276 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2279 while (currentCharacter != '\r' && currentCharacter != '\n') {
2280 // get the next char
2281 currentCharacter = source[currentPosition++];
2282 // if (((currentCharacter = source[currentPosition++])
2284 // && (source[currentPosition] == 'u')) {
2285 // //-------------unicode traitement ------------
2286 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2287 // currentPosition++;
2288 // while (source[currentPosition] == 'u') {
2289 // currentPosition++;
2292 // Character.getNumericValue(source[currentPosition++]))
2296 // Character.getNumericValue(source[currentPosition++]))
2300 // Character.getNumericValue(source[currentPosition++]))
2304 // Character.getNumericValue(source[currentPosition++]))
2307 // //error don't care of the value
2308 // currentCharacter = 'A';
2309 // } //something different from \n and \r
2311 // currentCharacter =
2312 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2316 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2317 pushLineSeparator();
2318 } catch (IndexOutOfBoundsException e) {
2319 } // an eof will them be generated
2323 // traditional and annotation comment
2324 boolean star = false;
2325 // try { // consume next character
2326 unicodeAsBackSlash = false;
2327 currentCharacter = source[currentPosition++];
2328 // if (((currentCharacter = source[currentPosition++]) == '\\')
2329 // && (source[currentPosition] == 'u')) {
2330 // getNextUnicodeChar();
2332 if (withoutUnicodePtr != 0) {
2333 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2336 // } catch (InvalidInputException ex) {
2338 if (currentCharacter == '*') {
2341 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2342 pushLineSeparator();
2343 try { // get the next char
2344 currentCharacter = source[currentPosition++];
2345 // if (((currentCharacter = source[currentPosition++]) ==
2347 // && (source[currentPosition] == 'u')) {
2348 // //-------------unicode traitement ------------
2349 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2350 // currentPosition++;
2351 // while (source[currentPosition] == 'u') {
2352 // currentPosition++;
2355 // Character.getNumericValue(source[currentPosition++]))
2359 // Character.getNumericValue(source[currentPosition++]))
2363 // Character.getNumericValue(source[currentPosition++]))
2367 // Character.getNumericValue(source[currentPosition++]))
2370 // //error don't care of the value
2371 // currentCharacter = 'A';
2372 // } //something different from * and /
2374 // currentCharacter =
2375 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2378 // loop until end of comment */
2379 while ((currentCharacter != '/') || (!star)) {
2380 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2381 pushLineSeparator();
2382 star = currentCharacter == '*';
2384 currentCharacter = source[currentPosition++];
2385 // if (((currentCharacter = source[currentPosition++])
2387 // && (source[currentPosition] == 'u')) {
2388 // //-------------unicode traitement ------------
2389 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2390 // currentPosition++;
2391 // while (source[currentPosition] == 'u') {
2392 // currentPosition++;
2395 // Character.getNumericValue(source[currentPosition++]))
2399 // Character.getNumericValue(source[currentPosition++]))
2403 // Character.getNumericValue(source[currentPosition++]))
2407 // Character.getNumericValue(source[currentPosition++]))
2410 // //error don't care of the value
2411 // currentCharacter = 'A';
2412 // } //something different from * and /
2414 // currentCharacter =
2415 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2419 } catch (IndexOutOfBoundsException e) {
2427 if (isPHPIdentOrVarStart(currentCharacter) ) {
2429 scanIdentifierOrKeyword((currentCharacter == '$'));
2430 } catch (InvalidInputException ex) {
2435 if ( ObviousIdentCharNatures[currentCharacter]==C_DIGIT) {
2436 // if (Character.isDigit(currentCharacter)) {
2439 } catch (InvalidInputException ex) {
2446 // -----------------end switch while try--------------------
2447 } catch (IndexOutOfBoundsException e) {
2448 } catch (InvalidInputException e) {
2453 // public final boolean jumpOverUnicodeWhiteSpace()
2454 // throws InvalidInputException {
2456 // //handle the case of unicode. Jump over the next whiteSpace
2457 // //making startPosition pointing on the next available char
2458 // //On false, the currentCharacter is filled up with a potential
2462 // this.wasAcr = false;
2463 // int c1, c2, c3, c4;
2464 // int unicodeSize = 6;
2465 // currentPosition++;
2466 // while (source[currentPosition] == 'u') {
2467 // currentPosition++;
2471 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2473 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2475 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2477 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2479 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2482 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2483 // if (recordLineSeparator
2484 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2485 // pushLineSeparator();
2486 // if (Character.isWhitespace(currentCharacter))
2489 // //buffer the new char which is not a white space
2490 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2491 // //withoutUnicodePtr == 1 is true here
2493 // } catch (IndexOutOfBoundsException e) {
2494 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2497 public final int[] getLineEnds() {
2498 // return a bounded copy of this.lineEnds
2500 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
/**
 * Accessor for the character buffer being scanned.
 * NOTE(review): the method body is not visible in this listing; presumably it
 * returns the scanner's source array itself (not a copy) -- confirm.
 */
2504 public char[] getSource() {
2508 public static boolean isIdentifierOrKeyword(int token) {
2509 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
/**
 * Returns the source of the current token when that token is one character long.
 * The character is read from source[startPosition]; the last visible statement
 * allocates a fresh one-element array holding it.
 * NOTE(review): the statements between the read and that fallback are not
 * visible in this listing; judging by the comments they presumably return
 * preallocated shared arrays for the common single-character identifiers --
 * confirm, and confirm that callers never modify the returned array.
 */
2512 final char[] optimizedCurrentTokenSource1() {
2513 // return always the same char[] build only once
2514 // optimization at no speed cost of 99.5 % of the singleCharIdentifier
2515 char charOne = source[startPosition];
// fallback: no shared array was returned above, so build a new one
2570 return new char[] { charOne };
/**
 * Returns the source of the current token when that token is two characters
 * long, reusing a previously built array where possible.
 * The two characters are read from source[startPosition] and
 * source[startPosition + 1]. A run of returns hands back the preallocated
 * charArray_va .. charArray_vz arrays; otherwise the pair is looked up in (and
 * if necessary added to) a bucket of charArray_length[0] chosen by a hash of
 * the two characters.
 * NOTE(review): the guard and the case labels in front of the charArray_v?
 * returns are not visible in this listing; presumably they select "$a".."$z"
 * style variable names -- confirm.
 */
2574 final char[] optimizedCurrentTokenSource2() {
2576 c0 = source[startPosition];
2577 c1 = source[startPosition + 1];
2579 // return always the same char[] build only once
2580 // optimization at no speed cost of 99.5 % of the singleCharIdentifier
2583 return charArray_va;
2585 return charArray_vb;
2587 return charArray_vc;
2589 return charArray_vd;
2591 return charArray_ve;
2593 return charArray_vf;
2595 return charArray_vg;
2597 return charArray_vh;
2599 return charArray_vi;
2601 return charArray_vj;
2603 return charArray_vk;
2605 return charArray_vl;
2607 return charArray_vm;
2609 return charArray_vn;
2611 return charArray_vo;
2613 return charArray_vp;
2615 return charArray_vq;
2617 return charArray_vr;
2619 return charArray_vs;
2621 return charArray_vt;
2623 return charArray_vu;
2625 return charArray_vv;
2627 return charArray_vw;
2629 return charArray_vx;
2631 return charArray_vy;
2633 return charArray_vz;
2636 // try to return the same char[] build only once
// bucket index: both characters mixed together, reduced modulo TableSize
2637 int hash = ((c0 << 6) + c1) % TableSize;
2638 char[][] table = charArray_length[0][hash];
// first pass: scan the bucket upwards to its end
// NOTE(review): the initial value of i is not visible here; presumably newEntry2 -- confirm
2640 while (++i < InternalTableSize) {
2641 char[] charArray = table[i];
2642 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2645 // ---------other side---------
// second pass: scan the bucket up to and including index newEntry2
2647 int max = newEntry2;
2648 while (++i <= max) {
2649 char[] charArray = table[i];
2650 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2653 // --------add the entry-------
// not cached: store a new array in the slot after newEntry2
// NOTE(review): the wrap-around statement guarded by the test below is not visible -- confirm
2654 if (++max >= InternalTableSize)
2657 table[max] = (r = new char[] { c0, c1 });
2662 final char[] optimizedCurrentTokenSource3() {
2663 // try to return the same char[] build only once
2665 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2667 char[][] table = charArray_length[1][hash];
2669 while (++i < InternalTableSize) {
2670 char[] charArray = table[i];
2671 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2674 // ---------other side---------
2676 int max = newEntry3;
2677 while (++i <= max) {
2678 char[] charArray = table[i];
2679 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2682 // --------add the entry-------
2683 if (++max >= InternalTableSize)
2686 table[max] = (r = new char[] { c0, c1, c2 });
2691 final char[] optimizedCurrentTokenSource4() {
2692 // try to return the same char[] build only once
2693 char c0, c1, c2, c3;
2694 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2695 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2697 char[][] table = charArray_length[2][(int) hash];
2699 while (++i < InternalTableSize) {
2700 char[] charArray = table[i];
2701 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2704 // ---------other side---------
2706 int max = newEntry4;
2707 while (++i <= max) {
2708 char[] charArray = table[i];
2709 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2712 // --------add the entry-------
2713 if (++max >= InternalTableSize)
2716 table[max] = (r = new char[] { c0, c1, c2, c3 });
2721 final char[] optimizedCurrentTokenSource5() {
2722 // try to return the same char[] build only once
2723 char c0, c1, c2, c3, c4;
2724 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2725 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2727 char[][] table = charArray_length[3][(int) hash];
2729 while (++i < InternalTableSize) {
2730 char[] charArray = table[i];
2731 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2734 // ---------other side---------
2736 int max = newEntry5;
2737 while (++i <= max) {
2738 char[] charArray = table[i];
2739 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2742 // --------add the entry-------
2743 if (++max >= InternalTableSize)
2746 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2751 final char[] optimizedCurrentTokenSource6() {
2752 // try to return the same char[] build only once
2753 char c0, c1, c2, c3, c4, c5;
2754 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2755 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2756 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2758 char[][] table = charArray_length[4][(int) hash];
2760 while (++i < InternalTableSize) {
2761 char[] charArray = table[i];
2762 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2763 && (c5 == charArray[5]))
2766 // ---------other side---------
2768 int max = newEntry6;
2769 while (++i <= max) {
2770 char[] charArray = table[i];
2771 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2772 && (c5 == charArray[5]))
2775 // --------add the entry-------
2776 if (++max >= InternalTableSize)
2779 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
/**
 * Records the position of the line separator currently held in
 * currentCharacter in the lineEnds table. A '\r' that is directly followed by
 * '\n' ends up as a single entry pointing at the '\n'.
 * NOTE(review): several short statements of this method (the bodies of the
 * guard tests, the try openers, the updates of wasAcr) are not visible in this
 * listing; the comments below describe only what the visible lines do.
 *
 * @throws InvalidInputException declared by the signature; no visible line throws it
 */
2784 public final void pushLineSeparator() throws InvalidInputException {
2785 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// number of slots added each time lineEnds runs full
2786 final int INCREMENT = 250;
2787 if (this.checkNonExternalizedStringLiterals) {
2788 // reinitialize the current line for non externalize strings purpose
2791 // currentCharacter is at position currentPosition-1
2793 if (currentCharacter == '\r') {
2794 int separatorPos = currentPosition - 1;
// separator lies at or before the last recorded line end
// NOTE(review): the guarded statement is not visible; presumably an early return -- confirm
2795 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2797 // System.out.println("CR-" + separatorPos);
// linePtr is advanced before the store, so it keeps its new value even if the store overflows
2799 lineEnds[++linePtr] = separatorPos;
2800 } catch (IndexOutOfBoundsException e) {
2801 // linePtr value is correct
// table full: allocate INCREMENT more slots, copy the old entries, then redo the store
2802 int oldLength = lineEnds.length;
2803 int[] old = lineEnds;
2804 lineEnds = new int[oldLength + INCREMENT];
2805 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2806 lineEnds[linePtr] = separatorPos;
2808 // look-ahead for merged cr+lf
2810 if (source[currentPosition] == '\n') {
2811 // System.out.println("look-ahead LF-" + currentPosition);
// the '\n' directly follows this '\r': make the entry just written point at the '\n'
2812 lineEnds[linePtr] = currentPosition;
// reading source[currentPosition] past the end of the buffer lands here
2818 } catch (IndexOutOfBoundsException e) {
2823 if (currentCharacter == '\n') {
2824 // must merge eventual cr followed by lf
// the last entry is the '\r' directly before this '\n': move it onto the '\n'
2825 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2826 // System.out.println("merge LF-" + (currentPosition - 1));
2827 lineEnds[linePtr] = currentPosition - 1;
2829 int separatorPos = currentPosition - 1;
// same guard as in the '\r' branch (guarded statement not visible -- confirm)
2830 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2832 // System.out.println("LF-" + separatorPos);
2834 lineEnds[++linePtr] = separatorPos;
2835 } catch (IndexOutOfBoundsException e) {
2836 // linePtr value is correct
// table full: grow by INCREMENT slots and redo the store
2837 int oldLength = lineEnds.length;
2838 int[] old = lineEnds;
2839 lineEnds = new int[oldLength + INCREMENT];
2840 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2841 lineEnds[linePtr] = separatorPos;
/**
 * Variant of pushLineSeparator() for a '\r' or '\n' that was read as a unicode
 * character: records its position in the lineEnds table, using
 * currentPosition - 6 as the separator position instead of currentPosition - 1.
 * NOTE(review): the offsets 6 and 7 presumably reflect the six source
 * characters of a \\uXXXX escape -- confirm. Several short statements (guard
 * bodies, try openers, updates of wasAcr) are not visible in this listing; the
 * comments below describe only what the visible lines do.
 */
2849 public final void pushUnicodeLineSeparator() {
2850 // isUnicode means that the \r or \n has been read as a unicode character
2851 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// number of slots added each time lineEnds runs full
2852 final int INCREMENT = 250;
2853 // currentCharacter is at position currentPosition-1
2854 if (this.checkNonExternalizedStringLiterals) {
2855 // reinitialize the current line for non externalize strings purpose
2859 if (currentCharacter == '\r') {
2860 int separatorPos = currentPosition - 6;
// separator lies at or before the last recorded line end
// NOTE(review): the guarded statement is not visible; presumably an early return -- confirm
2861 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2863 // System.out.println("CR-" + separatorPos);
// linePtr is advanced before the store, so it keeps its new value even if the store overflows
2865 lineEnds[++linePtr] = separatorPos;
2866 } catch (IndexOutOfBoundsException e) {
2867 // linePtr value is correct
// table full: allocate INCREMENT more slots, copy the old entries, then redo the store
2868 int oldLength = lineEnds.length;
2869 int[] old = lineEnds;
2870 lineEnds = new int[oldLength + INCREMENT];
2871 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2872 lineEnds[linePtr] = separatorPos;
2874 // look-ahead for merged cr+lf
2875 if (source[currentPosition] == '\n') {
2876 // System.out.println("look-ahead LF-" + currentPosition);
// a plain '\n' directly follows: make the entry just written point at it
2877 lineEnds[linePtr] = currentPosition;
2885 if (currentCharacter == '\n') {
2886 // must merge eventual cr followed by lf
// the last entry sits directly before this separator: move it to currentPosition - 6
// (the debug line below still prints currentPosition - 1, unlike the code)
2887 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2888 // System.out.println("merge LF-" + (currentPosition - 1));
2889 lineEnds[linePtr] = currentPosition - 6;
2891 int separatorPos = currentPosition - 6;
// same guard as in the '\r' branch (guarded statement not visible -- confirm)
2892 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2894 // System.out.println("LF-" + separatorPos);
2896 lineEnds[++linePtr] = separatorPos;
2897 } catch (IndexOutOfBoundsException e) {
2898 // linePtr value is correct
// table full: grow by INCREMENT slots and redo the store
2899 int oldLength = lineEnds.length;
2900 int[] old = lineEnds;
2901 lineEnds = new int[oldLength + INCREMENT];
2902 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2903 lineEnds[linePtr] = separatorPos;
2911 public void recordComment(int token) {
2913 int stopPosition = this.currentPosition;
2915 case TokenNameCOMMENT_LINE:
2916 stopPosition = -this.lastCommentLinePosition;
2918 case TokenNameCOMMENT_BLOCK:
2919 stopPosition = -this.currentPosition;
2923 // a new comment is recorded
2924 int length = this.commentStops.length;
2925 if (++this.commentPtr >= length) {
2926 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2927 // grows the positions buffers too
2928 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2930 this.commentStops[this.commentPtr] = stopPosition;
2931 this.commentStarts[this.commentPtr] = this.startPosition;
2934 // public final void recordComment(boolean isJavadoc) {
2935 // // a new annotation comment is recorded
2937 // commentStops[++commentPtr] = isJavadoc
2938 // ? currentPosition
2939 // : -currentPosition;
2940 // } catch (IndexOutOfBoundsException e) {
2941 // int oldStackLength = commentStops.length;
2942 // int[] oldStack = commentStops;
2943 // commentStops = new int[oldStackLength + 30];
2944 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2945 // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2946 // //grows the positions buffers too
2947 // int[] old = commentStarts;
2948 // commentStarts = new int[oldStackLength + 30];
2949 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2951 // //the buffer is of a correct size here
2952 // commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning can restart at the given offset.
 * NOTE(review): one statement between the opening comment and the position
 * assignment is not visible in this listing -- check the full source.
 *
 * @param begin offset assigned to initialPosition, startPosition and currentPosition
 * @param end   offset used to derive eofPosition (see below)
 */
2954 public void resetTo(int begin, int end) {
2955 // reset the scanner to a given position where it may rescan again
2957 initialPosition = startPosition = currentPosition = begin;
// eofPosition is end + 1, except that Integer.MAX_VALUE is kept as is so the addition cannot overflow
2958 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2959 commentPtr = -1; // reset comment stack
/**
 * Handles the character that follows a backslash inside a single-quoted
 * string: reads the next character from source (advancing currentPosition)
 * and dispatches on it, leaving the result in currentCharacter.
 * NOTE(review): the case labels of the switch are not visible in this listing.
 * The visible branches set currentCharacter to a single quote or to a
 * backslash; presumably every other escape is kept as a literal backslash --
 * confirm against the full source.
 *
 * @throws InvalidInputException declared by the signature; no visible line throws it
 */
2962 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2963 // the string with "\\u" is a legal string of two chars \ and u
2964 // thus we use a direct access to the source (for regular cases).
2965 // if (unicodeAsBackSlash) {
2966 // // consume next character
2967 // unicodeAsBackSlash = false;
2968 // if (((currentCharacter = source[currentPosition++]) == '\\')
2969 // && (source[currentPosition] == 'u')) {
2970 // getNextUnicodeChar();
2972 // if (withoutUnicodePtr != 0) {
2973 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// consume the character after the backslash
2977 currentCharacter = source[currentPosition++];
2978 switch (currentCharacter) {
2980 currentCharacter = '\'';
2983 currentCharacter = '\\';
2986 currentCharacter = '\\';
/**
 * Handles the character that follows a backslash inside a double-quoted
 * string: reads the next character from source (advancing currentPosition)
 * and replaces currentCharacter with the character the escape stands for.
 * The visible branches produce tab, line feed, carriage return, double quote,
 * single quote, backslash and '$'; the assignments for '\b' and '\f' are
 * commented out. Anything else is tried as an octal escape of up to three
 * digits.
 * NOTE(review): the case labels and several branch bodies are not visible in
 * this listing, so the exact mapping from escape letter to branch cannot be
 * confirmed from here.
 *
 * @throws InvalidInputException with INVALID_ESCAPE on a path of the octal
 *         handling whose condition is not visible in this listing
 */
2991 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
// consume the character after the backslash
2992 currentCharacter = source[currentPosition++];
2993 switch (currentCharacter) {
2995 // currentCharacter = '\b';
2998 currentCharacter = '\t';
3001 currentCharacter = '\n';
3004 // currentCharacter = '\f';
3007 currentCharacter = '\r';
3010 currentCharacter = '\"';
3013 currentCharacter = '\'';
3016 currentCharacter = '\\';
3019 currentCharacter = '$';
3022 // -----------octal escape--------------
3024 // OctalDigit OctalDigit
3025 // ZeroToThree OctalDigit OctalDigit
3026 int number = Character.getNumericValue(currentCharacter);
3027 if (number >= 0 && number <= 7) {
// a first digit above 3 cannot start a three-digit escape
3028 boolean zeroToThreeNot = number > 3;
3029 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3030 int digit = Character.getNumericValue(currentCharacter);
3031 if (digit >= 0 && digit <= 7) {
// accumulate the value in base 8
3032 number = (number * 8) + digit;
3033 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3034 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3035 // Digit --> ignore last character
3038 digit = Character.getNumericValue(currentCharacter);
3039 if (digit >= 0 && digit <= 7) {
3040 // has read \ZeroToThree OctalDigit OctalDigit
3041 number = (number * 8) + digit;
3042 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3043 // --> ignore last character
3047 } else { // has read \OctalDigit NonDigit--> ignore last
3051 } else { // has read \OctalDigit NonOctalDigit--> ignore last
3055 } else { // has read \OctalDigit --> ignore last character
3059 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the resulting character
3060 currentCharacter = (char) number;
3063 // throw new InvalidInputException(INVALID_ESCAPE);
3067 // public int scanIdentifierOrKeyword() throws InvalidInputException {
3068 // return scanIdentifierOrKeyword( false );
3070 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
3072 // first dispatch on the first char.
3073 // then the length. If there are several
3074 // keywors with the same length AND the same first char, then do another
3075 // disptach on the second char :-)...cool....but fast !
3076 useAssertAsAnIndentifier = false;
3077 while (getNextCharAsJavaIdentifierPart()) {
3081 // if (new String(getCurrentTokenSource()).equals("$this")) {
3082 // return TokenNamethis;
3084 return TokenNameVariable;
3089 // if (withoutUnicodePtr == 0)
3090 // quick test on length == 1 but not on length > 12 while most identifier
3091 // have a length which is <= 12...but there are lots of identifier with
3092 // only one char....
3094 if ((length = currentPosition - startPosition) == 1)
3095 return TokenNameIdentifier;
3097 data = new char[length];
3098 index = startPosition;
3099 for (int i = 0; i < length; i++) {
3100 data[i] = Character.toLowerCase(source[index + i]);
3104 // if ((length = withoutUnicodePtr) == 1)
3105 // return TokenNameIdentifier;
3106 // // data = withoutUnicodeBuffer;
3107 // data = new char[withoutUnicodeBuffer.length];
3108 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3109 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3113 firstLetter = data[index];
3114 switch (firstLetter) {
3119 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3120 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3121 return TokenNameFILE;
3122 index = 0; // __LINE__
3123 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3124 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3125 return TokenNameLINE;
3129 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3130 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3131 return TokenNameCLASS_C;
3135 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3136 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3137 && (data[++index] == '_'))
3138 return TokenNameMETHOD_C;
3142 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3143 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3144 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3145 return TokenNameFUNC_C;
3148 return TokenNameIdentifier;
3150 // as and array abstract
3154 if ((data[++index] == 's')) {
3157 return TokenNameIdentifier;
3161 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3162 return TokenNameand;
3164 return TokenNameIdentifier;
3168 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3169 return TokenNamearray;
3171 return TokenNameIdentifier;
3173 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3174 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3175 return TokenNameabstract;
3177 return TokenNameIdentifier;
3179 return TokenNameIdentifier;
3185 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3186 return TokenNamebreak;
3188 return TokenNameIdentifier;
3190 return TokenNameIdentifier;
3193 // case catch class clone const continue
3196 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3197 return TokenNamecase;
3199 return TokenNameIdentifier;
3201 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3202 return TokenNamecatch;
3204 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3205 return TokenNameclass;
3207 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3208 return TokenNameclone;
3210 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3211 return TokenNameconst;
3213 return TokenNameIdentifier;
3215 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3216 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3217 return TokenNamecontinue;
3219 return TokenNameIdentifier;
3221 return TokenNameIdentifier;
3224 // declare default do die
3225 // TODO delete define ==> no keyword !
3228 if ((data[++index] == 'o'))
3231 return TokenNameIdentifier;
3233 // if ((data[++index] == 'e')
3234 // && (data[++index] == 'f')
3235 // && (data[++index] == 'i')
3236 // && (data[++index] == 'n')
3237 // && (data[++index] == 'e'))
3238 // return TokenNamedefine;
3240 // return TokenNameIdentifier;
3242 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3243 && (data[++index] == 'r') && (data[++index] == 'e'))
3244 return TokenNamedeclare;
3246 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3247 && (data[++index] == 'l') && (data[++index] == 't'))
3248 return TokenNamedefault;
3250 return TokenNameIdentifier;
3252 return TokenNameIdentifier;
3255 // echo else exit elseif extends eval
3258 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3259 return TokenNameecho;
3260 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3261 return TokenNameelse;
3262 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3263 return TokenNameexit;
3264 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3265 return TokenNameeval;
3267 return TokenNameIdentifier;
3270 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3271 return TokenNameendif;
3272 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3273 return TokenNameempty;
3275 return TokenNameIdentifier;
3278 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3279 && (data[++index] == 'r'))
3280 return TokenNameendfor;
3281 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3282 && (data[++index] == 'f'))
3283 return TokenNameelseif;
3285 return TokenNameIdentifier;
3287 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3288 && (data[++index] == 'd') && (data[++index] == 's'))
3289 return TokenNameextends;
3291 return TokenNameIdentifier;
3294 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3295 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3296 return TokenNameendwhile;
3298 return TokenNameIdentifier;
3301 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3302 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3303 return TokenNameendswitch;
3305 return TokenNameIdentifier;
3308 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3309 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3310 && (data[++index] == 'e'))
3311 return TokenNameenddeclare;
3313 if ((data[++index] == 'n') // endforeach
3314 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3315 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3316 return TokenNameendforeach;
3318 return TokenNameIdentifier;
3320 return TokenNameIdentifier;
3323 // for false final function
3326 if ((data[++index] == 'o') && (data[++index] == 'r'))
3327 return TokenNamefor;
3329 return TokenNameIdentifier;
3331 // if ((data[++index] == 'a') && (data[++index] == 'l')
3332 // && (data[++index] == 's') && (data[++index] == 'e'))
3333 // return TokenNamefalse;
3334 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3335 return TokenNamefinal;
3337 return TokenNameIdentifier;
3340 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3341 && (data[++index] == 'c') && (data[++index] == 'h'))
3342 return TokenNameforeach;
3344 return TokenNameIdentifier;
3347 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3348 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3349 return TokenNamefunction;
3351 return TokenNameIdentifier;
3353 return TokenNameIdentifier;
3358 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3359 && (data[++index] == 'l')) {
3360 return TokenNameglobal;
3363 return TokenNameIdentifier;
3365 // if int isset include include_once instanceof interface implements
3368 if (data[++index] == 'f')
3371 return TokenNameIdentifier;
3373 // if ((data[++index] == 'n') && (data[++index] == 't'))
3374 // return TokenNameint;
3376 // return TokenNameIdentifier;
3378 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3379 return TokenNameisset;
3381 return TokenNameIdentifier;
3383 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3384 && (data[++index] == 'd') && (data[++index] == 'e'))
3385 return TokenNameinclude;
3387 return TokenNameIdentifier;
3390 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3391 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3392 return TokenNameinterface;
3394 return TokenNameIdentifier;
3397 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3398 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3399 && (data[++index] == 'f'))
3400 return TokenNameinstanceof;
3401 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3402 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3403 && (data[++index] == 's'))
3404 return TokenNameimplements;
3406 return TokenNameIdentifier;
3408 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3409 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3410 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3411 return TokenNameinclude_once;
3413 return TokenNameIdentifier;
3415 return TokenNameIdentifier;
3420 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3421 return TokenNamelist;
3424 return TokenNameIdentifier;
3429 if ((data[++index] == 'e') && (data[++index] == 'w'))
3430 return TokenNamenew;
3432 return TokenNameIdentifier;
3434 // if ((data[++index] == 'u') && (data[++index] == 'l')
3435 // && (data[++index] == 'l'))
3436 // return TokenNamenull;
3438 // return TokenNameIdentifier;
3440 return TokenNameIdentifier;
3445 if (data[++index] == 'r') {
3449 // if (length == 12) {
3450 // if ((data[++index] == 'l')
3451 // && (data[++index] == 'd')
3452 // && (data[++index] == '_')
3453 // && (data[++index] == 'f')
3454 // && (data[++index] == 'u')
3455 // && (data[++index] == 'n')
3456 // && (data[++index] == 'c')
3457 // && (data[++index] == 't')
3458 // && (data[++index] == 'i')
3459 // && (data[++index] == 'o')
3460 // && (data[++index] == 'n')) {
3461 // return TokenNameold_function;
3464 return TokenNameIdentifier;
3466 // print public private protected
3469 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3470 return TokenNameprint;
3472 return TokenNameIdentifier;
3474 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3475 && (data[++index] == 'c')) {
3476 return TokenNamepublic;
3478 return TokenNameIdentifier;
3480 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3481 && (data[++index] == 't') && (data[++index] == 'e')) {
3482 return TokenNameprivate;
3484 return TokenNameIdentifier;
3486 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3487 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3488 return TokenNameprotected;
3490 return TokenNameIdentifier;
3492 return TokenNameIdentifier;
3494 // return require require_once
3496 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3497 && (data[++index] == 'n')) {
3498 return TokenNamereturn;
3500 } else if (length == 7) {
3501 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3502 && (data[++index] == 'r') && (data[++index] == 'e')) {
3503 return TokenNamerequire;
3505 } else if (length == 12) {
3506 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3507 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3508 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3509 return TokenNamerequire_once;
3512 return TokenNameIdentifier;
3517 if (data[++index] == 't')
3518 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3519 return TokenNamestatic;
3521 return TokenNameIdentifier;
3522 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3523 && (data[++index] == 'h'))
3524 return TokenNameswitch;
3526 return TokenNameIdentifier;
3528 return TokenNameIdentifier;
3534 if ((data[++index] == 'r') && (data[++index] == 'y'))
3535 return TokenNametry;
3537 return TokenNameIdentifier;
3539 // if ((data[++index] == 'r') && (data[++index] == 'u')
3540 // && (data[++index] == 'e'))
3541 // return TokenNametrue;
3543 // return TokenNameIdentifier;
3545 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3546 return TokenNamethrow;
3548 return TokenNameIdentifier;
3550 return TokenNameIdentifier;
3556 if ((data[++index] == 's') && (data[++index] == 'e'))
3557 return TokenNameuse;
3559 return TokenNameIdentifier;
3561 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3562 return TokenNameunset;
3564 return TokenNameIdentifier;
3566 return TokenNameIdentifier;
3572 if ((data[++index] == 'a') && (data[++index] == 'r'))
3573 return TokenNamevar;
3575 return TokenNameIdentifier;
3577 return TokenNameIdentifier;
3583 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3584 return TokenNamewhile;
3586 return TokenNameIdentifier;
3587 // case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3588 // (data[++index]=='e') && (data[++index]=='f')&&
3589 // (data[++index]=='p'))
3590 // return TokenNamewidefp ;
3592 // return TokenNameIdentifier;
3594 return TokenNameIdentifier;
3600 if ((data[++index] == 'o') && (data[++index] == 'r'))
3601 return TokenNamexor;
3603 return TokenNameIdentifier;
3605 return TokenNameIdentifier;
3608 return TokenNameIdentifier;
/**
 * Scans a numeric literal starting at the current character and reports its token kind.
 *
 * Visible behavior:
 * - a leading 0 followed by x/X must be followed by at least one hexadecimal digit and yields
 *   TokenNameIntegerLiteral;
 * - a leading 0 followed by more digits is a "potential octal" that still becomes a double when a
 *   d/D suffix or a '.' follows;
 * - an exponent marker e/E (optionally followed by '+' or '-') must be followed by a digit;
 * - the literal starts out as floating when dotPrefix is true.
 *
 * @param dotPrefix true when the digits may be preceded by a '.'; the literal is then treated as
 *          floating from the start and no further '.' is consumed
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException with INVALID_HEXA when no hexadecimal digit follows the 0x prefix, or
 *           with INVALID_FLOAT when an exponent marker is not followed by a digit
 */
3612 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3613 // when entering this method the currentCharacter is the first
3614 // digit of the number , i.e. it may be preceded by a . when
3615 // dotPrefix is true
3616 boolean floating = dotPrefix;
3617 if ((!dotPrefix) && (currentCharacter == '0')) {
3618 if (getNextChar('x', 'X') >= 0) { // ----------hexa-----------------
3619 // force the first char of the hexa number do exist...
3620 // consume next character
3621 unicodeAsBackSlash = false;
3622 currentCharacter = source[currentPosition++];
3623 // if (((currentCharacter = source[currentPosition++]) == '\\')
3624 // && (source[currentPosition] == 'u')) {
3625 // getNextUnicodeChar();
3627 // if (withoutUnicodePtr != 0) {
3628 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3631 if (Character.digit(currentCharacter, 16) == -1)
3632 throw new InvalidInputException(INVALID_HEXA);
3634 while (getNextCharAsDigit(16)) {
3637 // if (getNextChar('l', 'L') >= 0)
3638 // return TokenNameLongLiteral;
3640 return TokenNameIntegerLiteral;
3642 // there is no x or X in the number
3643 // potential octal ! ... some one may write 000099.0 ! thus 00100 <
3644 // 00078.0 is true !!!!! crazy language
3645 if (getNextCharAsDigit()) {
3646 // -------------potential octal-----------------
3647 while (getNextCharAsDigit()) {
3650 // if (getNextChar('l', 'L') >= 0) {
3651 // return TokenNameLongLiteral;
3654 // if (getNextChar('f', 'F') >= 0) {
3655 // return TokenNameFloatingPointLiteral;
3657 if (getNextChar('d', 'D') >= 0) {
3658 return TokenNameDoubleLiteral;
3659 } else { // make the distinction between octal and float ....
3660 if (getNextChar('.')) { // bingo ! ....
3661 while (getNextCharAsDigit()) {
3664 if (getNextChar('e', 'E') >= 0) {
3665 // consume next character
3666 unicodeAsBackSlash = false;
3667 currentCharacter = source[currentPosition++];
3668 // if (((currentCharacter = source[currentPosition++]) == '\\')
3669 // && (source[currentPosition] == 'u')) {
3670 // getNextUnicodeChar();
3672 // if (withoutUnicodePtr != 0) {
3673 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3676 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3677 // consume next character
3678 unicodeAsBackSlash = false;
3679 currentCharacter = source[currentPosition++];
3680 // if (((currentCharacter = source[currentPosition++]) == '\\')
3681 // && (source[currentPosition] == 'u')) {
3682 // getNextUnicodeChar();
3684 // if (withoutUnicodePtr != 0) {
3685 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3686 // currentCharacter;
3690 if (!Character.isDigit(currentCharacter))
3691 throw new InvalidInputException(INVALID_FLOAT);
3692 while (getNextCharAsDigit()) {
3696 // if (getNextChar('f', 'F') >= 0)
3697 // return TokenNameFloatingPointLiteral;
3698 getNextChar('d', 'D'); // jump over potential d or D
3699 return TokenNameDoubleLiteral;
3701 return TokenNameIntegerLiteral;
3708 while (getNextCharAsDigit()) {
3711 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3712 // return TokenNameLongLiteral;
3713 if ((!dotPrefix) && (getNextChar('.'))) { // decimal part that can be empty
3714 while (getNextCharAsDigit()) {
3719 // if floating is true both exponent and suffix may be optional
3720 if (getNextChar('e', 'E') >= 0) {
3722 // consume next character
3723 unicodeAsBackSlash = false;
3724 currentCharacter = source[currentPosition++];
3725 // if (((currentCharacter = source[currentPosition++]) == '\\')
3726 // && (source[currentPosition] == 'u')) {
3727 // getNextUnicodeChar();
3729 // if (withoutUnicodePtr != 0) {
3730 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3733 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3736 unicodeAsBackSlash = false;
3737 currentCharacter = source[currentPosition++];
3738 // if (((currentCharacter = source[currentPosition++]) == '\\')
3739 // && (source[currentPosition] == 'u')) {
3740 // getNextUnicodeChar();
3742 // if (withoutUnicodePtr != 0) {
3743 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3747 if (!Character.isDigit(currentCharacter))
3748 throw new InvalidInputException(INVALID_FLOAT);
3749 while (getNextCharAsDigit()) {
3753 if (getNextChar('d', 'D') >= 0)
3754 return TokenNameDoubleLiteral;
3755 // if (getNextChar('f', 'F') >= 0)
3756 // return TokenNameFloatingPointLiteral;
3757 // the long flag has been tested before
3758 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3762 * Search the line number corresponding to a specific position
// Maps a source position to a line number by comparing it against the recorded line-end offsets
// in lineEnds; only the first linePtr + 1 entries are considered.
// NOTE(review): the bounds g/d and the probe index m look like a binary search over lineEnds;
// the statements that move the bounds and produce the result are not visible here - confirm.
3765 public final int getLineNumber(int position) {
3766 if (lineEnds == null)
3768 int length = linePtr + 1;
3771 int g = 0, d = length - 1;
3775 if (position < lineEnds[m]) {
3777 } else if (position > lineEnds[m]) {
3783 if (position < lineEnds[m]) {
// Setter taking the new PHP-mode flag.
// NOTE(review): presumably stores the flag in the phpMode field - the body is not visible here, confirm.
3789 public void setPHPMode(boolean mode) {
3793 public final void setSource(char[] source) {
3794 setSource(null, source);
/**
 * Installs the compilation unit and source buffer to scan and resets the scan state.
 *
 * A null source is replaced by an empty array so that this.source is never null afterwards.
 * initialPosition and currentPosition are reset to 0, containsAssertKeyword is cleared, and
 * withoutUnicodeBuffer is reallocated with the same length as the installed source.
 *
 * @param compilationUnit the unit the source belongs to; stored as given (setSource(char[]) passes null)
 * @param source the characters to scan, or null for an empty source
 */
3797 public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3798 // the source-buffer is set to sourceString
3799 this.compilationUnit = compilationUnit;
3800 if (source == null) {
3801 this.source = new char[0];
3803 this.source = source;
3806 initialPosition = currentPosition = 0;
3807 containsAssertKeyword = false;
3808 withoutUnicodeBuffer = new char[this.source.length];
3809 // encapsedStringStack = new Stack();
3812 public String toString() {
3813 if (startPosition == source.length)
3814 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3815 if (currentPosition > source.length)
3816 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3817 char front[] = new char[startPosition];
3818 System.arraycopy(source, 0, front, 0, startPosition);
3819 int middleLength = (currentPosition - 1) - startPosition + 1;
3821 if (middleLength > -1) {
3822 middle = new char[middleLength];
3823 System.arraycopy(source, startPosition, middle, 0, middleLength);
3825 middle = new char[0];
3827 char end[] = new char[source.length - (currentPosition - 1)];
3828 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3829 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3830 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable description of a token code, intended for diagnostics.
 *
 * Keywords and operators map to a fixed string. Tokens that carry text (identifiers, variables,
 * numeric and string literals, heredocs, inline HTML, whitespace, comments) are rendered as
 * Kind(text), where text comes from getCurrentTokenSource(); the result for those therefore
 * depends on the scanner's current token, not only on act. An unrecognized code yields
 * "not-a-token(act) " followed by the current token source.
 *
 * @param act the token code to describe (one of the TokenName* constants)
 * @return the display string for act
 */
3834 public final String toStringAction(int act) {
3836 case TokenNameERROR:
3837 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3839 case TokenNameINLINE_HTML:
3840 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3841 case TokenNameIdentifier:
3842 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3843 case TokenNameVariable:
3844 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3845 case TokenNameabstract:
3846 return "abstract"; //$NON-NLS-1$
3848 return "AND"; //$NON-NLS-1$
3849 case TokenNamearray:
3850 return "array"; //$NON-NLS-1$
3852 return "as"; //$NON-NLS-1$
3853 case TokenNamebreak:
3854 return "break"; //$NON-NLS-1$
3856 return "case"; //$NON-NLS-1$
3857 case TokenNameclass:
3858 return "class"; //$NON-NLS-1$
3859 case TokenNamecatch:
3860 return "catch"; //$NON-NLS-1$
3861 case TokenNameclone:
3864 case TokenNameconst:
3867 case TokenNamecontinue:
3868 return "continue"; //$NON-NLS-1$
3869 case TokenNamedefault:
3870 return "default"; //$NON-NLS-1$
3871 // case TokenNamedefine :
3872 // return "define"; //$NON-NLS-1$
3874 return "do"; //$NON-NLS-1$
3876 return "echo"; //$NON-NLS-1$
3878 return "else"; //$NON-NLS-1$
3879 case TokenNameelseif:
3880 return "elseif"; //$NON-NLS-1$
3881 case TokenNameendfor:
3882 return "endfor"; //$NON-NLS-1$
3883 case TokenNameendforeach:
3884 return "endforeach"; //$NON-NLS-1$
3885 case TokenNameendif:
3886 return "endif"; //$NON-NLS-1$
3887 case TokenNameendswitch:
3888 return "endswitch"; //$NON-NLS-1$
3889 case TokenNameendwhile:
3890 return "endwhile"; //$NON-NLS-1$
3893 case TokenNameextends:
3894 return "extends"; //$NON-NLS-1$
3895 // case TokenNamefalse :
3896 // return "false"; //$NON-NLS-1$
3897 case TokenNamefinal:
3898 return "final"; //$NON-NLS-1$
3900 return "for"; //$NON-NLS-1$
3901 case TokenNameforeach:
3902 return "foreach"; //$NON-NLS-1$
3903 case TokenNamefunction:
3904 return "function"; //$NON-NLS-1$
3905 case TokenNameglobal:
3906 return "global"; //$NON-NLS-1$
3908 return "if"; //$NON-NLS-1$
3909 case TokenNameimplements:
3910 return "implements"; //$NON-NLS-1$
3911 case TokenNameinclude:
3912 return "include"; //$NON-NLS-1$
3913 case TokenNameinclude_once:
3914 return "include_once"; //$NON-NLS-1$
3915 case TokenNameinstanceof:
3916 return "instanceof"; //$NON-NLS-1$
3917 case TokenNameinterface:
3918 return "interface"; //$NON-NLS-1$
3919 case TokenNameisset:
3920 return "isset"; //$NON-NLS-1$
3922 return "list"; //$NON-NLS-1$
3924 return "new"; //$NON-NLS-1$
3925 // case TokenNamenull :
3926 // return "null"; //$NON-NLS-1$
3928 return "OR"; //$NON-NLS-1$
3929 case TokenNameprint:
3930 return "print"; //$NON-NLS-1$
3931 case TokenNameprivate:
3932 return "private"; //$NON-NLS-1$
3933 case TokenNameprotected:
3934 return "protected"; //$NON-NLS-1$
3935 case TokenNamepublic:
3936 return "public"; //$NON-NLS-1$
3937 case TokenNamerequire:
3938 return "require"; //$NON-NLS-1$
3939 case TokenNamerequire_once:
3940 return "require_once"; //$NON-NLS-1$
3941 case TokenNamereturn:
3942 return "return"; //$NON-NLS-1$
3943 case TokenNamestatic:
3944 return "static"; //$NON-NLS-1$
3945 case TokenNameswitch:
3946 return "switch"; //$NON-NLS-1$
3947 // case TokenNametrue :
3948 // return "true"; //$NON-NLS-1$
3949 case TokenNameunset:
3950 return "unset"; //$NON-NLS-1$
3952 return "var"; //$NON-NLS-1$
3953 case TokenNamewhile:
3954 return "while"; //$NON-NLS-1$
3956 return "XOR"; //$NON-NLS-1$
3957 // case TokenNamethis :
3958 // return "$this"; //$NON-NLS-1$
3959 case TokenNameIntegerLiteral:
3960 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3961 case TokenNameDoubleLiteral:
3962 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3963 case TokenNameStringDoubleQuote:
3964 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3965 case TokenNameStringSingleQuote:
3966 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3967 case TokenNameStringInterpolated:
3968 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3969 case TokenNameEncapsedString0:
3970 return "`"; //$NON-NLS-1$
3971 // case TokenNameEncapsedString1:
3972 // return "\'"; //$NON-NLS-1$
3973 // case TokenNameEncapsedString2:
3974 // return "\""; //$NON-NLS-1$
3975 case TokenNameSTRING:
3976 return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3977 case TokenNameHEREDOC:
3978 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3979 case TokenNamePLUS_PLUS:
3980 return "++"; //$NON-NLS-1$
3981 case TokenNameMINUS_MINUS:
3982 return "--"; //$NON-NLS-1$
3983 case TokenNameEQUAL_EQUAL:
3984 return "=="; //$NON-NLS-1$
3985 case TokenNameEQUAL_EQUAL_EQUAL:
3986 return "==="; //$NON-NLS-1$
3987 case TokenNameEQUAL_GREATER:
3988 return "=>"; //$NON-NLS-1$
3989 case TokenNameLESS_EQUAL:
3990 return "<="; //$NON-NLS-1$
3991 case TokenNameGREATER_EQUAL:
3992 return ">="; //$NON-NLS-1$
3993 case TokenNameNOT_EQUAL:
3994 return "!="; //$NON-NLS-1$
3995 case TokenNameNOT_EQUAL_EQUAL:
3996 return "!=="; //$NON-NLS-1$
3997 case TokenNameLEFT_SHIFT:
3998 return "<<"; //$NON-NLS-1$
3999 case TokenNameRIGHT_SHIFT:
4000 return ">>"; //$NON-NLS-1$
4001 case TokenNamePLUS_EQUAL:
4002 return "+="; //$NON-NLS-1$
4003 case TokenNameMINUS_EQUAL:
4004 return "-="; //$NON-NLS-1$
4005 case TokenNameMULTIPLY_EQUAL:
4006 return "*="; //$NON-NLS-1$
4007 case TokenNameDIVIDE_EQUAL:
4008 return "/="; //$NON-NLS-1$
4009 case TokenNameAND_EQUAL:
4010 return "&="; //$NON-NLS-1$
4011 case TokenNameOR_EQUAL:
4012 return "|="; //$NON-NLS-1$
4013 case TokenNameXOR_EQUAL:
4014 return "^="; //$NON-NLS-1$
4015 case TokenNameREMAINDER_EQUAL:
4016 return "%="; //$NON-NLS-1$
4017 case TokenNameDOT_EQUAL:
4018 return ".="; //$NON-NLS-1$
4019 case TokenNameLEFT_SHIFT_EQUAL:
4020 return "<<="; //$NON-NLS-1$
4021 case TokenNameRIGHT_SHIFT_EQUAL:
4022 return ">>="; //$NON-NLS-1$
4023 case TokenNameOR_OR:
4024 return "||"; //$NON-NLS-1$
4025 case TokenNameAND_AND:
4026 return "&&"; //$NON-NLS-1$
4028 return "+"; //$NON-NLS-1$
4029 case TokenNameMINUS:
4030 return "-"; //$NON-NLS-1$
4031 case TokenNameMINUS_GREATER:
4034 return "!"; //$NON-NLS-1$
4035 case TokenNameREMAINDER:
4036 return "%"; //$NON-NLS-1$
4038 return "^"; //$NON-NLS-1$
4040 return "&"; //$NON-NLS-1$
4041 case TokenNameMULTIPLY:
4042 return "*"; //$NON-NLS-1$
4044 return "|"; //$NON-NLS-1$
4045 case TokenNameTWIDDLE:
4046 return "~"; //$NON-NLS-1$
4047 case TokenNameTWIDDLE_EQUAL:
4048 return "~="; //$NON-NLS-1$
4049 case TokenNameDIVIDE:
4050 return "/"; //$NON-NLS-1$
4051 case TokenNameGREATER:
4052 return ">"; //$NON-NLS-1$
4054 return "<"; //$NON-NLS-1$
4055 case TokenNameLPAREN:
4056 return "("; //$NON-NLS-1$
4057 case TokenNameRPAREN:
4058 return ")"; //$NON-NLS-1$
4059 case TokenNameLBRACE:
4060 return "{"; //$NON-NLS-1$
4061 case TokenNameRBRACE:
4062 return "}"; //$NON-NLS-1$
4063 case TokenNameLBRACKET:
4064 return "["; //$NON-NLS-1$
4065 case TokenNameRBRACKET:
4066 return "]"; //$NON-NLS-1$
4067 case TokenNameSEMICOLON:
4068 return ";"; //$NON-NLS-1$
4069 case TokenNameQUESTION:
4070 return "?"; //$NON-NLS-1$
4071 case TokenNameCOLON:
4072 return ":"; //$NON-NLS-1$
4073 case TokenNameCOMMA:
4074 return ","; //$NON-NLS-1$
4076 return "."; //$NON-NLS-1$
4077 case TokenNameEQUAL:
4078 return "="; //$NON-NLS-1$
4081 case TokenNameDOLLAR:
4083 case TokenNameDOLLAR_LBRACE:
4085 case TokenNameLBRACE_DOLLAR:
4088 return "EOF"; //$NON-NLS-1$
4089 case TokenNameWHITESPACE:
4090 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4091 case TokenNameCOMMENT_LINE:
4092 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4093 case TokenNameCOMMENT_BLOCK:
4094 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4095 case TokenNameCOMMENT_PHPDOC:
4096 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4097 // case TokenNameHTML :
4098 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
4101 return "__FILE__"; //$NON-NLS-1$
4103 return "__LINE__"; //$NON-NLS-1$
4104 case TokenNameCLASS_C:
4105 return "__CLASS__"; //$NON-NLS-1$
4106 case TokenNameMETHOD_C:
4107 return "__METHOD__"; //$NON-NLS-1$
4108 case TokenNameFUNC_C:
4109 return "__FUNCTION__"; //$NON-NLS-1$
4110 case TokenNameboolCAST:
4111 return "( bool )"; //$NON-NLS-1$
4112 case TokenNameintCAST:
4113 return "( int )"; //$NON-NLS-1$
4114 case TokenNamedoubleCAST:
4115 return "( double )"; //$NON-NLS-1$
4116 case TokenNameobjectCAST:
4117 return "( object )"; //$NON-NLS-1$
4118 case TokenNamestringCAST:
4119 return "( string )"; //$NON-NLS-1$
4121 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner that does not check for non-externalized string literals.
 *
 * @param tokenizeComments forwarded to the three-argument constructor
 * @param tokenizeWhiteSpace forwarded to the three-argument constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(tokenizeComments, tokenizeWhiteSpace, false);
}
/**
 * Creates a scanner with assert mode switched off.
 *
 * @param tokenizeComments forwarded to the four-argument constructor
 * @param tokenizeWhiteSpace forwarded to the four-argument constructor
 * @param checkNonExternalizedStringLiterals forwarded to the four-argument constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
  this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
}
/**
 * Creates a scanner that does not tokenize strings, has no task tags or task priorities, and
 * matches task tags case-sensitively.
 *
 * @param tokenizeComments forwarded to the full constructor
 * @param tokenizeWhiteSpace forwarded to the full constructor
 * @param checkNonExternalizedStringLiterals forwarded to the full constructor
 * @param assertMode forwarded to the full constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode) {
  this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null, true);
}
/**
 * Creates a scanner with every option given explicitly. The end-of-file position starts at
 * Integer.MAX_VALUE.
 *
 * @param tokenizeComments stored in the field of the same name
 * @param tokenizeWhiteSpace stored in the field of the same name
 * @param checkNonExternalizedStringLiterals stored in the field of the same name
 * @param assertMode accepted for signature compatibility only; the matching field assignment is
 *          commented out, so the value is not used
 * @param tokenizeStrings stored in the field of the same name
 * @param taskTags the task tags to look for in comments; stored as given (may be null)
 * @param taskPriorities priorities matching taskTags by index; stored as given (may be null)
 * @param isTaskCaseSensitive whether task tags are matched case-sensitively
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
  this.eofPosition = Integer.MAX_VALUE;
  this.tokenizeComments = tokenizeComments;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeStrings = tokenizeStrings;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  // this.assertMode = assertMode;
  // this.encapsedStringStack = null;
  this.taskTags = taskTags;
  this.taskPriorities = taskPriorities;
  // Fix: this argument used to be dropped, so checkTaskTag always saw the field's default value
  // regardless of what the caller asked for.
  this.isTaskCaseSensitive = isTaskCaseSensitive;
}
4155 private void checkNonExternalizeString() throws InvalidInputException {
4156 if (currentLine == null)
4158 parseTags(currentLine);
/**
 * Matches the NLS tags found in the current token's source against the string literals collected
 * for a line and publishes the literals in nonNLSStrings.
 *
 * Each tag is located by TAG_PREFIX; the text up to the next TAG_POSTFIX is parsed as a one-based
 * index (a non-numeric index becomes -1) and tested with line.exists(i). Afterwards every non-null
 * literal of the line is copied into nonNLSStrings: the field is set to null when there is none,
 * otherwise wasNonExternalizedStringLiteral is set and the array is trimmed to the number copied.
 *
 * NOTE(review): when a TAG_PREFIX has no following TAG_POSTFIX, indexOf returns -1 and the
 * substring call would throw - confirm whether callers can hit this.
 *
 * @param line the literals collected for the line being checked
 * @throws InvalidInputException declared by the signature
 */
4161 private void parseTags(NLSLine line) throws InvalidInputException {
4162 String s = new String(getCurrentTokenSource());
4163 int pos = s.indexOf(TAG_PREFIX);
4164 int lineLength = line.size();
4166 int start = pos + TAG_PREFIX_LENGTH;
4167 int end = s.indexOf(TAG_POSTFIX, start);
4168 String index = s.substring(start, end);
4171 i = Integer.parseInt(index) - 1;
4172 // Tags are one based not zero based.
4173 } catch (NumberFormatException e) {
4174 i = -1; // we don't want to consider this as a valid NLS tag
4176 if (line.exists(i)) {
4179 pos = s.indexOf(TAG_PREFIX, start);
4181 this.nonNLSStrings = new StringLiteral[lineLength];
4182 int nonNLSCounter = 0;
4183 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4184 StringLiteral literal = (StringLiteral) iterator.next();
4185 if (literal != null) {
4186 this.nonNLSStrings[nonNLSCounter++] = literal;
4189 if (nonNLSCounter == 0) {
4190 this.nonNLSStrings = null;
4194 this.wasNonExternalizedStringLiteral = true;
4195 if (nonNLSCounter != lineLength) {
4196 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
/**
 * Decodes the escape sequence at the current position and leaves the decoded character in
 * currentCharacter.
 *
 * The escape character is read straight from source. The visible translations produce backspace,
 * tab, line feed, form feed, carriage return, double quote, single quote and backslash. Anything
 * else is tried as an octal escape of up to three digits, where the third digit is only accepted
 * when the first one is in the range 0-3.
 *
 * @throws InvalidInputException with INVALID_ESCAPE when the sequence is not a recognized escape
 */
4201 public final void scanEscapeCharacter() throws InvalidInputException {
4202 // the string with "\\u" is a legal string of two chars \ and u
4203 // thus we use a direct access to the source (for regular cases).
4204 if (unicodeAsBackSlash) {
4205 // consume next character
4206 unicodeAsBackSlash = false;
4207 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4208 // (source[currentPosition] == 'u')) {
4209 // getNextUnicodeChar();
4211 if (withoutUnicodePtr != 0) {
4212 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
4216 currentCharacter = source[currentPosition++];
4217 switch (currentCharacter) {
4219 currentCharacter = '\b';
4222 currentCharacter = '\t';
4225 currentCharacter = '\n';
4228 currentCharacter = '\f';
4231 currentCharacter = '\r';
4234 currentCharacter = '\"';
4237 currentCharacter = '\'';
4240 currentCharacter = '\\';
4243 // -----------octal escape--------------
4245 // OctalDigit OctalDigit
4246 // ZeroToThree OctalDigit OctalDigit
4247 int number = Character.getNumericValue(currentCharacter);
4248 if (number >= 0 && number <= 7) {
4249 boolean zeroToThreeNot = number > 3;
4250 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4251 int digit = Character.getNumericValue(currentCharacter);
4252 if (digit >= 0 && digit <= 7) {
4253 number = (number * 8) + digit;
4254 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4255 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4256 // Digit --> ignore last character
4259 digit = Character.getNumericValue(currentCharacter);
4260 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4261 // OctalDigit OctalDigit
4262 number = (number * 8) + digit;
4263 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4264 // --> ignore last character
4268 } else { // has read \OctalDigit NonDigit--> ignore last
4272 } else { // has read \OctalDigit NonOctalDigit--> ignore last
4276 } else { // has read \OctalDigit --> ignore last character
4280 throw new InvalidInputException(INVALID_ESCAPE);
4281 currentCharacter = (char) number;
4283 throw new InvalidInputException(INVALID_ESCAPE);
4287 // check presence of task: tags
4288 // TODO (frederic) see if we need to take unicode characters into account...
/**
 * Looks for the configured task tags inside one comment and records each hit.
 *
 * First pass: scans the source from commentStart + 2 up to commentEnd (and eofPosition) and, for
 * every position, tries each entry of taskTags. A hit is appended to foundTaskTags,
 * foundTaskPriorities, foundTaskPositions and foundTaskMessages (the message starts out empty);
 * these arrays begin with room for 5 entries and double when full. The comparison ignores case
 * unless isTaskCaseSensitive is set.
 *
 * Second pass: for each task found by this call, determines the message text following the tag,
 * trims surrounding whitespace, and stores the message and the task's end position.
 *
 * @param commentStart offset of the comment's first character
 * @param commentEnd offset bounding the scan; positions at or beyond it are not examined
 */
4289 public void checkTaskTag(int commentStart, int commentEnd) {
4290 char[] src = this.source;
4292 // only look for newer task: tags
4293 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4296 int foundTaskIndex = this.foundTaskCount;
4297 char previous = src[commentStart + 1]; // should be '*' or '/'
4298 nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4300 char[] priority = null;
4301 // check for tag occurrence only if not ambiguous with javadoc tag
4302 if (previous != '@') {
4303 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4304 tag = this.taskTags[itag];
4305 int tagLength = tag.length;
4309 // ensure tag is not leaded with letter if tag starts with a letter
4310 if (Scanner.isPHPIdentifierStart(tag[0])) {
4311 if (Scanner.isPHPIdentifierPart(previous)) {
4316 for (int t = 0; t < tagLength; t++) {
4319 if (x >= this.eofPosition || x >= commentEnd)
4321 if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4322 if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case
4329 // ensure tag is not followed with letter if tag finishes with a
4331 if (i + tagLength < commentEnd && Scanner.isPHPIdentifierPart(src[i + tagLength - 1])) {
4332 if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
4335 if (this.foundTaskTags == null) {
4336 this.foundTaskTags = new char[5][];
4337 this.foundTaskMessages = new char[5][];
4338 this.foundTaskPriorities = new char[5][];
4339 this.foundTaskPositions = new int[5][];
4340 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4341 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4342 this.foundTaskCount);
4343 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4344 this.foundTaskCount);
4345 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4346 this.foundTaskCount);
4347 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4348 this.foundTaskCount);
4351 priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4353 this.foundTaskTags[this.foundTaskCount] = tag;
4354 this.foundTaskPriorities[this.foundTaskCount] = priority;
4355 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4356 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4357 this.foundTaskCount++;
4358 i += tagLength - 1; // will be incremented when looping
4364 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4365 // retrieve message start and end positions
4366 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4367 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4368 // at most beginning of next task
4369 if (max_value < msgStart) {
4370 max_value = msgStart; // would only occur if tag is before EOF.
4374 for (int j = msgStart; j < max_value; j++) {
4375 if ((c = src[j]) == '\n' || c == '\r') {
4381 for (int j = max_value; j > msgStart; j--) {
4382 if ((c = src[j]) == '*') {
4390 if (msgStart == end)
// NOTE(review): both trimming loops index src before testing msgStart <= end - confirm the bound
// cannot be crossed before the array access.
4393 while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4395 while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4397 // update the end position of the task
4398 this.foundTaskPositions[i][1] = end;
4399 // get the message source
4400 final int messageLength = end - msgStart + 1;
4401 char[] message = new char[messageLength];
4402 System.arraycopy(src, msgStart, message, 0, messageLength);
4403 this.foundTaskMessages[i] = message;
4407 // check presence of task: tags
4408 // public void checkTaskTag(int commentStart, int commentEnd) {
4409 // // only look for newer task: tags
4410 // if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount
4411 // - 1][0] >= commentStart) {
4414 // int foundTaskIndex = this.foundTaskCount;
4415 // nextChar: for (int i = commentStart; i < commentEnd && i <
4416 // this.eofPosition; i++) {
4417 // char[] tag = null;
4418 // char[] priority = null;
4419 // // check for tag occurrence
4420 // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4421 // tag = this.taskTags[itag];
4422 // priority = this.taskPriorities != null && itag < this.taskPriorities.length
4423 // ? this.taskPriorities[itag] : null;
4424 // int tagLength = tag.length;
4425 // for (int t = 0; t < tagLength; t++) {
4426 // if (this.source[i + t] != tag[t])
4427 // continue nextTag;
4429 // if (this.foundTaskTags == null) {
4430 // this.foundTaskTags = new char[5][];
4431 // this.foundTaskMessages = new char[5][];
4432 // this.foundTaskPriorities = new char[5][];
4433 // this.foundTaskPositions = new int[5][];
4434 // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4435 // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new
4436 // char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4437 // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new
4438 // char[this.foundTaskCount * 2][], 0,
4439 // this.foundTaskCount);
4440 // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities =
4441 // new char[this.foundTaskCount * 2][], 0,
4442 // this.foundTaskCount);
4443 // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new
4444 // int[this.foundTaskCount * 2][], 0,
4445 // this.foundTaskCount);
4447 // this.foundTaskTags[this.foundTaskCount] = tag;
4448 // this.foundTaskPriorities[this.foundTaskCount] = priority;
4449 // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength
4451 // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4452 // this.foundTaskCount++;
4453 // i += tagLength - 1; // will be incremented when looping
4456 // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4457 // // retrieve message start and end positions
4458 // int msgStart = this.foundTaskPositions[i][0] +
4459 // this.foundTaskTags[i].length;
4460 // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i +
4461 // 1][0] - 1 : commentEnd - 1;
4462 // // at most beginning of next task
4463 // if (max_value < msgStart)
4464 // max_value = msgStart; // would only occur if tag is before EOF.
4467 // for (int j = msgStart; j < max_value; j++) {
4468 // if ((c = this.source[j]) == '\n' || c == '\r') {
4474 // for (int j = max_value; j > msgStart; j--) {
4475 // if ((c = this.source[j]) == '*') {
4483 // if (msgStart == end)
4484 // continue; // empty
4485 // // trim the message
4486 // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4488 // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4490 // // update the end position of the task
4491 // this.foundTaskPositions[i][1] = end;
4492 // // get the message source
4493 // final int messageLength = end - msgStart + 1;
4494 // char[] message = new char[messageLength];
4495 // System.arraycopy(source, msgStart, message, 0, messageLength);
4496 // this.foundTaskMessages[i] = message;