1 /***********************************************************************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3 * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4 * and is available at http://www.eclipse.org/legal/cpl-v05.html
6 * Contributors: IBM Corporation - initial API and implementation
7 **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
13 import java.util.Stack;
15 import net.sourceforge.phpdt.core.compiler.CharOperation;
16 import net.sourceforge.phpdt.core.compiler.IScanner;
17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
20 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
22 public class Scanner implements IScanner, ITerminalSymbols {
// Main APIs:
// - getNextToken() returns the type of the current token (this value is not memorized by the scanner)
// - getCurrentTokenSource() provides the token's "REAL" source (i.e. all unicode escapes have been transformed into the correct char)
// - sourceStart gives the position into the stream
// - currentPosition-1 gives the sourceEnd position into the stream
29 private boolean assertMode;
31 public boolean useAssertAsAnIndentifier = false;
33 //flag indicating if processed source contains occurrences of keyword assert
34 public boolean containsAssertKeyword = false;
36 public boolean recordLineSeparator;
38 public boolean ignorePHPOneLiner = false;
40 public boolean phpMode = false;
42 public Stack encapsedStringStack = null;
44 public char currentCharacter;
46 public int startPosition;
48 public int currentPosition;
50 public int initialPosition, eofPosition;
// after this position, EOF tokens are generated instead of real tokens from the source
54 public boolean tokenizeComments;
56 public boolean tokenizeWhiteSpace;
58 public boolean tokenizeStrings;
// source should be viewed as a window (i.e. a part)
// of an entire, very large stream
65 public char[] withoutUnicodeBuffer;
67 public int withoutUnicodePtr;
69 //when == 0 ==> no unicode in the current token
70 public boolean unicodeAsBackSlash = false;
72 public boolean scanningFloatLiteral = false;
74 //support for /** comments
75 public int[] commentStops = new int[10];
77 public int[] commentStarts = new int[10];
79 public int commentPtr = -1; // no comment test with commentPtr value -1
81 protected int lastCommentLinePosition = -1;
83 //diet parsing support - jump over some method body when requested
84 public boolean diet = false;
86 //support for the poor-line-debuggers ....
87 //remember the position of the cr/lf
88 public int[] lineEnds = new int[250];
90 public int linePtr = -1;
92 public boolean wasAcr = false;
94 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
96 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
98 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
100 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
102 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
104 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
106 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
108 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
110 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
112 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
114 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
116 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
//----------------optimized identifier management------------------
119 static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
120 charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
121 charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
122 charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
123 charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
124 charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
125 charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
126 charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
127 charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
129 static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
131 'c' }, charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' },
132 charArray_vf = new char[] { '$', 'f' }, charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' },
133 charArray_vi = new char[] { '$', 'i' }, charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' },
134 charArray_vl = new char[] { '$', 'l' }, charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' },
135 charArray_vo = new char[] { '$', 'o' }, charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' },
136 charArray_vr = new char[] { '$', 'r' }, charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' },
137 charArray_vu = new char[] { '$', 'u' }, charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' },
138 charArray_vx = new char[] { '$', 'x' }, charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
140 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
142 static final int TableSize = 30, InternalTableSize = 6;
145 public static final int OptimizedLength = 6;
148 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
150 // support for detecting non-externalized string literals
151 int currentLineNr = -1;
153 int previousLineNr = -1;
155 NLSLine currentLine = null;
157 List lines = new ArrayList();
159 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
161 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
163 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
165 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
167 public StringLiteral[] nonNLSStrings = null;
169 public boolean checkNonExternalizedStringLiterals = true;
171 public boolean wasNonExternalizedStringLiteral = false;
// Instance initializer: every slot of the identifier cache starts out pointing at the
// shared all-'\u0000' placeholder array (initCharArray).
{
  for (int i = 0; i < OptimizedLength; i++) {
    for (int j = 0; j < TableSize; j++) {
      for (int k = 0; k < InternalTableSize; k++) {
        charArray_length[i][j][k] = initCharArray;
      }
    }
  }
}
182 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
184 public static final int RoundBracket = 0;
186 public static final int SquareBracket = 1;
188 public static final int CurlyBracket = 2;
190 public static final int BracketKinds = 3;
193 public char[][] foundTaskTags = null;
195 public char[][] foundTaskMessages;
197 public char[][] foundTaskPriorities = null;
199 public int[][] foundTaskPositions;
201 public int foundTaskCount = 0;
203 public char[][] taskTags = null;
205 public char[][] taskPriorities = null;
207 public boolean isTaskCaseSensitive = true;
209 public static final boolean DEBUG = false;
211 public static final boolean TRACE = false;
213 public ICompilationUnit compilationUnit = null;
216 * Determines if the specified character is permissible as the first character in a PHP identifier.
218 * The '$' character for HP variables isn't regarded as the first character !
220 public static boolean isPHPIdentifierStart(char ch) {
221 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
225 * Determines if the specified character may be part of a PHP identifier as other than the first character
227 public static boolean isPHPIdentifierPart(char ch) {
228 return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
231 public final boolean atEnd() {
232 // This code is not relevant if source is
233 // Only a part of the real stream input
234 return source.length == currentPosition;
237 public char[] getCurrentIdentifierSource() {
238 //return the token REAL source (aka unicodes are precomputed)
240 // if (withoutUnicodePtr != 0)
241 // //0 is used as a fast test flag so the real first char is in position 1
243 // withoutUnicodeBuffer,
245 // result = new char[withoutUnicodePtr],
247 // withoutUnicodePtr);
249 int length = currentPosition - startPosition;
250 switch (length) { // see OptimizedLength
252 return optimizedCurrentTokenSource1();
254 return optimizedCurrentTokenSource2();
256 return optimizedCurrentTokenSource3();
258 return optimizedCurrentTokenSource4();
260 return optimizedCurrentTokenSource5();
262 return optimizedCurrentTokenSource6();
265 System.arraycopy(source, startPosition, result = new char[length], 0, length);
270 public int getCurrentTokenEndPosition() {
271 return this.currentPosition - 1;
274 public final char[] getCurrentTokenSource() {
275 // Return the token REAL source (aka unicodes are precomputed)
277 // if (withoutUnicodePtr != 0)
278 // // 0 is used as a fast test flag so the real first char is in position 1
280 // withoutUnicodeBuffer,
282 // result = new char[withoutUnicodePtr],
284 // withoutUnicodePtr);
287 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
292 public final char[] getCurrentTokenSource(int startPos) {
293 // Return the token REAL source (aka unicodes are precomputed)
295 // if (withoutUnicodePtr != 0)
296 // // 0 is used as a fast test flag so the real first char is in position 1
298 // withoutUnicodeBuffer,
300 // result = new char[withoutUnicodePtr],
302 // withoutUnicodePtr);
305 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
310 public final char[] getCurrentTokenSourceString() {
311 //return the token REAL source (aka unicodes are precomputed).
312 //REMOVE the two " that are at the beginning and the end.
314 if (withoutUnicodePtr != 0)
315 //0 is used as a fast test flag so the real first char is in position 1
316 System.arraycopy(withoutUnicodeBuffer, 2,
317 //2 is 1 (real start) + 1 (to jump over the ")
318 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
321 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
326 public int getCurrentTokenStartPosition() {
327 return this.startPosition;
330 public final char[] getCurrentStringLiteralSource() {
331 // Return the token REAL source (aka unicodes are precomputed)
332 if (startPosition + 1 >= currentPosition) {
337 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
342 public final char[] getCurrentStringLiteralSource(int startPos) {
343 // Return the token REAL source (aka unicodes are precomputed)
346 System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
352 * Search the source position corresponding to the end of a given line number
354 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
356 * In case the given line number is inconsistent, answers -1.
358 public final int getLineEnd(int lineNumber) {
359 if (lineEnds == null)
361 if (lineNumber >= lineEnds.length)
365 if (lineNumber == lineEnds.length - 1)
367 return lineEnds[lineNumber - 1];
368 // next line start one character behind the lineEnd of the previous line
372 * Search the source position corresponding to the beginning of a given line number
374 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
376 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
378 * In case the given line number is inconsistent, answers -1.
380 public final int getLineStart(int lineNumber) {
381 if (lineEnds == null)
383 if (lineNumber >= lineEnds.length)
388 return initialPosition;
389 return lineEnds[lineNumber - 2] + 1;
390 // next line start one character behind the lineEnd of the previous line
393 public final boolean getNextChar(char testedChar) {
395 //handle the case of unicode.
396 //when a unicode appears then we must use a buffer that holds char
398 //At the end of this method currentCharacter holds the new visited char
399 //and currentPosition points right next after it
400 //Both previous lines are true if the currentCharacter is == to the
402 //On false, no side effect has occured.
403 //ALL getNextChar.... ARE OPTIMIZED COPIES
404 int temp = currentPosition;
406 currentCharacter = source[currentPosition++];
407 // if (((currentCharacter = source[currentPosition++]) == '\\')
408 // && (source[currentPosition] == 'u')) {
409 // //-------------unicode traitement ------------
410 // int c1, c2, c3, c4;
411 // int unicodeSize = 6;
412 // currentPosition++;
413 // while (source[currentPosition] == 'u') {
414 // currentPosition++;
418 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
420 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
422 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
424 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
426 // currentPosition = temp;
430 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
431 // if (currentCharacter != testedChar) {
432 // currentPosition = temp;
435 // unicodeAsBackSlash = currentCharacter == '\\';
437 // //need the unicode buffer
438 // if (withoutUnicodePtr == 0) {
439 // //buffer all the entries that have been left aside....
440 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
444 // withoutUnicodeBuffer,
446 // withoutUnicodePtr);
448 // //fill the buffer with the char
449 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
452 // } //-------------end unicode traitement--------------
454 if (currentCharacter != testedChar) {
455 currentPosition = temp;
458 unicodeAsBackSlash = false;
459 // if (withoutUnicodePtr != 0)
460 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
463 } catch (IndexOutOfBoundsException e) {
464 unicodeAsBackSlash = false;
465 currentPosition = temp;
470 public final int getNextChar(char testedChar1, char testedChar2) {
471 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
472 //test can be done with (x==0) for the first and (x>0) for the second
473 //handle the case of unicode.
474 //when a unicode appears then we must use a buffer that holds char
476 //At the end of this method currentCharacter holds the new visited char
477 //and currentPosition points right next after it
478 //Both previous lines are true if the currentCharacter is == to the
480 //On false, no side effect has occured.
481 //ALL getNextChar.... ARE OPTIMIZED COPIES
482 int temp = currentPosition;
485 currentCharacter = source[currentPosition++];
486 // if (((currentCharacter = source[currentPosition++]) == '\\')
487 // && (source[currentPosition] == 'u')) {
488 // //-------------unicode traitement ------------
489 // int c1, c2, c3, c4;
490 // int unicodeSize = 6;
491 // currentPosition++;
492 // while (source[currentPosition] == 'u') {
493 // currentPosition++;
497 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
499 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
501 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
503 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
505 // currentPosition = temp;
509 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
510 // if (currentCharacter == testedChar1)
512 // else if (currentCharacter == testedChar2)
515 // currentPosition = temp;
519 // //need the unicode buffer
520 // if (withoutUnicodePtr == 0) {
521 // //buffer all the entries that have been left aside....
522 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
526 // withoutUnicodeBuffer,
528 // withoutUnicodePtr);
530 // //fill the buffer with the char
531 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
533 // } //-------------end unicode traitement--------------
535 if (currentCharacter == testedChar1)
537 else if (currentCharacter == testedChar2)
540 currentPosition = temp;
543 // if (withoutUnicodePtr != 0)
544 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
547 } catch (IndexOutOfBoundsException e) {
548 currentPosition = temp;
553 public final boolean getNextCharAsDigit() {
555 //handle the case of unicode.
556 //when a unicode appears then we must use a buffer that holds char
558 //At the end of this method currentCharacter holds the new visited char
559 //and currentPosition points right next after it
560 //Both previous lines are true if the currentCharacter is a digit
561 //On false, no side effect has occured.
562 //ALL getNextChar.... ARE OPTIMIZED COPIES
563 int temp = currentPosition;
565 currentCharacter = source[currentPosition++];
566 // if (((currentCharacter = source[currentPosition++]) == '\\')
567 // && (source[currentPosition] == 'u')) {
568 // //-------------unicode traitement ------------
569 // int c1, c2, c3, c4;
570 // int unicodeSize = 6;
571 // currentPosition++;
572 // while (source[currentPosition] == 'u') {
573 // currentPosition++;
577 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
579 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
581 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
583 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
585 // currentPosition = temp;
589 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
590 // if (!Character.isDigit(currentCharacter)) {
591 // currentPosition = temp;
595 // //need the unicode buffer
596 // if (withoutUnicodePtr == 0) {
597 // //buffer all the entries that have been left aside....
598 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
602 // withoutUnicodeBuffer,
604 // withoutUnicodePtr);
606 // //fill the buffer with the char
607 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
609 // } //-------------end unicode traitement--------------
611 if (!Character.isDigit(currentCharacter)) {
612 currentPosition = temp;
615 // if (withoutUnicodePtr != 0)
616 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
619 } catch (IndexOutOfBoundsException e) {
620 currentPosition = temp;
625 public final boolean getNextCharAsDigit(int radix) {
627 //handle the case of unicode.
628 //when a unicode appears then we must use a buffer that holds char
630 //At the end of this method currentCharacter holds the new visited char
631 //and currentPosition points right next after it
632 //Both previous lines are true if the currentCharacter is a digit base on
634 //On false, no side effect has occured.
635 //ALL getNextChar.... ARE OPTIMIZED COPIES
636 int temp = currentPosition;
638 currentCharacter = source[currentPosition++];
639 // if (((currentCharacter = source[currentPosition++]) == '\\')
640 // && (source[currentPosition] == 'u')) {
641 // //-------------unicode traitement ------------
642 // int c1, c2, c3, c4;
643 // int unicodeSize = 6;
644 // currentPosition++;
645 // while (source[currentPosition] == 'u') {
646 // currentPosition++;
650 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
652 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
654 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
656 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
658 // currentPosition = temp;
662 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
663 // if (Character.digit(currentCharacter, radix) == -1) {
664 // currentPosition = temp;
668 // //need the unicode buffer
669 // if (withoutUnicodePtr == 0) {
670 // //buffer all the entries that have been left aside....
671 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
675 // withoutUnicodeBuffer,
677 // withoutUnicodePtr);
679 // //fill the buffer with the char
680 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
682 // } //-------------end unicode traitement--------------
684 if (Character.digit(currentCharacter, radix) == -1) {
685 currentPosition = temp;
688 // if (withoutUnicodePtr != 0)
689 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
692 } catch (IndexOutOfBoundsException e) {
693 currentPosition = temp;
698 public boolean getNextCharAsJavaIdentifierPart() {
700 //handle the case of unicode.
701 //when a unicode appears then we must use a buffer that holds char
703 //At the end of this method currentCharacter holds the new visited char
704 //and currentPosition points right next after it
705 //Both previous lines are true if the currentCharacter is a
706 // JavaIdentifierPart
707 //On false, no side effect has occured.
708 //ALL getNextChar.... ARE OPTIMIZED COPIES
709 int temp = currentPosition;
711 currentCharacter = source[currentPosition++];
712 // if (((currentCharacter = source[currentPosition++]) == '\\')
713 // && (source[currentPosition] == 'u')) {
714 // //-------------unicode traitement ------------
715 // int c1, c2, c3, c4;
716 // int unicodeSize = 6;
717 // currentPosition++;
718 // while (source[currentPosition] == 'u') {
719 // currentPosition++;
723 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
725 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
727 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
729 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
731 // currentPosition = temp;
735 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
736 // if (!isPHPIdentifierPart(currentCharacter)) {
737 // currentPosition = temp;
741 // //need the unicode buffer
742 // if (withoutUnicodePtr == 0) {
743 // //buffer all the entries that have been left aside....
744 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
748 // withoutUnicodeBuffer,
750 // withoutUnicodePtr);
752 // //fill the buffer with the char
753 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
755 // } //-------------end unicode traitement--------------
757 if (!isPHPIdentifierPart(currentCharacter)) {
758 currentPosition = temp;
761 // if (withoutUnicodePtr != 0)
762 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
765 } catch (IndexOutOfBoundsException e) {
766 currentPosition = temp;
771 public int getCastOrParen() {
772 int tempPosition = currentPosition;
773 char tempCharacter = currentCharacter;
774 int tempToken = TokenNameLPAREN;
775 boolean found = false;
776 StringBuffer buf = new StringBuffer();
779 currentCharacter = source[currentPosition++];
780 } while (currentCharacter == ' ' || currentCharacter == '\t');
781 while ((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
782 buf.append(currentCharacter);
783 currentCharacter = source[currentPosition++];
785 if (buf.length() >= 3 && buf.length() <= 7) {
786 char[] data = buf.toString().toCharArray();
788 switch (data.length) {
791 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
793 tempToken = TokenNameintCAST;
798 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
800 tempToken = TokenNameboolCAST;
803 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
805 tempToken = TokenNamedoubleCAST;
811 if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
812 && (data[++index] == 'y')) {
814 tempToken = TokenNamearrayCAST;
817 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
818 && (data[++index] == 't')) {
820 tempToken = TokenNameunsetCAST;
823 if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
824 && (data[++index] == 't')) {
826 tempToken = TokenNamedoubleCAST;
832 // object string double
833 if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
834 && (data[++index] == 'c') && (data[++index] == 't')) {
836 tempToken = TokenNameobjectCAST;
839 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
840 && (data[++index] == 'n') && (data[++index] == 'g')) {
842 tempToken = TokenNamestringCAST;
845 if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
846 && (data[++index] == 'l') && (data[++index] == 'e')) {
848 tempToken = TokenNamedoubleCAST;
855 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
856 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
858 tempToken = TokenNameboolCAST;
861 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
862 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
864 tempToken = TokenNameintCAST;
870 while (currentCharacter == ' ' || currentCharacter == '\t') {
871 currentCharacter = source[currentPosition++];
873 if (currentCharacter == ')') {
878 } catch (IndexOutOfBoundsException e) {
880 currentCharacter = tempCharacter;
881 currentPosition = tempPosition;
882 return TokenNameLPAREN;
885 public void consumeStringInterpolated() throws InvalidInputException {
887 // consume next character
888 unicodeAsBackSlash = false;
889 currentCharacter = source[currentPosition++];
890 // if (((currentCharacter = source[currentPosition++]) == '\\')
891 // && (source[currentPosition] == 'u')) {
892 // getNextUnicodeChar();
894 // if (withoutUnicodePtr != 0) {
895 // withoutUnicodeBuffer[++withoutUnicodePtr] =
899 while (currentCharacter != '`') {
900 /** ** in PHP \r and \n are valid in string literals *** */
901 // if ((currentCharacter == '\n')
902 // || (currentCharacter == '\r')) {
903 // // relocate if finding another quote fairly close: thus unicode
904 // '/u000D' will be fully consumed
905 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
906 // if (currentPosition + lookAhead == source.length)
908 // if (source[currentPosition + lookAhead] == '\n')
910 // if (source[currentPosition + lookAhead] == '\"') {
911 // currentPosition += lookAhead + 1;
915 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
917 if (currentCharacter == '\\') {
918 int escapeSize = currentPosition;
919 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
920 //scanEscapeCharacter make a side effect on this value and we need
921 // the previous value few lines down this one
922 scanDoubleQuotedEscapeCharacter();
923 escapeSize = currentPosition - escapeSize;
924 if (withoutUnicodePtr == 0) {
925 //buffer all the entries that have been left aside....
926 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
927 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
928 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
929 } else { //overwrite the / in the buffer
930 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
931 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
932 // where only one is correct
937 // consume next character
938 unicodeAsBackSlash = false;
939 currentCharacter = source[currentPosition++];
940 // if (((currentCharacter = source[currentPosition++]) == '\\')
941 // && (source[currentPosition] == 'u')) {
942 // getNextUnicodeChar();
944 if (withoutUnicodePtr != 0) {
945 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
949 } catch (IndexOutOfBoundsException e) {
950 // reset end position for error reporting
951 currentPosition -= 2;
952 throw new InvalidInputException(UNTERMINATED_STRING);
953 } catch (InvalidInputException e) {
954 if (e.getMessage().equals(INVALID_ESCAPE)) {
955 // relocate if finding another quote fairly close: thus unicode
956 // '/u000D' will be fully consumed
957 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
958 if (currentPosition + lookAhead == source.length)
960 if (source[currentPosition + lookAhead] == '\n')
962 if (source[currentPosition + lookAhead] == '`') {
963 currentPosition += lookAhead + 1;
970 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
971 // //$NON-NLS-?$ where ? is an
973 if (currentLine == null) {
974 currentLine = new NLSLine();
975 lines.add(currentLine);
977 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
981 public void consumeStringConstant() throws InvalidInputException {
983 // consume next character
984 unicodeAsBackSlash = false;
985 currentCharacter = source[currentPosition++];
986 // if (((currentCharacter = source[currentPosition++]) == '\\')
987 // && (source[currentPosition] == 'u')) {
988 // getNextUnicodeChar();
990 // if (withoutUnicodePtr != 0) {
991 // withoutUnicodeBuffer[++withoutUnicodePtr] =
995 while (currentCharacter != '\'') {
996 /** ** in PHP \r and \n are valid in string literals *** */
997 // if ((currentCharacter == '\n')
998 // || (currentCharacter == '\r')) {
999 // // relocate if finding another quote fairly close: thus unicode
1000 // '/u000D' will be fully consumed
1001 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1002 // if (currentPosition + lookAhead == source.length)
1004 // if (source[currentPosition + lookAhead] == '\n')
1006 // if (source[currentPosition + lookAhead] == '\"') {
1007 // currentPosition += lookAhead + 1;
1011 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1013 if (currentCharacter == '\\') {
1014 int escapeSize = currentPosition;
1015 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1016 //scanEscapeCharacter make a side effect on this value and we need
1017 // the previous value few lines down this one
1018 scanSingleQuotedEscapeCharacter();
1019 escapeSize = currentPosition - escapeSize;
1020 if (withoutUnicodePtr == 0) {
1021 //buffer all the entries that have been left aside....
1022 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1023 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1024 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1025 } else { //overwrite the / in the buffer
1026 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1027 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1028 // where only one is correct
1029 withoutUnicodePtr--;
1033 // consume next character
1034 unicodeAsBackSlash = false;
1035 currentCharacter = source[currentPosition++];
1036 // if (((currentCharacter = source[currentPosition++]) == '\\')
1037 // && (source[currentPosition] == 'u')) {
1038 // getNextUnicodeChar();
1040 if (withoutUnicodePtr != 0) {
1041 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1045 } catch (IndexOutOfBoundsException e) {
1046 // reset end position for error reporting
1047 currentPosition -= 2;
1048 throw new InvalidInputException(UNTERMINATED_STRING);
1049 } catch (InvalidInputException e) {
1050 if (e.getMessage().equals(INVALID_ESCAPE)) {
1051 // relocate if finding another quote fairly close: thus unicode
1052 // '/u000D' will be fully consumed
1053 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1054 if (currentPosition + lookAhead == source.length)
1056 if (source[currentPosition + lookAhead] == '\n')
1058 if (source[currentPosition + lookAhead] == '\'') {
1059 currentPosition += lookAhead + 1;
1066 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1067 // //$NON-NLS-?$ where ? is an
1069 if (currentLine == null) {
1070 currentLine = new NLSLine();
1071 lines.add(currentLine);
1073 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Consumes the rest of a double quoted string literal: characters are read until the closing double quote has been consumed.
 * getNextToken() calls this method when tokenizeStrings is set, right before it returns TokenNameStringDoubleQuote.
 * <p>
 * A backslash is resolved through scanDoubleQuotedEscapeCharacter(). On the first escape the characters read so far are copied
 * from the source into withoutUnicodeBuffer; while withoutUnicodePtr is non-zero every further character is appended to that
 * buffer as well. Line breaks are accepted inside the literal. When checkNonExternalizedStringLiterals is set, the literal is
 * added to the current NLSLine (created on demand).
 *
 * @throws InvalidInputException
 *           with UNTERMINATED_STRING when the end of the source is reached before the closing double quote
 */
1077 public void consumeStringLiteral() throws InvalidInputException {
1079 // consume next character
1080 unicodeAsBackSlash = false;
1081 currentCharacter = source[currentPosition++];
1082 // if (((currentCharacter = source[currentPosition++]) == '\\')
1083 // && (source[currentPosition] == 'u')) {
1084 // getNextUnicodeChar();
1086 // if (withoutUnicodePtr != 0) {
1087 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1088 // currentCharacter;
1091 while (currentCharacter != '"') {
1092 /** ** in PHP \r and \n are valid in string literals *** */
1093 // if ((currentCharacter == '\n')
1094 // || (currentCharacter == '\r')) {
1095 // // relocate if finding another quote fairly close: thus unicode
1096 // '/u000D' will be fully consumed
1097 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1098 // if (currentPosition + lookAhead == source.length)
1100 // if (source[currentPosition + lookAhead] == '\n')
1102 // if (source[currentPosition + lookAhead] == '\"') {
1103 // currentPosition += lookAhead + 1;
1107 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1109 if (currentCharacter == '\\') {
// position right behind the backslash; turned into the length of the escape sequence once it has been scanned
1110 int escapeSize = currentPosition;
1111 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1112 //scanEscapeCharacter make a side effect on this value and we need
1113 // the previous value few lines down this one
1114 scanDoubleQuotedEscapeCharacter();
1115 escapeSize = currentPosition - escapeSize;
1116 if (withoutUnicodePtr == 0) {
1117 //buffer all the entries that have been left aside....
1118 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1119 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1120 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1121 } else { //overwrite the / in the buffer
1122 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1123 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1124 // where only one is correct
1125 withoutUnicodePtr--;
1129 // consume next character
1130 unicodeAsBackSlash = false;
1131 currentCharacter = source[currentPosition++];
1132 // if (((currentCharacter = source[currentPosition++]) == '\\')
1133 // && (source[currentPosition] == 'u')) {
1134 // getNextUnicodeChar();
1136 if (withoutUnicodePtr != 0) {
1137 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1141 } catch (IndexOutOfBoundsException e) {
1142 // reset end position for error reporting
1143 currentPosition -= 2;
1144 throw new InvalidInputException(UNTERMINATED_STRING);
1145 } catch (InvalidInputException e) {
1146 if (e.getMessage().equals(INVALID_ESCAPE)) {
1147 // relocate if finding another quote fairly close: thus unicode
1148 // '/u000D' will be fully consumed
1149 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1150 if (currentPosition + lookAhead == source.length)
1152 if (source[currentPosition + lookAhead] == '\n')
1154 if (source[currentPosition + lookAhead] == '\"') {
1155 currentPosition += lookAhead + 1;
1162 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1163 // //$NON-NLS-?$ where ? is an
1165 if (currentLine == null) {
1166 currentLine = new NLSLine();
1167 lines.add(currentLine);
1169 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Scans the next token starting at currentPosition and returns its token constant.
 * <p>
 * Depending on the scanner state the call is delegated to getInlinedHTML(int), continues inside an encapsed string (the character
 * on top of encapsedStringStack decides how the following characters are interpreted), or skips white space and dispatches on the
 * first significant character to recognise operators, string starts, heredocs, comments, variables, identifiers and numbers.
 * withoutUnicodePtr is reset for every new token and startPosition is moved to the start of the token.
 *
 * @return the constant of the scanned token; TokenNameEOF once currentPosition has passed eofPosition or the end of the source
 *         array has been hit
 * @throws InvalidInputException
 *           for instance with UNTERMINATED_COMMENT when a block comment is still open at the end of the source
 */
1173 public int getNextToken() throws InvalidInputException {
1175 return getInlinedHTML(currentPosition);
1178 this.wasAcr = false;
1180 jumpOverMethodBody();
1182 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1186 withoutUnicodePtr = 0;
1187 //start with a new token
// delimiter of the encapsed string being scanned; stays a blank when encapsedStringStack is empty
1188 char encapsedChar = ' ';
1189 if (!encapsedStringStack.isEmpty()) {
1190 encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
// the top of the stack is a string delimiter: continue with the literal part of an encapsed string
1192 if (encapsedChar != '$' && encapsedChar != ' ') {
1193 currentCharacter = source[currentPosition++];
1194 if (currentCharacter == encapsedChar) {
1195 switch (currentCharacter) {
1197 return TokenNameEncapsedString0;
1199 return TokenNameEncapsedString1;
1201 return TokenNameEncapsedString2;
1204 while (currentCharacter != encapsedChar) {
1205 /** ** in PHP \r and \n are valid in string literals *** */
1206 switch (currentCharacter) {
1208 int escapeSize = currentPosition;
1209 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1210 //scanEscapeCharacter make a side effect on this value and
1211 // we need the previous value few lines down this one
1212 scanDoubleQuotedEscapeCharacter();
1213 escapeSize = currentPosition - escapeSize;
1214 if (withoutUnicodePtr == 0) {
1215 //buffer all the entries that have been left aside....
1216 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1217 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1218 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1219 } else { //overwrite the / in the buffer
1220 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1221 if (backSlashAsUnicodeInString) { //there are TWO \ in
1222 withoutUnicodePtr--;
// an identifier start or an opening brace follows: push the dollar marker so that the next call tokenizes the embedded expression
1227 if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
1229 encapsedStringStack.push(new Character('$'));
1230 return TokenNameSTRING;
1234 if (source[currentPosition] == '$') { // CURLY_OPEN
1236 encapsedStringStack.push(new Character('$'));
1237 return TokenNameSTRING;
1240 // consume next character
1241 unicodeAsBackSlash = false;
1242 currentCharacter = source[currentPosition++];
1243 if (withoutUnicodePtr != 0) {
1244 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1249 return TokenNameSTRING;
1251 // ---------Consume white space and handles startPosition---------
1252 int whiteStart = currentPosition;
1253 startPosition = currentPosition;
1254 currentCharacter = source[currentPosition++];
// the dollar marker is on top of the stack: tokenize the expression embedded in an encapsed string
1255 if (encapsedChar == '$') {
1256 switch (currentCharacter) {
1258 currentCharacter = source[currentPosition++];
1259 return TokenNameSTRING;
1261 if (encapsedChar == '$') {
1262 if (getNextChar('$'))
1263 return TokenNameLBRACE_DOLLAR;
1265 return TokenNameLBRACE;
1267 return TokenNameRBRACE;
1269 return TokenNameLBRACKET;
1271 return TokenNameRBRACKET;
1273 if (tokenizeStrings) {
1274 consumeStringConstant();
1275 return TokenNameStringSingleQuote;
1277 return TokenNameEncapsedString1;
1279 return TokenNameEncapsedString2;
1281 if (tokenizeStrings) {
1282 consumeStringInterpolated();
1283 return TokenNameStringInterpolated;
1285 return TokenNameEncapsedString0;
1287 if (getNextChar('>'))
1288 return TokenNameMINUS_GREATER;
1289 return TokenNameSTRING;
1291 if (currentCharacter == '$') {
1292 int oldPosition = currentPosition;
1294 currentCharacter = source[currentPosition++];
1295 if (currentCharacter == '{') {
1296 return TokenNameDOLLAR_LBRACE;
1298 if (isPHPIdentifierStart(currentCharacter)) {
1299 return scanIdentifierOrKeyword(true);
1301 currentPosition = oldPosition;
1302 return TokenNameSTRING;
1304 } catch (IndexOutOfBoundsException e) {
1305 currentPosition = oldPosition;
1306 return TokenNameSTRING;
1309 if (isPHPIdentifierStart(currentCharacter))
1310 return scanIdentifierOrKeyword(false);
1311 if (Character.isDigit(currentCharacter))
1312 return scanNumber(false);
1313 return TokenNameERROR;
1316 // boolean isWhiteSpace;
1318 while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1319 startPosition = currentPosition;
1320 currentCharacter = source[currentPosition++];
1321 // if (((currentCharacter = source[currentPosition++]) == '\\')
1322 // && (source[currentPosition] == 'u')) {
1323 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1325 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1326 checkNonExternalizeString();
1327 if (recordLineSeparator) {
1328 pushLineSeparator();
1333 // isWhiteSpace = (currentCharacter == ' ')
1334 // || Character.isWhitespace(currentCharacter);
1337 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1338 // reposition scanner in case we are interested by spaces as tokens
1340 startPosition = whiteStart;
1341 return TokenNameWHITESPACE;
1343 //little trick to get out in the middle of a source computation
1344 if (currentPosition > eofPosition)
1345 return TokenNameEOF;
1346 // ---------Identify the next token-------------
1347 switch (currentCharacter) {
1349 return getCastOrParen();
1351 return TokenNameRPAREN;
1353 return TokenNameLBRACE;
1355 return TokenNameRBRACE;
1357 return TokenNameLBRACKET;
1359 return TokenNameRBRACKET;
1361 return TokenNameSEMICOLON;
1363 return TokenNameCOMMA;
1365 if (getNextChar('='))
1366 return TokenNameDOT_EQUAL;
1367 if (getNextCharAsDigit())
1368 return scanNumber(true);
1369 return TokenNameDOT;
1372 if ((test = getNextChar('+', '=')) == 0)
1373 return TokenNamePLUS_PLUS;
1375 return TokenNamePLUS_EQUAL;
1376 return TokenNamePLUS;
1380 if ((test = getNextChar('-', '=')) == 0)
1381 return TokenNameMINUS_MINUS;
1383 return TokenNameMINUS_EQUAL;
1384 if (getNextChar('>'))
1385 return TokenNameMINUS_GREATER;
1386 return TokenNameMINUS;
1389 if (getNextChar('='))
1390 return TokenNameTWIDDLE_EQUAL;
1391 return TokenNameTWIDDLE;
1393 if (getNextChar('=')) {
1394 if (getNextChar('=')) {
1395 return TokenNameNOT_EQUAL_EQUAL;
1397 return TokenNameNOT_EQUAL;
1399 return TokenNameNOT;
1401 if (getNextChar('='))
1402 return TokenNameMULTIPLY_EQUAL;
1403 return TokenNameMULTIPLY;
1405 if (getNextChar('='))
1406 return TokenNameREMAINDER_EQUAL;
1407 return TokenNameREMAINDER;
1409 int oldPosition = currentPosition;
1411 currentCharacter = source[currentPosition++];
1412 } catch (IndexOutOfBoundsException e) {
1413 currentPosition = oldPosition;
1414 return TokenNameLESS;
1416 switch (currentCharacter) {
1418 return TokenNameLESS_EQUAL;
1420 return TokenNameNOT_EQUAL;
1422 if (getNextChar('='))
1423 return TokenNameLEFT_SHIFT_EQUAL;
1424 if (getNextChar('<')) {
1425 currentCharacter = source[currentPosition++];
1426 while (Character.isWhitespace(currentCharacter)) {
1427 currentCharacter = source[currentPosition++];
// index of the first character of the heredoc identifier
1429 int heredocStart = currentPosition - 1;
1430 int heredocLength = 0;
1431 if (isPHPIdentifierStart(currentCharacter)) {
1432 currentCharacter = source[currentPosition++];
1434 return TokenNameERROR;
1436 while (isPHPIdentifierPart(currentCharacter)) {
1437 currentCharacter = source[currentPosition++];
// number of characters of the heredoc identifier
1439 heredocLength = currentPosition - heredocStart - 1;
1440 // heredoc end-tag determination
1441 boolean endTag = true;
1444 ch = source[currentPosition++];
1445 if (ch == '\r' || ch == '\n') {
1446 if (recordLineSeparator) {
1447 pushLineSeparator();
// after a line break, compare the following characters with the heredoc identifier
1451 for (int i = 0; i < heredocLength; i++) {
1452 if (source[currentPosition + i] != source[heredocStart + i]) {
1458 currentPosition += heredocLength - 1;
1459 currentCharacter = source[currentPosition++];
1460 break; // do...while loop
1466 return TokenNameHEREDOC;
1468 return TokenNameLEFT_SHIFT;
1470 currentPosition = oldPosition;
1471 return TokenNameLESS;
1475 if ((test = getNextChar('=', '>')) == 0)
1476 return TokenNameGREATER_EQUAL;
1478 if ((test = getNextChar('=', '>')) == 0)
1479 return TokenNameRIGHT_SHIFT_EQUAL;
1480 return TokenNameRIGHT_SHIFT;
1482 return TokenNameGREATER;
1485 if (getNextChar('=')) {
1486 if (getNextChar('=')) {
1487 return TokenNameEQUAL_EQUAL_EQUAL;
1489 return TokenNameEQUAL_EQUAL;
1491 if (getNextChar('>'))
1492 return TokenNameEQUAL_GREATER;
1493 return TokenNameEQUAL;
1496 if ((test = getNextChar('&', '=')) == 0)
1497 return TokenNameAND_AND;
1499 return TokenNameAND_EQUAL;
1500 return TokenNameAND;
1504 if ((test = getNextChar('|', '=')) == 0)
1505 return TokenNameOR_OR;
1507 return TokenNameOR_EQUAL;
1511 if (getNextChar('='))
1512 return TokenNameXOR_EQUAL;
1513 return TokenNameXOR;
1515 if (getNextChar('>')) {
1517 if (currentPosition == source.length) {
1519 return TokenNameINLINE_HTML;
1521 return getInlinedHTML(currentPosition - 2);
1523 return TokenNameQUESTION;
1525 if (getNextChar(':'))
1526 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1527 return TokenNameCOLON;
1531 consumeStringConstant();
1532 return TokenNameStringSingleQuote;
1534 if (tokenizeStrings) {
1535 consumeStringLiteral();
1536 return TokenNameStringDoubleQuote;
1538 return TokenNameEncapsedString2;
1540 if (tokenizeStrings) {
1541 consumeStringInterpolated();
1542 return TokenNameStringInterpolated;
1544 return TokenNameEncapsedString0;
// slash and hash are handled together: divide operators, line comments and block comments
1547 char startChar = currentCharacter;
1548 if (getNextChar('=') && startChar == '/') {
1549 return TokenNameDIVIDE_EQUAL;
1552 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1554 this.lastCommentLinePosition = this.currentPosition;
1555 int endPositionForLineComment = 0;
1556 try { //get the next char
1557 currentCharacter = source[currentPosition++];
1558 // if (((currentCharacter = source[currentPosition++])
1560 // && (source[currentPosition] == 'u')) {
1561 // //-------------unicode traitement ------------
1562 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1563 // currentPosition++;
1564 // while (source[currentPosition] == 'u') {
1565 // currentPosition++;
1568 // Character.getNumericValue(source[currentPosition++]))
1572 // Character.getNumericValue(source[currentPosition++]))
1576 // Character.getNumericValue(source[currentPosition++]))
1580 // Character.getNumericValue(source[currentPosition++]))
1584 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1586 // currentCharacter =
1587 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1590 //handle the \\u case manually into comment
1591 // if (currentCharacter == '\\') {
1592 // if (source[currentPosition] == '\\')
1593 // currentPosition++;
1594 // } //jump over the \\
1595 boolean isUnicode = false;
1596 while (currentCharacter != '\r' && currentCharacter != '\n') {
1597 this.lastCommentLinePosition = this.currentPosition;
1598 if (currentCharacter == '?') {
1599 if (getNextChar('>')) {
1600 startPosition = currentPosition - 2;
1602 return TokenNameINLINE_HTML;
1607 currentCharacter = source[currentPosition++];
1608 // if (((currentCharacter = source[currentPosition++])
1610 // && (source[currentPosition] == 'u')) {
1611 // isUnicode = true;
1612 // //-------------unicode traitement ------------
1613 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1614 // currentPosition++;
1615 // while (source[currentPosition] == 'u') {
1616 // currentPosition++;
1619 // Character.getNumericValue(source[currentPosition++]))
1623 // Character.getNumericValue(
1624 // source[currentPosition++]))
1628 // Character.getNumericValue(
1629 // source[currentPosition++]))
1633 // Character.getNumericValue(
1634 // source[currentPosition++]))
1638 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1640 // currentCharacter =
1641 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1644 //handle the \\u case manually into comment
1645 // if (currentCharacter == '\\') {
1646 // if (source[currentPosition] == '\\')
1647 // currentPosition++;
1648 // } //jump over the \\
1651 endPositionForLineComment = currentPosition - 6;
1653 endPositionForLineComment = currentPosition - 1;
1655 // recordComment(false);
1656 recordComment(TokenNameCOMMENT_LINE);
1657 if (this.taskTags != null)
1658 checkTaskTag(this.startPosition, this.currentPosition);
1659 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1660 checkNonExternalizeString();
1661 if (recordLineSeparator) {
1663 pushUnicodeLineSeparator();
1665 pushLineSeparator();
1671 if (tokenizeComments) {
1673 currentPosition = endPositionForLineComment;
1674 // reset one character behind
1676 return TokenNameCOMMENT_LINE;
1678 } catch (IndexOutOfBoundsException e) { //an eof will then
1680 if (tokenizeComments) {
1682 // reset one character behind
1683 return TokenNameCOMMENT_LINE;
1689 //traditional and annotation comment
1690 boolean isJavadoc = false, star = false;
1691 // consume next character
1692 unicodeAsBackSlash = false;
1693 currentCharacter = source[currentPosition++];
1694 // if (((currentCharacter = source[currentPosition++]) ==
1696 // && (source[currentPosition] == 'u')) {
1697 // getNextUnicodeChar();
1699 // if (withoutUnicodePtr != 0) {
1700 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1701 // currentCharacter;
1704 if (currentCharacter == '*') {
1708 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1709 checkNonExternalizeString();
1710 if (recordLineSeparator) {
1711 pushLineSeparator();
1716 try { //get the next char
1717 currentCharacter = source[currentPosition++];
1718 // if (((currentCharacter = source[currentPosition++])
1720 // && (source[currentPosition] == 'u')) {
1721 // //-------------unicode traitement ------------
1722 // getNextUnicodeChar();
1724 //handle the \\u case manually into comment
1725 // if (currentCharacter == '\\') {
1726 // if (source[currentPosition] == '\\')
1727 // currentPosition++;
1728 // //jump over the \\
1730 // empty comment is not a javadoc /**/
1731 if (currentCharacter == '/') {
1734 //loop until end of comment */
1735 while ((currentCharacter != '/') || (!star)) {
1736 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1737 checkNonExternalizeString();
1738 if (recordLineSeparator) {
1739 pushLineSeparator();
1744 star = currentCharacter == '*';
1746 currentCharacter = source[currentPosition++];
1747 // if (((currentCharacter = source[currentPosition++])
1749 // && (source[currentPosition] == 'u')) {
1750 // //-------------unicode traitement ------------
1751 // getNextUnicodeChar();
1753 //handle the \\u case manually into comment
1754 // if (currentCharacter == '\\') {
1755 // if (source[currentPosition] == '\\')
1756 // currentPosition++;
1757 // } //jump over the \\
1759 //recordComment(isJavadoc);
1761 recordComment(TokenNameCOMMENT_PHPDOC);
1763 recordComment(TokenNameCOMMENT_BLOCK);
1766 if (tokenizeComments) {
1768 return TokenNameCOMMENT_PHPDOC;
1769 return TokenNameCOMMENT_BLOCK;
1772 if (this.taskTags != null) {
1773 checkTaskTag(this.startPosition, this.currentPosition);
1775 } catch (IndexOutOfBoundsException e) {
1776 // reset end position for error reporting
1777 currentPosition -= 2;
1778 throw new InvalidInputException(UNTERMINATED_COMMENT);
1782 return TokenNameDIVIDE;
1786 return TokenNameEOF;
1787 //the atEnd may not be <currentPosition == source.length> if
1788 // source is only some part of a real (external) stream
1789 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1791 if (currentCharacter == '$') {
1792 int oldPosition = currentPosition;
1794 currentCharacter = source[currentPosition++];
1795 if (isPHPIdentifierStart(currentCharacter)) {
1796 return scanIdentifierOrKeyword(true);
1798 currentPosition = oldPosition;
1799 return TokenNameDOLLAR;
1801 } catch (IndexOutOfBoundsException e) {
1802 currentPosition = oldPosition;
1803 return TokenNameDOLLAR;
1806 if (isPHPIdentifierStart(currentCharacter))
1807 return scanIdentifierOrKeyword(false);
1808 if (Character.isDigit(currentCharacter))
1809 return scanNumber(false);
1810 return TokenNameERROR;
1813 } //-----------------end switch while try--------------------
1814 catch (IndexOutOfBoundsException e) {
1817 return TokenNameEOF;
/**
 * Scans inline HTML by delegating to getInlinedHTMLToken(int). The commented-out code below sketches a check of the scanned HTML
 * (matching start tags against end tags with a stack); it is currently disabled.
 *
 * @param start
 *          the position handed on to getInlinedHTMLToken(int)
 * @return NOTE(review): presumably the token obtained from getInlinedHTMLToken(int) - confirm
 * @throws InvalidInputException
 *           declared because getInlinedHTMLToken(int) declares it
 */
1820 private int getInlinedHTML(int start) throws InvalidInputException {
1821 int token = getInlinedHTMLToken(start);
1822 if (token == TokenNameINLINE_HTML) {
1823 // Stack stack = new Stack();
1824 // // scan html for errors
1825 // Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
1826 // int lastPHPEndPos=0;
1827 // for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
1828 // Tag tag=(Tag)i.next();
1830 // if (tag instanceof StartTag) {
1831 // StartTag startTag=(StartTag)tag;
1832 // // System.out.println("startTag: "+tag);
1833 // if (startTag.isServerTag()) {
1834 // // TODO : what to do with a server tag ?
1836 // // do whatever with HTML start tag
1837 // // use startTag.getElement() to find the element corresponding
1838 // // to this start tag which may be useful if you implement code
1840 // stack.push(startTag);
1843 // EndTag endTag=(EndTag)tag;
1844 // StartTag stag = (StartTag) stack.peek();
1845 //// System.out.println("endTag: "+tag);
1846 // // do whatever with HTML end tag.
1855 * @throws InvalidInputException
1857 private int getInlinedHTMLToken(int start) throws InvalidInputException {
// Scans inline HTML from the current position; the token is reported as starting at the given start position.
// The text is searched for a less-than sign followed by a question mark, optionally followed by the letters p, h, p in
// either case. With ignorePHPOneLiner set, lookAheadLinePHPTag() is consulted before the tag is accepted.
// Line separators met on the way are recorded when recordLineSeparator is set.
// Running off the end of the source yields TokenNameINLINE_HTML.
// a position beyond the end of the source is clamped and reported as end of file
1858 if (currentPosition > source.length) {
1859 currentPosition = source.length;
1860 return TokenNameEOF;
1862 startPosition = start;
1865 currentCharacter = source[currentPosition++];
1866 if (currentCharacter == '<') {
1867 if (getNextChar('?')) {
1868 currentCharacter = source[currentPosition++];
// anything but the letter p right behind the question mark: the short form of the opening tag
1869 if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
1871 // (currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1873 if (ignorePHPOneLiner) {
1874 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1876 return TokenNameINLINE_HTML;
1880 return TokenNameINLINE_HTML;
1883 // boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
1885 int test = getNextChar('H', 'h');
1887 test = getNextChar('P', 'p');
1890 if (ignorePHPOneLiner) {
1891 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1893 return TokenNameINLINE_HTML;
1897 return TokenNameINLINE_HTML;
1905 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1906 if (recordLineSeparator) {
1907 pushLineSeparator();
1912 } //-----------------while--------------------
1914 return TokenNameINLINE_HTML;
1915 } //-----------------try--------------------
1916 catch (IndexOutOfBoundsException e) {
1917 startPosition = start;
1921 return TokenNameINLINE_HTML;
1927 private int lookAheadLinePHPTag() {
// Looks ahead from currentPosition with a private cursor, tracking whether a single quoted or a double quoted string is open.
// When the character that follows a question mark and ends the PHP tag is met, currentPosition is moved behind it and
// TokenNameEOF is returned as a dummy value. The other visible exits return TokenNameINLINE_HTML; when the end of the source
// is hit, currentPosition is moved to the look-ahead position first.
1928 // check if the PHP is only in this line (for CodeFormatter)
1929 int currentPositionInLine = currentPosition;
1930 char previousCharInLine = ' ';
1931 char currentCharInLine = ' ';
1932 boolean singleQuotedStringActive = false;
1933 boolean doubleQuotedStringActive = false;
1936 // look ahead in this line
1938 previousCharInLine = currentCharInLine;
1939 currentCharInLine = source[currentPositionInLine++];
1940 switch (currentCharInLine) {
1942 if (previousCharInLine == '?') {
1943 // update the scanner's current Position in the source
1944 currentPosition = currentPositionInLine;
1945 // use as "dummy" token
1946 return TokenNameEOF;
// a quote preceded by a backslash does not close the string that is open
1950 if (doubleQuotedStringActive) {
1951 if (previousCharInLine != '\\') {
1952 doubleQuotedStringActive = false;
1955 if (!singleQuotedStringActive) {
1956 doubleQuotedStringActive = true;
1961 if (singleQuotedStringActive) {
1962 if (previousCharInLine != '\\') {
1963 singleQuotedStringActive = false;
1966 if (!doubleQuotedStringActive) {
1967 singleQuotedStringActive = true;
1973 return TokenNameINLINE_HTML;
1975 if (!singleQuotedStringActive && !doubleQuotedStringActive) {
1977 return TokenNameINLINE_HTML;
1981 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1983 return TokenNameINLINE_HTML;
1987 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1989 return TokenNameINLINE_HTML;
1994 } catch (IndexOutOfBoundsException e) {
1996 currentPosition = currentPositionInLine;
1997 return TokenNameINLINE_HTML;
2001 // public final void getNextUnicodeChar()
2002 // throws IndexOutOfBoundsException, InvalidInputException {
2004 // //handle the case of unicode.
2005 // //when a unicode appears then we must use a buffer that holds char
2007 // //At the end of this method currentCharacter holds the new visited char
2008 // //and currentPosition points right next after it
2010 // //ALL getNextChar.... ARE OPTIMIZED COPIES
2012 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2013 // currentPosition++;
2014 // while (source[currentPosition] == 'u') {
2015 // currentPosition++;
2019 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2021 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2023 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2025 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2027 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2029 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2030 // //need the unicode buffer
2031 // if (withoutUnicodePtr == 0) {
2032 // //buffer all the entries that have been left aside....
2033 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2034 // System.arraycopy(
2037 // withoutUnicodeBuffer,
2039 // withoutUnicodePtr);
2041 // //fill the buffer with the char
2042 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2044 // unicodeAsBackSlash = currentCharacter == '\\';
2047 * Tokenize a method body, assuming that curly brackets are properly balanced.
2049 public final void jumpOverMethodBody() {
// Skips over source text without returning tokens: white space, string literals (escapes are resolved through
// scanDoubleQuotedEscapeCharacter()), line comments, block comments and identifiers or keywords are consumed in turn.
// Line separators are recorded on the way when recordLineSeparator is set.
// IndexOutOfBoundsException and InvalidInputException are caught at the end of the method.
2050 this.wasAcr = false;
2053 while (true) { //loop for jumping over comments
2054 // ---------Consume white space and handles startPosition---------
2055 boolean isWhiteSpace;
2057 startPosition = currentPosition;
2058 currentCharacter = source[currentPosition++];
2059 // if (((currentCharacter = source[currentPosition++]) == '\\')
2060 // && (source[currentPosition] == 'u')) {
2061 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2063 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2064 pushLineSeparator();
2065 isWhiteSpace = Character.isWhitespace(currentCharacter);
2067 } while (isWhiteSpace);
2068 // -------consume token until } is found---------
2069 switch (currentCharacter) {
2080 test = getNextChar('\\');
2083 scanDoubleQuotedEscapeCharacter();
2084 } catch (InvalidInputException ex) {
2088 // try { // consume next character
2089 unicodeAsBackSlash = false;
2090 currentCharacter = source[currentPosition++];
2091 // if (((currentCharacter = source[currentPosition++]) == '\\')
2092 // && (source[currentPosition] == 'u')) {
2093 // getNextUnicodeChar();
2095 if (withoutUnicodePtr != 0) {
2096 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2099 // } catch (InvalidInputException ex) {
2107 // try { // consume next character
2108 unicodeAsBackSlash = false;
2109 currentCharacter = source[currentPosition++];
2110 // if (((currentCharacter = source[currentPosition++]) == '\\')
2111 // && (source[currentPosition] == 'u')) {
2112 // getNextUnicodeChar();
2114 if (withoutUnicodePtr != 0) {
2115 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2118 // } catch (InvalidInputException ex) {
// skip the characters of a double quoted string up to its closing quote
2120 while (currentCharacter != '"') {
2121 if (currentCharacter == '\r') {
2122 if (source[currentPosition] == '\n')
2125 // the string cannot go further that the line
2127 if (currentCharacter == '\n') {
2129 // the string cannot go further that the line
2131 if (currentCharacter == '\\') {
2133 scanDoubleQuotedEscapeCharacter();
2134 } catch (InvalidInputException ex) {
2138 // try { // consume next character
2139 unicodeAsBackSlash = false;
2140 currentCharacter = source[currentPosition++];
2141 // if (((currentCharacter = source[currentPosition++]) == '\\')
2142 // && (source[currentPosition] == 'u')) {
2143 // getNextUnicodeChar();
2145 if (withoutUnicodePtr != 0) {
2146 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2149 // } catch (InvalidInputException ex) {
2152 } catch (IndexOutOfBoundsException e) {
2158 if ((test = getNextChar('/', '*')) == 0) {
2162 currentCharacter = source[currentPosition++];
2163 // if (((currentCharacter = source[currentPosition++]) ==
2165 // && (source[currentPosition] == 'u')) {
2166 // //-------------unicode traitement ------------
2167 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2168 // currentPosition++;
2169 // while (source[currentPosition] == 'u') {
2170 // currentPosition++;
2173 // Character.getNumericValue(source[currentPosition++]))
2177 // Character.getNumericValue(source[currentPosition++]))
2181 // Character.getNumericValue(source[currentPosition++]))
2185 // Character.getNumericValue(source[currentPosition++]))
2188 // //error don't care of the value
2189 // currentCharacter = 'A';
2190 // } //something different from \n and \r
2192 // currentCharacter =
2193 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// a line comment runs up to the next carriage return or line feed
2196 while (currentCharacter != '\r' && currentCharacter != '\n') {
2198 currentCharacter = source[currentPosition++];
2199 // if (((currentCharacter = source[currentPosition++])
2201 // && (source[currentPosition] == 'u')) {
2202 // //-------------unicode traitement ------------
2203 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2204 // currentPosition++;
2205 // while (source[currentPosition] == 'u') {
2206 // currentPosition++;
2209 // Character.getNumericValue(source[currentPosition++]))
2213 // Character.getNumericValue(source[currentPosition++]))
2217 // Character.getNumericValue(source[currentPosition++]))
2221 // Character.getNumericValue(source[currentPosition++]))
2224 // //error don't care of the value
2225 // currentCharacter = 'A';
2226 // } //something different from \n and \r
2228 // currentCharacter =
2229 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2233 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2234 pushLineSeparator();
2235 } catch (IndexOutOfBoundsException e) {
2236 } //an eof will then be generated
2240 //traditional and annotation comment
2241 boolean star = false;
2242 // try { // consume next character
2243 unicodeAsBackSlash = false;
2244 currentCharacter = source[currentPosition++];
2245 // if (((currentCharacter = source[currentPosition++]) == '\\')
2246 // && (source[currentPosition] == 'u')) {
2247 // getNextUnicodeChar();
2249 if (withoutUnicodePtr != 0) {
2250 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2253 // } catch (InvalidInputException ex) {
2255 if (currentCharacter == '*') {
2258 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2259 pushLineSeparator();
2260 try { //get the next char
2261 currentCharacter = source[currentPosition++];
2262 // if (((currentCharacter = source[currentPosition++]) ==
2264 // && (source[currentPosition] == 'u')) {
2265 // //-------------unicode traitement ------------
2266 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2267 // currentPosition++;
2268 // while (source[currentPosition] == 'u') {
2269 // currentPosition++;
2272 // Character.getNumericValue(source[currentPosition++]))
2276 // Character.getNumericValue(source[currentPosition++]))
2280 // Character.getNumericValue(source[currentPosition++]))
2284 // Character.getNumericValue(source[currentPosition++]))
2287 // //error don't care of the value
2288 // currentCharacter = 'A';
2289 // } //something different from * and /
2291 // currentCharacter =
2292 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2295 //loop until end of comment */
2296 while ((currentCharacter != '/') || (!star)) {
2297 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2298 pushLineSeparator();
2299 star = currentCharacter == '*';
2301 currentCharacter = source[currentPosition++];
2302 // if (((currentCharacter = source[currentPosition++])
2304 // && (source[currentPosition] == 'u')) {
2305 // //-------------unicode traitement ------------
2306 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2307 // currentPosition++;
2308 // while (source[currentPosition] == 'u') {
2309 // currentPosition++;
2312 // Character.getNumericValue(source[currentPosition++]))
2316 // Character.getNumericValue(source[currentPosition++]))
2320 // Character.getNumericValue(source[currentPosition++]))
2324 // Character.getNumericValue(source[currentPosition++]))
2327 // //error don't care of the value
2328 // currentCharacter = 'A';
2329 // } //something different from * and /
2331 // currentCharacter =
2332 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2336 } catch (IndexOutOfBoundsException e) {
// identifiers, keywords and variables (leading dollar sign) are scanned only to move past them
2344 if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
2346 scanIdentifierOrKeyword((currentCharacter == '$'));
2347 } catch (InvalidInputException ex) {
2352 if (Character.isDigit(currentCharacter)) {
2355 } catch (InvalidInputException ex) {
2362 //-----------------end switch while try--------------------
2363 } catch (IndexOutOfBoundsException e) {
2364 } catch (InvalidInputException e) {
2369 // public final boolean jumpOverUnicodeWhiteSpace()
2370 // throws InvalidInputException {
2372 // //handle the case of unicode. Jump over the next whiteSpace
2373 // //making startPosition pointing on the next available char
2374 // //On false, the currentCharacter is filled up with a potential
2378 // this.wasAcr = false;
2379 // int c1, c2, c3, c4;
2380 // int unicodeSize = 6;
2381 // currentPosition++;
2382 // while (source[currentPosition] == 'u') {
2383 // currentPosition++;
2387 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2389 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2391 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2393 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2395 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2398 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2399 // if (recordLineSeparator
2400 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2401 // pushLineSeparator();
2402 // if (Character.isWhitespace(currentCharacter))
2405 // //buffer the new char which is not a white space
2406 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2407 // //withoutUnicodePtr == 1 is true here
2409 // } catch (IndexOutOfBoundsException e) {
2410 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// Copies the first (linePtr + 1) entries of the internal lineEnds buffer into a freshly
// allocated array, so callers do not receive the scanner's own (over-allocated) buffer.
// NOTE(review): the declaration of 'copy' and the return statement are not visible in this
// listing - presumably 'copy' is what gets returned; confirm against the full file.
2413 public final int[] getLineEnds() {
2414 //return a bounded copy of this.lineEnds
// the new array is created inline as the destination argument of arraycopy
2416 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
// Accessor returning a char[].
// NOTE(review): the body is not visible in this listing - presumably it hands out the
// scanner's 'source' buffer without copying; confirm before relying on aliasing behaviour.
2420 public char[] getSource() {
// Tells whether a token code denotes an identifier or a keyword: the result is true for
// TokenNameIdentifier itself and for every code strictly greater than TokenNameKEYWORD.
// NOTE(review): this relies on all keyword token codes being numbered above TokenNameKEYWORD
// in ITerminalSymbols - confirm when adding new token constants.
2424 public static boolean isIdentifierOrKeyword(int token) {
2425 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
// Returns the source of a one-character token, i.e. the character at source[startPosition].
// NOTE(review): a large part of the body is not visible in this listing - presumably a
// dispatch on charOne that returns pre-built shared arrays for common characters; confirm.
// The only visible return is the fallback below, which allocates a new array per call.
2428 final char[] optimizedCurrentTokenSource1() {
2429 //return always the same char[] build only once
2430 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2431 char charOne = source[startPosition];
// fallback: no shared instance applies, build a fresh single-element array
2486 return new char[] { charOne };
// Returns the source of a two-character token (source[startPosition] and the following char),
// trying to hand out an already existing char[] instead of allocating a new one each time.
// Visible stages:
//  1. a series of returns of the shared constants charArray_va .. charArray_vz
//     (NOTE(review): the conditions selecting them are not visible here - confirm);
//  2. a lookup in the cache bucket charArray_length[0][hash], scanned in two passes
//     (first up to InternalTableSize, then up to newEntry2);
//  3. on a miss, a new { c0, c1 } array is created and stored in the bucket.
// NOTE(review): the initial value of 'i' for each pass, the returns on a hit, the handling
// when ++max reaches InternalTableSize and the update of newEntry2 are not visible - confirm.
2490 final char[] optimizedCurrentTokenSource2() {
2492 c0 = source[startPosition];
2493 c1 = source[startPosition + 1];
2495 //return always the same char[] build only once
2496 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2499 return charArray_va;
2501 return charArray_vb;
2503 return charArray_vc;
2505 return charArray_vd;
2507 return charArray_ve;
2509 return charArray_vf;
2511 return charArray_vg;
2513 return charArray_vh;
2515 return charArray_vi;
2517 return charArray_vj;
2519 return charArray_vk;
2521 return charArray_vl;
2523 return charArray_vm;
2525 return charArray_vn;
2527 return charArray_vo;
2529 return charArray_vp;
2531 return charArray_vq;
2533 return charArray_vr;
2535 return charArray_vs;
2537 return charArray_vt;
2539 return charArray_vu;
2541 return charArray_vv;
2543 return charArray_vw;
2545 return charArray_vx;
2547 return charArray_vy;
2549 return charArray_vz;
2552 //try to return the same char[] build only once
// bucket index: both characters folded together, reduced modulo TableSize
2553 int hash = ((c0 << 6) + c1) % TableSize;
2554 char[][] table = charArray_length[0][hash];
// first pass over the bucket
2556 while (++i < InternalTableSize) {
2557 char[] charArray = table[i];
2558 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2561 //---------other side---------
// second pass, bounded by the index of the most recently added entry
2563 int max = newEntry2;
2564 while (++i <= max) {
2565 char[] charArray = table[i];
2566 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2569 //--------add the entry-------
2570 if (++max >= InternalTableSize)
// cache miss: build the array once and remember it in slot 'max' of the bucket
2573 table[max] = (r = new char[] { c0, c1 });
// Returns the source of a three-character token starting at source[startPosition], reusing a
// cached char[] from the bucket charArray_length[1][hash] when one with the same three
// characters exists, and otherwise creating a new array and storing it in the bucket.
// NOTE(review): the declarations of c0..c2, the tail of the hash expression (presumably a
// reduction modulo TableSize), the returns on a hit, the wrap-around when ++max reaches
// InternalTableSize and the update of newEntry3 are not visible in this listing - confirm.
2578 final char[] optimizedCurrentTokenSource3() {
2579 //try to return the same char[] build only once
// the three characters are read and folded into the hash in one expression
2581 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2583 char[][] table = charArray_length[1][hash];
// first pass over the bucket
2585 while (++i < InternalTableSize) {
2586 char[] charArray = table[i];
2587 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2590 //---------other side---------
// second pass, bounded by the index of the most recently added entry
2592 int max = newEntry3;
2593 while (++i <= max) {
2594 char[] charArray = table[i];
2595 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2598 //--------add the entry-------
2599 if (++max >= InternalTableSize)
// cache miss: build the array once and remember it in slot 'max' of the bucket
2602 table[max] = (r = new char[] { c0, c1, c2 });
// Returns the source of a four-character token starting at source[startPosition], reusing a
// cached char[] from the bucket charArray_length[2][hash] when one with the same four
// characters exists, and otherwise creating a new array and storing it in the bucket.
// The hash is accumulated as a long (the first character is widened before shifting) and
// narrowed to int only when indexing the table.
// NOTE(review): the tail of the hash expression (presumably a reduction modulo TableSize),
// the returns on a hit, the wrap-around when ++max reaches InternalTableSize and the update
// of newEntry4 are not visible in this listing - confirm.
2607 final char[] optimizedCurrentTokenSource4() {
2608 //try to return the same char[] build only once
2609 char c0, c1, c2, c3;
2610 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2611 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2613 char[][] table = charArray_length[2][(int) hash];
// first pass over the bucket
2615 while (++i < InternalTableSize) {
2616 char[] charArray = table[i];
2617 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2620 //---------other side---------
// second pass, bounded by the index of the most recently added entry
2622 int max = newEntry4;
2623 while (++i <= max) {
2624 char[] charArray = table[i];
2625 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2628 //--------add the entry-------
2629 if (++max >= InternalTableSize)
// cache miss: build the array once and remember it in slot 'max' of the bucket
2632 table[max] = (r = new char[] { c0, c1, c2, c3 });
// Returns the source of a five-character token starting at source[startPosition], reusing a
// cached char[] from the bucket charArray_length[3][hash] when one with the same five
// characters exists, and otherwise creating a new array and storing it in the bucket.
// The two leading characters are widened to long before shifting; the hash is narrowed to
// int only when indexing the table.
// NOTE(review): the tail of the hash expression (presumably a reduction modulo TableSize),
// the returns on a hit, the wrap-around when ++max reaches InternalTableSize and the update
// of newEntry5 are not visible in this listing - confirm.
2637 final char[] optimizedCurrentTokenSource5() {
2638 //try to return the same char[] build only once
2639 char c0, c1, c2, c3, c4;
2640 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2641 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2643 char[][] table = charArray_length[3][(int) hash];
// first pass over the bucket
2645 while (++i < InternalTableSize) {
2646 char[] charArray = table[i];
2647 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2650 //---------other side---------
// second pass, bounded by the index of the most recently added entry
2652 int max = newEntry5;
2653 while (++i <= max) {
2654 char[] charArray = table[i];
2655 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2658 //--------add the entry-------
2659 if (++max >= InternalTableSize)
// cache miss: build the array once and remember it in slot 'max' of the bucket
2662 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
// Returns the source of a six-character token starting at source[startPosition], reusing a
// cached char[] from the bucket charArray_length[4][hash] when one with the same six
// characters exists, and otherwise creating a new array and storing it in the bucket.
// The three leading characters are widened to long before shifting (the first one by 32
// bits, which would not fit an int); the hash is narrowed to int only when indexing.
// NOTE(review): the tail of the hash expression (presumably a reduction modulo TableSize),
// the returns on a hit, the wrap-around when ++max reaches InternalTableSize and the update
// of newEntry6 are not visible in this listing - confirm.
2667 final char[] optimizedCurrentTokenSource6() {
2668 //try to return the same char[] build only once
2669 char c0, c1, c2, c3, c4, c5;
2670 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2671 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2672 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2674 char[][] table = charArray_length[4][(int) hash];
// first pass over the bucket
2676 while (++i < InternalTableSize) {
2677 char[] charArray = table[i];
2678 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2679 && (c5 == charArray[5]))
2682 //---------other side---------
// second pass, bounded by the index of the most recently added entry
2684 int max = newEntry6;
2685 while (++i <= max) {
2686 char[] charArray = table[i];
2687 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2688 && (c5 == charArray[5]))
2691 //--------add the entry-------
2692 if (++max >= InternalTableSize)
// cache miss: build the array once and remember it in slot 'max' of the bucket
2695 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
// Records the end position of the current line in lineEnds when currentCharacter is a line
// separator ('\r' or '\n') located at currentPosition - 1.
//  - '\r': the position of the '\r' is stored; if the next source character is '\n', the
//    stored position is moved onto that '\n' so that CR+LF counts as a single line end.
//  - '\n': if the previous character was a '\r' already recorded at currentPosition - 2
//    (wasAcr), the stored position is moved onto the '\n'; otherwise a new entry is added.
// The lineEnds buffer grows by INCREMENT entries whenever the store runs past its end.
// Declared to throw InvalidInputException; no throw is visible in this listing.
// NOTE(review): the statements guarded by the checkNonExternalizedStringLiterals test and by
// the "already recorded" tests, as well as the updates of wasAcr, are not visible - confirm.
2700 public final void pushLineSeparator() throws InvalidInputException {
2701 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2702 final int INCREMENT = 250;
2703 if (this.checkNonExternalizedStringLiterals) {
2704 // reinitialize the current line for non externalize strings purpose
2707 //currentCharacter is at position currentPosition-1
2709 if (currentCharacter == '\r') {
2710 int separatorPos = currentPosition - 1;
// guard against recording a position that is not past the last recorded line end
2711 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2713 //System.out.println("CR-" + separatorPos);
2715 lineEnds[++linePtr] = separatorPos;
2716 } catch (IndexOutOfBoundsException e) {
// buffer full: ++linePtr was already applied before the store failed, so after growing
// the array the same index is simply written again
2717 //linePtr value is correct
2718 int oldLength = lineEnds.length;
2719 int[] old = lineEnds;
2720 lineEnds = new int[oldLength + INCREMENT];
2721 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2722 lineEnds[linePtr] = separatorPos;
2724 // look-ahead for merged cr+lf
2726 if (source[currentPosition] == '\n') {
2727 //System.out.println("look-ahead LF-" + currentPosition);
2728 lineEnds[linePtr] = currentPosition;
// reading source[currentPosition] may run past the end of the source
2734 } catch (IndexOutOfBoundsException e) {
2739 if (currentCharacter == '\n') {
2740 //must merge eventual cr followed by lf
2741 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2742 //System.out.println("merge LF-" + (currentPosition - 1));
2743 lineEnds[linePtr] = currentPosition - 1;
2745 int separatorPos = currentPosition - 1;
// guard against recording a position that is not past the last recorded line end
2746 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2748 // System.out.println("LF-" + separatorPos);
2750 lineEnds[++linePtr] = separatorPos;
2751 } catch (IndexOutOfBoundsException e) {
// same grow-and-retry as in the '\r' branch
2752 //linePtr value is correct
2753 int oldLength = lineEnds.length;
2754 int[] old = lineEnds;
2755 lineEnds = new int[oldLength + INCREMENT];
2756 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2757 lineEnds[linePtr] = separatorPos;
// Variant of pushLineSeparator() for a '\r' or '\n' that was read as a unicode escape:
// positions are computed relative to currentPosition - 6 instead of currentPosition - 1
// (presumably because the escape occupies six source characters - confirm).
//  - '\r': the separator position is stored; if the next source character is a literal
//    '\n', the stored position is moved onto it so that CR+LF counts as a single line end.
//  - '\n': if a '\r' was recorded at currentPosition - 7 (wasAcr), the stored position is
//    moved to currentPosition - 6; otherwise a new entry is added.
// The lineEnds buffer grows by INCREMENT entries whenever the store runs past its end.
// NOTE(review): unlike pushLineSeparator(), no catch is visible around the look-ahead read
// of source[currentPosition]; lines are missing from this listing, so confirm whether an
// escape at the very end of the source can raise IndexOutOfBoundsException here.
2765 public final void pushUnicodeLineSeparator() {
2766 // isUnicode means that the \r or \n has been read as a unicode character
2767 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2768 final int INCREMENT = 250;
2769 //currentCharacter is at position currentPosition-1
2770 if (this.checkNonExternalizedStringLiterals) {
2771 // reinitialize the current line for non externalize strings purpose
2775 if (currentCharacter == '\r') {
2776 int separatorPos = currentPosition - 6;
// guard against recording a position that is not past the last recorded line end
2777 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2779 //System.out.println("CR-" + separatorPos);
2781 lineEnds[++linePtr] = separatorPos;
2782 } catch (IndexOutOfBoundsException e) {
// buffer full: ++linePtr was already applied before the store failed, so after growing
// the array the same index is simply written again
2783 //linePtr value is correct
2784 int oldLength = lineEnds.length;
2785 int[] old = lineEnds;
2786 lineEnds = new int[oldLength + INCREMENT];
2787 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2788 lineEnds[linePtr] = separatorPos;
2790 // look-ahead for merged cr+lf
2791 if (source[currentPosition] == '\n') {
2792 //System.out.println("look-ahead LF-" + currentPosition);
2793 lineEnds[linePtr] = currentPosition;
2801 if (currentCharacter == '\n') {
2802 //must merge eventual cr followed by lf
2803 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2804 //System.out.println("merge LF-" + (currentPosition - 1));
2805 lineEnds[linePtr] = currentPosition - 6;
2807 int separatorPos = currentPosition - 6;
// guard against recording a position that is not past the last recorded line end
2808 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2810 // System.out.println("LF-" + separatorPos);
2812 lineEnds[++linePtr] = separatorPos;
2813 } catch (IndexOutOfBoundsException e) {
// same grow-and-retry as in the '\r' branch
2814 //linePtr value is correct
2815 int oldLength = lineEnds.length;
2816 int[] old = lineEnds;
2817 lineEnds = new int[oldLength + INCREMENT];
2818 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2819 lineEnds[linePtr] = separatorPos;
// Records the comment that has just been scanned: its start (this.startPosition) goes into
// commentStarts and its stop position into commentStops, both at index ++commentPtr.
// The stop position is stored with a sign that depends on the comment kind:
//  - TokenNameCOMMENT_LINE  : minus lastCommentLinePosition
//  - TokenNameCOMMENT_BLOCK : minus currentPosition
//  - any other token        : currentPosition unchanged (the initial value)
// Both arrays grow together by 30 entries when commentPtr reaches their current length.
// NOTE(review): the switch header and the break statements are not visible in this listing.
2827 public void recordComment(int token) {
2829 int stopPosition = this.currentPosition;
2831 case TokenNameCOMMENT_LINE:
2832 stopPosition = -this.lastCommentLinePosition;
2834 case TokenNameCOMMENT_BLOCK:
2835 stopPosition = -this.currentPosition;
2839 // a new comment is recorded
2840 int length = this.commentStops.length;
2841 if (++this.commentPtr >= length) {
// the replacement array is allocated inline as the destination argument of arraycopy
2842 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2843 //grows the positions buffers too
2844 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2846 this.commentStops[this.commentPtr] = stopPosition;
2847 this.commentStarts[this.commentPtr] = this.startPosition;
2850 // public final void recordComment(boolean isJavadoc) {
2851 // // a new annotation comment is recorded
2853 // commentStops[++commentPtr] = isJavadoc
2854 // ? currentPosition
2855 // : -currentPosition;
2856 // } catch (IndexOutOfBoundsException e) {
2857 // int oldStackLength = commentStops.length;
2858 // int[] oldStack = commentStops;
2859 // commentStops = new int[oldStackLength + 30];
2860 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2861 // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2862 // //grows the positions buffers too
2863 // int[] old = commentStarts;
2864 // commentStarts = new int[oldStackLength + 30];
2865 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2867 // //the buffer is of a correct size here
2868 // commentStarts[commentPtr] = startPosition;
// Repositions the scanner so that scanning can restart on the range [begin, end]:
// initialPosition, startPosition and currentPosition are all set to 'begin', eofPosition is
// set one past 'end', and the recorded comments are discarded (commentPtr = -1).
// NOTE(review): lines are missing from this listing; other state may be reset as well.
2870 public void resetTo(int begin, int end) {
2871 //reset the scanner to a given position where it may rescan again
2873 initialPosition = startPosition = currentPosition = begin;
// 'end' is inclusive, hence the + 1; Integer.MAX_VALUE is kept as is to avoid int overflow
2874 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2875 commentPtr = -1; // reset comment stack
// Handles the character following a backslash inside a single-quoted string: the next
// source character is consumed into currentCharacter and then dispatched by a switch that
// sets currentCharacter to either a single quote or a backslash.
// NOTE(review): the case labels are not visible in this listing, so which input maps to
// which result cannot be told from here; a backslash is assigned on two branches, presumably
// one of them being the default for unrecognised escapes - confirm.
// Declared to throw InvalidInputException; no throw is visible in this listing.
2878 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2879 // the string with "\\u" is a legal string of two chars \ and u
2880 //thus we use a direct access to the source (for regular cases).
2881 // if (unicodeAsBackSlash) {
2882 // // consume next character
2883 // unicodeAsBackSlash = false;
2884 // if (((currentCharacter = source[currentPosition++]) == '\\')
2885 // && (source[currentPosition] == 'u')) {
2886 // getNextUnicodeChar();
2888 // if (withoutUnicodePtr != 0) {
2889 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// consume the escaped character directly from the source (no unicode pre-processing)
2893 currentCharacter = source[currentPosition++];
2894 switch (currentCharacter) {
2896 currentCharacter = '\'';
2899 currentCharacter = '\\';
2902 currentCharacter = '\\';
// Handles the character following a backslash inside a double-quoted string: the next
// source character is consumed into currentCharacter and replaced by the character the
// escape stands for. Visible results are tab, line feed, carriage return, double quote,
// single quote, backslash and '$'; the assignments for '\b' and '\f' are commented out.
// Anything else is tried as an octal escape of one to three digits, where a third digit is
// only accepted when the first digit is in the range 0..3.
// NOTE(review): the case labels, the statements that "ignore the last character" (presumably
// currentPosition--) and the condition guarding the INVALID_ESCAPE throw are not visible in
// this listing - confirm against the full file.
2907 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2908 // the string with "\\u" is a legal string of two chars \ and u
2909 //thus we use a direct access to the source (for regular cases).
2910 // if (unicodeAsBackSlash) {
2911 // // consume next character
2912 // unicodeAsBackSlash = false;
2913 // if (((currentCharacter = source[currentPosition++]) == '\\')
2914 // && (source[currentPosition] == 'u')) {
2915 // getNextUnicodeChar();
2917 // if (withoutUnicodePtr != 0) {
2918 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// consume the escaped character directly from the source (no unicode pre-processing)
2922 currentCharacter = source[currentPosition++];
2923 switch (currentCharacter) {
2925 // currentCharacter = '\b';
2928 currentCharacter = '\t';
2931 currentCharacter = '\n';
2934 // currentCharacter = '\f';
2937 currentCharacter = '\r';
2940 currentCharacter = '\"';
2943 currentCharacter = '\'';
2946 currentCharacter = '\\';
2949 currentCharacter = '$';
2952 // -----------octal escape--------------
2954 // OctalDigit OctalDigit
2955 // ZeroToThree OctalDigit OctalDigit
2956 int number = Character.getNumericValue(currentCharacter);
2957 if (number >= 0 && number <= 7) {
// a first digit above 3 rules out a three-digit escape
2958 boolean zeroToThreeNot = number > 3;
2959 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2960 int digit = Character.getNumericValue(currentCharacter);
2961 if (digit >= 0 && digit <= 7) {
// accumulate the value in base 8
2962 number = (number * 8) + digit;
2963 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2964 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2965 // Digit --> ignore last character
2968 digit = Character.getNumericValue(currentCharacter);
2969 if (digit >= 0 && digit <= 7) {
2970 // has read \ZeroToThree OctalDigit OctalDigit
2971 number = (number * 8) + digit;
2972 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2973 // --> ignore last character
2977 } else { // has read \OctalDigit NonDigit--> ignore last
2981 } else { // has read \OctalDigit NonOctalDigit--> ignore last
2985 } else { // has read \OctalDigit --> ignore last character
2989 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the resulting character
2990 currentCharacter = (char) number;
2993 // throw new InvalidInputException(INVALID_ESCAPE);
2997 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2998 // return scanIdentifierOrKeyword( false );
// Scans the rest of an identifier whose first character has already been consumed and
// classifies it. Visible behaviour:
//  - identifier characters are consumed while getNextCharAsJavaIdentifierPart() succeeds;
//  - TokenNameVariable is returned on one path (presumably when isVariable is true, i.e. the
//    token started with '$' - the guarding test is not visible in this listing; confirm);
//  - a token of length 1 is always TokenNameIdentifier;
//  - otherwise the token text is copied into 'data' in lower case, so the keyword
//    comparison below is case-insensitive, and a dispatch on the first letter followed by
//    character-by-character comparisons returns the matching keyword token
//    (TokenNameFILE, TokenNameand, TokenNamebreak, ...) or TokenNameIdentifier.
// NOTE(review): the case labels, the nested length dispatch and several one-line returns are
// not visible in this listing; the exact mapping must be checked against the full file.
// Declared to throw InvalidInputException; no throw is visible in this listing.
3000 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
3002 //first dispatch on the first char.
3003 //then the length. If there are several
3004 //keywords with the same length AND the same first char, then do another
3005 //dispatch on the second char :-)...cool....but fast !
3006 useAssertAsAnIndentifier = false;
3007 while (getNextCharAsJavaIdentifierPart()) {
3011 // if (new String(getCurrentTokenSource()).equals("$this")) {
3012 // return TokenNamethis;
3014 return TokenNameVariable;
3019 // if (withoutUnicodePtr == 0)
3020 //quick test on length == 1 but not on length > 12 while most identifier
3021 //have a length which is <= 12...but there are lots of identifier with
3024 if ((length = currentPosition - startPosition) == 1)
3025 return TokenNameIdentifier;
// lower-cased private copy of the token text, used only for the keyword comparison
3027 data = new char[length];
3028 index = startPosition;
3029 for (int i = 0; i < length; i++) {
3030 data[i] = Character.toLowerCase(source[index + i]);
3034 // if ((length = withoutUnicodePtr) == 1)
3035 // return TokenNameIdentifier;
3036 // // data = withoutUnicodeBuffer;
3037 // data = new char[withoutUnicodeBuffer.length];
3038 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3039 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3043 firstLetter = data[index];
3044 switch (firstLetter) {
// magic constants: __file__, __line__, __class__, __method__, __function__
3049 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3050 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3051 return TokenNameFILE;
3052 index = 0; //__LINE__
3053 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3054 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3055 return TokenNameLINE;
3059 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3060 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3061 return TokenNameCLASS_C;
3065 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3066 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3067 && (data[++index] == '_'))
3068 return TokenNameMETHOD_C;
3072 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3073 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3074 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3075 return TokenNameFUNC_C;
3078 return TokenNameIdentifier;
3080 // as and array abstract
3084 if ((data[++index] == 's')) {
3087 return TokenNameIdentifier;
3091 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3092 return TokenNameand;
3094 return TokenNameIdentifier;
3098 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3099 return TokenNamearray;
3101 return TokenNameIdentifier;
3103 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3104 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3105 return TokenNameabstract;
3107 return TokenNameIdentifier;
3109 return TokenNameIdentifier;
3115 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3116 return TokenNamebreak;
3118 return TokenNameIdentifier;
3120 return TokenNameIdentifier;
3123 //case catch class clone const continue
3126 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3127 return TokenNamecase;
3129 return TokenNameIdentifier;
3131 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3132 return TokenNamecatch;
3134 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3135 return TokenNameclass;
3137 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3138 return TokenNameclone;
3140 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3141 return TokenNameconst;
3143 return TokenNameIdentifier;
3145 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3146 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3147 return TokenNamecontinue;
3149 return TokenNameIdentifier;
3151 return TokenNameIdentifier;
3154 // declare default do die
3155 // TODO delete define ==> no keyword !
3158 if ((data[++index] == 'o'))
3161 return TokenNameIdentifier;
3163 // if ((data[++index] == 'e')
3164 // && (data[++index] == 'f')
3165 // && (data[++index] == 'i')
3166 // && (data[++index] == 'n')
3167 // && (data[++index] == 'e'))
3168 // return TokenNamedefine;
3170 // return TokenNameIdentifier;
3172 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3173 && (data[++index] == 'r') && (data[++index] == 'e'))
3174 return TokenNamedeclare;
3176 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3177 && (data[++index] == 'l') && (data[++index] == 't'))
3178 return TokenNamedefault;
3180 return TokenNameIdentifier;
3182 return TokenNameIdentifier;
3185 //echo else exit elseif extends eval
// in the else-if chains below, data[index] (no increment) re-tests the character on which
// the previous comparison stopped
3188 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3189 return TokenNameecho;
3190 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3191 return TokenNameelse;
3192 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3193 return TokenNameexit;
3194 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3195 return TokenNameeval;
3197 return TokenNameIdentifier;
3200 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3201 return TokenNameendif;
3202 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3203 return TokenNameempty;
3205 return TokenNameIdentifier;
3208 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3209 && (data[++index] == 'r'))
3210 return TokenNameendfor;
3211 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3212 && (data[++index] == 'f'))
3213 return TokenNameelseif;
3215 return TokenNameIdentifier;
3217 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3218 && (data[++index] == 'd') && (data[++index] == 's'))
3219 return TokenNameextends;
3221 return TokenNameIdentifier;
3224 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3225 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3226 return TokenNameendwhile;
3228 return TokenNameIdentifier;
3231 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3232 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3233 return TokenNameendswitch;
3235 return TokenNameIdentifier;
3238 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3239 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3240 && (data[++index] == 'e'))
3241 return TokenNameenddeclare;
3243 if ((data[++index] == 'n') // endforeach
3244 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3245 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3246 return TokenNameendforeach;
3248 return TokenNameIdentifier;
3250 return TokenNameIdentifier;
3253 //for false final function
3256 if ((data[++index] == 'o') && (data[++index] == 'r'))
3257 return TokenNamefor;
3259 return TokenNameIdentifier;
3261 // if ((data[++index] == 'a') && (data[++index] == 'l')
3262 // && (data[++index] == 's') && (data[++index] == 'e'))
3263 // return TokenNamefalse;
3264 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3265 return TokenNamefinal;
3267 return TokenNameIdentifier;
3270 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3271 && (data[++index] == 'c') && (data[++index] == 'h'))
3272 return TokenNameforeach;
3274 return TokenNameIdentifier;
3277 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3278 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3279 return TokenNamefunction;
3281 return TokenNameIdentifier;
3283 return TokenNameIdentifier;
3288 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3289 && (data[++index] == 'l')) {
3290 return TokenNameglobal;
3293 return TokenNameIdentifier;
3295 //if int isset include include_once instanceof interface implements
3298 if (data[++index] == 'f')
3301 return TokenNameIdentifier;
3303 // if ((data[++index] == 'n') && (data[++index] == 't'))
3304 // return TokenNameint;
3306 // return TokenNameIdentifier;
3308 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3309 return TokenNameisset;
3311 return TokenNameIdentifier;
3313 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3314 && (data[++index] == 'd') && (data[++index] == 'e'))
3315 return TokenNameinclude;
3317 return TokenNameIdentifier;
3320 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3321 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3322 return TokenNameinterface;
3324 return TokenNameIdentifier;
3327 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3328 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3329 && (data[++index] == 'f'))
3330 return TokenNameinstanceof;
3331 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3332 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3333 && (data[++index] == 's'))
3334 return TokenNameimplements;
3336 return TokenNameIdentifier;
3338 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3339 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3340 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3341 return TokenNameinclude_once;
3343 return TokenNameIdentifier;
3345 return TokenNameIdentifier;
3350 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3351 return TokenNamelist;
3354 return TokenNameIdentifier;
3359 if ((data[++index] == 'e') && (data[++index] == 'w'))
3360 return TokenNamenew;
3362 return TokenNameIdentifier;
3364 // if ((data[++index] == 'u') && (data[++index] == 'l')
3365 // && (data[++index] == 'l'))
3366 // return TokenNamenull;
3368 // return TokenNameIdentifier;
3370 return TokenNameIdentifier;
3375 if (data[++index] == 'r') {
3379 // if (length == 12) {
3380 // if ((data[++index] == 'l')
3381 // && (data[++index] == 'd')
3382 // && (data[++index] == '_')
3383 // && (data[++index] == 'f')
3384 // && (data[++index] == 'u')
3385 // && (data[++index] == 'n')
3386 // && (data[++index] == 'c')
3387 // && (data[++index] == 't')
3388 // && (data[++index] == 'i')
3389 // && (data[++index] == 'o')
3390 // && (data[++index] == 'n')) {
3391 // return TokenNameold_function;
3394 return TokenNameIdentifier;
3396 // print public private protected
3399 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3400 return TokenNameprint;
3402 return TokenNameIdentifier;
3404 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3405 && (data[++index] == 'c')) {
3406 return TokenNamepublic;
3408 return TokenNameIdentifier;
3410 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3411 && (data[++index] == 't') && (data[++index] == 'e')) {
3412 return TokenNameprivate;
3414 return TokenNameIdentifier;
3416 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3417 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3418 return TokenNameprotected;
3420 return TokenNameIdentifier;
3422 return TokenNameIdentifier;
3424 //return require require_once
3426 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3427 && (data[++index] == 'n')) {
3428 return TokenNamereturn;
3430 } else if (length == 7) {
3431 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3432 && (data[++index] == 'r') && (data[++index] == 'e')) {
3433 return TokenNamerequire;
3435 } else if (length == 12) {
3436 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3437 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3438 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3439 return TokenNamerequire_once;
3442 return TokenNameIdentifier;
3447 if (data[++index] == 't')
3448 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3449 return TokenNamestatic;
3451 return TokenNameIdentifier;
3452 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3453 && (data[++index] == 'h'))
3454 return TokenNameswitch;
3456 return TokenNameIdentifier;
3458 return TokenNameIdentifier;
3464 if ((data[++index] == 'r') && (data[++index] == 'y'))
3465 return TokenNametry;
3467 return TokenNameIdentifier;
3469 // if ((data[++index] == 'r') && (data[++index] == 'u')
3470 // && (data[++index] == 'e'))
3471 // return TokenNametrue;
3473 // return TokenNameIdentifier;
3475 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3476 return TokenNamethrow;
3478 return TokenNameIdentifier;
3480 return TokenNameIdentifier;
3486 if ((data[++index] == 's') && (data[++index] == 'e'))
3487 return TokenNameuse;
3489 return TokenNameIdentifier;
3491 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3492 return TokenNameunset;
3494 return TokenNameIdentifier;
3496 return TokenNameIdentifier;
3502 if ((data[++index] == 'a') && (data[++index] == 'r'))
3503 return TokenNamevar;
3505 return TokenNameIdentifier;
3507 return TokenNameIdentifier;
3513 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3514 return TokenNamewhile;
3516 return TokenNameIdentifier;
3517 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3518 // (data[++index]=='e') && (data[++index]=='f')&&
3519 // (data[++index]=='p'))
3520 //return TokenNamewidefp ;
3522 //return TokenNameIdentifier;
3524 return TokenNameIdentifier;
3530 if ((data[++index] == 'o') && (data[++index] == 'r'))
3531 return TokenNamexor;
3533 return TokenNameIdentifier;
3535 return TokenNameIdentifier;
// no keyword starts with this first letter
3538 return TokenNameIdentifier;
3542 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3543 //when entering this method the currentCharacter is the firt
3544 //digit of the number , i.e. it may be preceeded by a . when
3546 boolean floating = dotPrefix;
3547 if ((!dotPrefix) && (currentCharacter == '0')) {
3548 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3549 //force the first char of the hexa number do exist...
3550 // consume next character
3551 unicodeAsBackSlash = false;
3552 currentCharacter = source[currentPosition++];
3553 // if (((currentCharacter = source[currentPosition++]) == '\\')
3554 // && (source[currentPosition] == 'u')) {
3555 // getNextUnicodeChar();
3557 // if (withoutUnicodePtr != 0) {
3558 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3561 if (Character.digit(currentCharacter, 16) == -1)
3562 throw new InvalidInputException(INVALID_HEXA);
3564 while (getNextCharAsDigit(16)) {
3567 // if (getNextChar('l', 'L') >= 0)
3568 // return TokenNameLongLiteral;
3570 return TokenNameIntegerLiteral;
3572 //there is x or X in the number
3573 //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3574 // 00078.0 is true !!!!! crazy language
3575 if (getNextCharAsDigit()) {
3576 //-------------potential octal-----------------
3577 while (getNextCharAsDigit()) {
3580 // if (getNextChar('l', 'L') >= 0) {
3581 // return TokenNameLongLiteral;
3584 // if (getNextChar('f', 'F') >= 0) {
3585 // return TokenNameFloatingPointLiteral;
3587 if (getNextChar('d', 'D') >= 0) {
3588 return TokenNameDoubleLiteral;
3589 } else { //make the distinction between octal and float ....
3590 if (getNextChar('.')) { //bingo ! ....
3591 while (getNextCharAsDigit()) {
3594 if (getNextChar('e', 'E') >= 0) {
3595 // consume next character
3596 unicodeAsBackSlash = false;
3597 currentCharacter = source[currentPosition++];
3598 // if (((currentCharacter = source[currentPosition++]) == '\\')
3599 // && (source[currentPosition] == 'u')) {
3600 // getNextUnicodeChar();
3602 // if (withoutUnicodePtr != 0) {
3603 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3606 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3607 // consume next character
3608 unicodeAsBackSlash = false;
3609 currentCharacter = source[currentPosition++];
3610 // if (((currentCharacter = source[currentPosition++]) == '\\')
3611 // && (source[currentPosition] == 'u')) {
3612 // getNextUnicodeChar();
3614 // if (withoutUnicodePtr != 0) {
3615 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3616 // currentCharacter;
3620 if (!Character.isDigit(currentCharacter))
3621 throw new InvalidInputException(INVALID_FLOAT);
3622 while (getNextCharAsDigit()) {
3626 // if (getNextChar('f', 'F') >= 0)
3627 // return TokenNameFloatingPointLiteral;
3628 getNextChar('d', 'D'); //jump over potential d or D
3629 return TokenNameDoubleLiteral;
3631 return TokenNameIntegerLiteral;
3638 while (getNextCharAsDigit()) {
3641 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3642 // return TokenNameLongLiteral;
3643 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3644 while (getNextCharAsDigit()) {
3649 //if floating is true both exponant and suffix may be optional
3650 if (getNextChar('e', 'E') >= 0) {
3652 // consume next character
3653 unicodeAsBackSlash = false;
3654 currentCharacter = source[currentPosition++];
3655 // if (((currentCharacter = source[currentPosition++]) == '\\')
3656 // && (source[currentPosition] == 'u')) {
3657 // getNextUnicodeChar();
3659 // if (withoutUnicodePtr != 0) {
3660 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3663 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3666 unicodeAsBackSlash = false;
3667 currentCharacter = source[currentPosition++];
3668 // if (((currentCharacter = source[currentPosition++]) == '\\')
3669 // && (source[currentPosition] == 'u')) {
3670 // getNextUnicodeChar();
3672 // if (withoutUnicodePtr != 0) {
3673 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3677 if (!Character.isDigit(currentCharacter))
3678 throw new InvalidInputException(INVALID_FLOAT);
3679 while (getNextCharAsDigit()) {
3683 if (getNextChar('d', 'D') >= 0)
3684 return TokenNameDoubleLiteral;
3685 // if (getNextChar('f', 'F') >= 0)
3686 // return TokenNameFloatingPointLiteral;
3687 //the long flag has been tested before
3688 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3692 * Search the line number corresponding to a specific position
3695 public final int getLineNumber(int position) {
3696 if (lineEnds == null)
3698 int length = linePtr + 1;
3701 int g = 0, d = length - 1;
3705 if (position < lineEnds[m]) {
3707 } else if (position > lineEnds[m]) {
3713 if (position < lineEnds[m]) {
3719 public void setPHPMode(boolean mode) {
/**
 * Sets the character buffer to scan without associating a compilation unit; equivalent to calling
 * <code>setSource(null, source)</code>.
 *
 * @param source
 *          the characters to scan, forwarded unchanged to the two-argument overload
 */
public final void setSource(char[] source) {
  setSource(null, source);
}
3727 public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3728 //the source-buffer is set to sourceString
3729 this.compilationUnit = compilationUnit;
3730 if (source == null) {
3731 this.source = new char[0];
3733 this.source = source;
3736 initialPosition = currentPosition = 0;
3737 containsAssertKeyword = false;
3738 withoutUnicodeBuffer = new char[this.source.length];
3739 encapsedStringStack = new Stack();
3742 public String toString() {
3743 if (startPosition == source.length)
3744 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3745 if (currentPosition > source.length)
3746 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3747 char front[] = new char[startPosition];
3748 System.arraycopy(source, 0, front, 0, startPosition);
3749 int middleLength = (currentPosition - 1) - startPosition + 1;
3751 if (middleLength > -1) {
3752 middle = new char[middleLength];
3753 System.arraycopy(source, startPosition, middle, 0, middleLength);
3755 middle = new char[0];
3757 char end[] = new char[source.length - (currentPosition - 1)];
3758 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3759 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3760 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3764 public final String toStringAction(int act) {
3766 case TokenNameERROR:
3767 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3769 case TokenNameINLINE_HTML:
3770 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3771 case TokenNameIdentifier:
3772 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3773 case TokenNameVariable:
3774 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3775 case TokenNameabstract:
3776 return "abstract"; //$NON-NLS-1$
3778 return "AND"; //$NON-NLS-1$
3779 case TokenNamearray:
3780 return "array"; //$NON-NLS-1$
3782 return "as"; //$NON-NLS-1$
3783 case TokenNamebreak:
3784 return "break"; //$NON-NLS-1$
3786 return "case"; //$NON-NLS-1$
3787 case TokenNameclass:
3788 return "class"; //$NON-NLS-1$
3789 case TokenNamecatch:
3790 return "catch"; //$NON-NLS-1$
3791 case TokenNameclone:
3794 case TokenNameconst:
3797 case TokenNamecontinue:
3798 return "continue"; //$NON-NLS-1$
3799 case TokenNamedefault:
3800 return "default"; //$NON-NLS-1$
3801 // case TokenNamedefine :
3802 // return "define"; //$NON-NLS-1$
3804 return "do"; //$NON-NLS-1$
3806 return "echo"; //$NON-NLS-1$
3808 return "else"; //$NON-NLS-1$
3809 case TokenNameelseif:
3810 return "elseif"; //$NON-NLS-1$
3811 case TokenNameendfor:
3812 return "endfor"; //$NON-NLS-1$
3813 case TokenNameendforeach:
3814 return "endforeach"; //$NON-NLS-1$
3815 case TokenNameendif:
3816 return "endif"; //$NON-NLS-1$
3817 case TokenNameendswitch:
3818 return "endswitch"; //$NON-NLS-1$
3819 case TokenNameendwhile:
3820 return "endwhile"; //$NON-NLS-1$
3823 case TokenNameextends:
3824 return "extends"; //$NON-NLS-1$
3825 // case TokenNamefalse :
3826 // return "false"; //$NON-NLS-1$
3827 case TokenNamefinal:
3828 return "final"; //$NON-NLS-1$
3830 return "for"; //$NON-NLS-1$
3831 case TokenNameforeach:
3832 return "foreach"; //$NON-NLS-1$
3833 case TokenNamefunction:
3834 return "function"; //$NON-NLS-1$
3835 case TokenNameglobal:
3836 return "global"; //$NON-NLS-1$
3838 return "if"; //$NON-NLS-1$
3839 case TokenNameimplements:
3840 return "implements"; //$NON-NLS-1$
3841 case TokenNameinclude:
3842 return "include"; //$NON-NLS-1$
3843 case TokenNameinclude_once:
3844 return "include_once"; //$NON-NLS-1$
3845 case TokenNameinstanceof:
3846 return "instanceof"; //$NON-NLS-1$
3847 case TokenNameinterface:
3848 return "interface"; //$NON-NLS-1$
3849 case TokenNameisset:
3850 return "isset"; //$NON-NLS-1$
3852 return "list"; //$NON-NLS-1$
3854 return "new"; //$NON-NLS-1$
3855 // case TokenNamenull :
3856 // return "null"; //$NON-NLS-1$
3858 return "OR"; //$NON-NLS-1$
3859 case TokenNameprint:
3860 return "print"; //$NON-NLS-1$
3861 case TokenNameprivate:
3862 return "private"; //$NON-NLS-1$
3863 case TokenNameprotected:
3864 return "protected"; //$NON-NLS-1$
3865 case TokenNamepublic:
3866 return "public"; //$NON-NLS-1$
3867 case TokenNamerequire:
3868 return "require"; //$NON-NLS-1$
3869 case TokenNamerequire_once:
3870 return "require_once"; //$NON-NLS-1$
3871 case TokenNamereturn:
3872 return "return"; //$NON-NLS-1$
3873 case TokenNamestatic:
3874 return "static"; //$NON-NLS-1$
3875 case TokenNameswitch:
3876 return "switch"; //$NON-NLS-1$
3877 // case TokenNametrue :
3878 // return "true"; //$NON-NLS-1$
3879 case TokenNameunset:
3880 return "unset"; //$NON-NLS-1$
3882 return "var"; //$NON-NLS-1$
3883 case TokenNamewhile:
3884 return "while"; //$NON-NLS-1$
3886 return "XOR"; //$NON-NLS-1$
3887 // case TokenNamethis :
3888 // return "$this"; //$NON-NLS-1$
3889 case TokenNameIntegerLiteral:
3890 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3891 case TokenNameDoubleLiteral:
3892 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3893 case TokenNameStringDoubleQuote:
3894 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3895 case TokenNameStringSingleQuote:
3896 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3897 case TokenNameStringInterpolated:
3898 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3899 case TokenNameEncapsedString0:
3900 return "`"; //$NON-NLS-1$
3901 case TokenNameEncapsedString1:
3902 return "\'"; //$NON-NLS-1$
3903 case TokenNameEncapsedString2:
3904 return "\""; //$NON-NLS-1$
3905 case TokenNameSTRING:
3906 return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3907 case TokenNameHEREDOC:
3908 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3909 case TokenNamePLUS_PLUS:
3910 return "++"; //$NON-NLS-1$
3911 case TokenNameMINUS_MINUS:
3912 return "--"; //$NON-NLS-1$
3913 case TokenNameEQUAL_EQUAL:
3914 return "=="; //$NON-NLS-1$
3915 case TokenNameEQUAL_EQUAL_EQUAL:
3916 return "==="; //$NON-NLS-1$
3917 case TokenNameEQUAL_GREATER:
3918 return "=>"; //$NON-NLS-1$
3919 case TokenNameLESS_EQUAL:
3920 return "<="; //$NON-NLS-1$
3921 case TokenNameGREATER_EQUAL:
3922 return ">="; //$NON-NLS-1$
3923 case TokenNameNOT_EQUAL:
3924 return "!="; //$NON-NLS-1$
3925 case TokenNameNOT_EQUAL_EQUAL:
3926 return "!=="; //$NON-NLS-1$
3927 case TokenNameLEFT_SHIFT:
3928 return "<<"; //$NON-NLS-1$
3929 case TokenNameRIGHT_SHIFT:
3930 return ">>"; //$NON-NLS-1$
3931 case TokenNamePLUS_EQUAL:
3932 return "+="; //$NON-NLS-1$
3933 case TokenNameMINUS_EQUAL:
3934 return "-="; //$NON-NLS-1$
3935 case TokenNameMULTIPLY_EQUAL:
3936 return "*="; //$NON-NLS-1$
3937 case TokenNameDIVIDE_EQUAL:
3938 return "/="; //$NON-NLS-1$
3939 case TokenNameAND_EQUAL:
3940 return "&="; //$NON-NLS-1$
3941 case TokenNameOR_EQUAL:
3942 return "|="; //$NON-NLS-1$
3943 case TokenNameXOR_EQUAL:
3944 return "^="; //$NON-NLS-1$
3945 case TokenNameREMAINDER_EQUAL:
3946 return "%="; //$NON-NLS-1$
3947 case TokenNameDOT_EQUAL:
3948 return ".="; //$NON-NLS-1$
3949 case TokenNameLEFT_SHIFT_EQUAL:
3950 return "<<="; //$NON-NLS-1$
3951 case TokenNameRIGHT_SHIFT_EQUAL:
3952 return ">>="; //$NON-NLS-1$
3953 case TokenNameOR_OR:
3954 return "||"; //$NON-NLS-1$
3955 case TokenNameAND_AND:
3956 return "&&"; //$NON-NLS-1$
3958 return "+"; //$NON-NLS-1$
3959 case TokenNameMINUS:
3960 return "-"; //$NON-NLS-1$
3961 case TokenNameMINUS_GREATER:
3964 return "!"; //$NON-NLS-1$
3965 case TokenNameREMAINDER:
3966 return "%"; //$NON-NLS-1$
3968 return "^"; //$NON-NLS-1$
3970 return "&"; //$NON-NLS-1$
3971 case TokenNameMULTIPLY:
3972 return "*"; //$NON-NLS-1$
3974 return "|"; //$NON-NLS-1$
3975 case TokenNameTWIDDLE:
3976 return "~"; //$NON-NLS-1$
3977 case TokenNameTWIDDLE_EQUAL:
3978 return "~="; //$NON-NLS-1$
3979 case TokenNameDIVIDE:
3980 return "/"; //$NON-NLS-1$
3981 case TokenNameGREATER:
3982 return ">"; //$NON-NLS-1$
3984 return "<"; //$NON-NLS-1$
3985 case TokenNameLPAREN:
3986 return "("; //$NON-NLS-1$
3987 case TokenNameRPAREN:
3988 return ")"; //$NON-NLS-1$
3989 case TokenNameLBRACE:
3990 return "{"; //$NON-NLS-1$
3991 case TokenNameRBRACE:
3992 return "}"; //$NON-NLS-1$
3993 case TokenNameLBRACKET:
3994 return "["; //$NON-NLS-1$
3995 case TokenNameRBRACKET:
3996 return "]"; //$NON-NLS-1$
3997 case TokenNameSEMICOLON:
3998 return ";"; //$NON-NLS-1$
3999 case TokenNameQUESTION:
4000 return "?"; //$NON-NLS-1$
4001 case TokenNameCOLON:
4002 return ":"; //$NON-NLS-1$
4003 case TokenNameCOMMA:
4004 return ","; //$NON-NLS-1$
4006 return "."; //$NON-NLS-1$
4007 case TokenNameEQUAL:
4008 return "="; //$NON-NLS-1$
4011 case TokenNameDOLLAR:
4013 case TokenNameDOLLAR_LBRACE:
4015 case TokenNameLBRACE_DOLLAR:
4018 return "EOF"; //$NON-NLS-1$
4019 case TokenNameWHITESPACE:
4020 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4021 case TokenNameCOMMENT_LINE:
4022 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4023 case TokenNameCOMMENT_BLOCK:
4024 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4025 case TokenNameCOMMENT_PHPDOC:
4026 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4027 // case TokenNameHTML :
4028 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
4031 return "__FILE__"; //$NON-NLS-1$
4033 return "__LINE__"; //$NON-NLS-1$
4034 case TokenNameCLASS_C:
4035 return "__CLASS__"; //$NON-NLS-1$
4036 case TokenNameMETHOD_C:
4037 return "__METHOD__"; //$NON-NLS-1$
4038 case TokenNameFUNC_C:
4039 return "__FUNCTION__"; //$NON-NLS-1
4040 case TokenNameboolCAST:
4041 return "( bool )"; //$NON-NLS-1$
4042 case TokenNameintCAST:
4043 return "( int )"; //$NON-NLS-1$
4044 case TokenNamedoubleCAST:
4045 return "( double )"; //$NON-NLS-1$
4046 case TokenNameobjectCAST:
4047 return "( object )"; //$NON-NLS-1$
4048 case TokenNamestringCAST:
4049 return "( string )"; //$NON-NLS-1$
4051 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner that does not check for non-externalized string literals; delegates to the three-argument
 * constructor with <code>checkNonExternalizedStringLiterals</code> set to false.
 *
 * @param tokenizeComments
 *          forwarded unchanged
 * @param tokenizeWhiteSpace
 *          forwarded unchanged
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(tokenizeComments, tokenizeWhiteSpace, false);
}
/**
 * Creates a scanner with assert mode switched off; delegates to the four-argument constructor with
 * <code>assertMode</code> set to false.
 *
 * @param tokenizeComments
 *          forwarded unchanged
 * @param tokenizeWhiteSpace
 *          forwarded unchanged
 * @param checkNonExternalizedStringLiterals
 *          forwarded unchanged
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
  this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
}
/**
 * Creates a scanner without string tokenizing and without task tags; delegates to the full constructor with
 * <code>tokenizeStrings</code> false, <code>taskTags</code> and <code>taskPriorities</code> null, and
 * <code>isTaskCaseSensitive</code> true.
 *
 * @param tokenizeComments
 *          forwarded unchanged
 * @param tokenizeWhiteSpace
 *          forwarded unchanged
 * @param checkNonExternalizedStringLiterals
 *          forwarded unchanged
 * @param assertMode
 *          forwarded unchanged
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode) {
  this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null, true);
}
/**
 * Creates a fully configured scanner. Every argument is stored in the field of the same name; the end-of-file position
 * starts at <code>Integer.MAX_VALUE</code> and the encapsed string stack starts out as null.
 *
 * @param tokenizeComments
 *          stored in <code>this.tokenizeComments</code>
 * @param tokenizeWhiteSpace
 *          stored in <code>this.tokenizeWhiteSpace</code>
 * @param checkNonExternalizedStringLiterals
 *          stored in <code>this.checkNonExternalizedStringLiterals</code>
 * @param assertMode
 *          stored in <code>this.assertMode</code>
 * @param tokenizeStrings
 *          stored in <code>this.tokenizeStrings</code>
 * @param taskTags
 *          the task tags looked for by <code>checkTaskTag</code>, may be null
 * @param taskPriorities
 *          priorities matched to <code>taskTags</code> by index, may be null
 * @param isTaskCaseSensitive
 *          whether <code>checkTaskTag</code> compares tags case sensitively
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
  this.eofPosition = Integer.MAX_VALUE;
  this.tokenizeComments = tokenizeComments;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeStrings = tokenizeStrings;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.assertMode = assertMode;
  this.encapsedStringStack = null;
  this.taskTags = taskTags;
  this.taskPriorities = taskPriorities;
  // Previously the argument was silently dropped, so checkTaskTag always used the field's
  // initial value regardless of what the caller asked for.
  this.isTaskCaseSensitive = isTaskCaseSensitive;
}
4085 private void checkNonExternalizeString() throws InvalidInputException {
4086 if (currentLine == null)
4088 parseTags(currentLine);
4091 private void parseTags(NLSLine line) throws InvalidInputException {
4092 String s = new String(getCurrentTokenSource());
4093 int pos = s.indexOf(TAG_PREFIX);
4094 int lineLength = line.size();
4096 int start = pos + TAG_PREFIX_LENGTH;
4097 int end = s.indexOf(TAG_POSTFIX, start);
4098 String index = s.substring(start, end);
4101 i = Integer.parseInt(index) - 1;
4102 // Tags are one based not zero based.
4103 } catch (NumberFormatException e) {
4104 i = -1; // we don't want to consider this as a valid NLS tag
4106 if (line.exists(i)) {
4109 pos = s.indexOf(TAG_PREFIX, start);
4111 this.nonNLSStrings = new StringLiteral[lineLength];
4112 int nonNLSCounter = 0;
4113 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4114 StringLiteral literal = (StringLiteral) iterator.next();
4115 if (literal != null) {
4116 this.nonNLSStrings[nonNLSCounter++] = literal;
4119 if (nonNLSCounter == 0) {
4120 this.nonNLSStrings = null;
4124 this.wasNonExternalizedStringLiteral = true;
4125 if (nonNLSCounter != lineLength) {
4126 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
4131 public final void scanEscapeCharacter() throws InvalidInputException {
4132 // the string with "\\u" is a legal string of two chars \ and u
4133 //thus we use a direct access to the source (for regular cases).
4134 if (unicodeAsBackSlash) {
4135 // consume next character
4136 unicodeAsBackSlash = false;
4137 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4138 // (source[currentPosition] == 'u')) {
4139 // getNextUnicodeChar();
4141 if (withoutUnicodePtr != 0) {
4142 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
4146 currentCharacter = source[currentPosition++];
4147 switch (currentCharacter) {
4149 currentCharacter = '\b';
4152 currentCharacter = '\t';
4155 currentCharacter = '\n';
4158 currentCharacter = '\f';
4161 currentCharacter = '\r';
4164 currentCharacter = '\"';
4167 currentCharacter = '\'';
4170 currentCharacter = '\\';
4173 // -----------octal escape--------------
4175 // OctalDigit OctalDigit
4176 // ZeroToThree OctalDigit OctalDigit
4177 int number = Character.getNumericValue(currentCharacter);
4178 if (number >= 0 && number <= 7) {
4179 boolean zeroToThreeNot = number > 3;
4180 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4181 int digit = Character.getNumericValue(currentCharacter);
4182 if (digit >= 0 && digit <= 7) {
4183 number = (number * 8) + digit;
4184 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4185 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4186 // Digit --> ignore last character
4189 digit = Character.getNumericValue(currentCharacter);
4190 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4191 // OctalDigit OctalDigit
4192 number = (number * 8) + digit;
4193 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4194 // --> ignore last character
4198 } else { // has read \OctalDigit NonDigit--> ignore last
4202 } else { // has read \OctalDigit NonOctalDigit--> ignore last
4206 } else { // has read \OctalDigit --> ignore last character
4210 throw new InvalidInputException(INVALID_ESCAPE);
4211 currentCharacter = (char) number;
4213 throw new InvalidInputException(INVALID_ESCAPE);
//check presence of task: tags
4218 //TODO (frederic) see if we need to take unicode characters into account...
4219 public void checkTaskTag(int commentStart, int commentEnd) {
4220 char[] src = this.source;
4222 // only look for newer task: tags
4223 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4226 int foundTaskIndex = this.foundTaskCount;
4227 char previous = src[commentStart + 1]; // should be '*' or '/'
4228 nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4230 char[] priority = null;
4231 // check for tag occurrence only if not ambiguous with javadoc tag
4232 if (previous != '@') {
4233 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4234 tag = this.taskTags[itag];
4235 int tagLength = tag.length;
4239 // ensure tag is not leaded with letter if tag starts with a letter
4240 if (Character.isJavaIdentifierStart(tag[0])) {
4241 if (Character.isJavaIdentifierPart(previous)) {
4246 for (int t = 0; t < tagLength; t++) {
4249 if (x >= this.eofPosition || x >= commentEnd)
4251 if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4252 if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case insensitive check
4257 // ensure tag is not followed with letter if tag finishes with a letter
4258 if (i + tagLength < commentEnd && Character.isJavaIdentifierPart(src[i + tagLength - 1])) {
4259 if (Character.isJavaIdentifierPart(src[i + tagLength]))
4262 if (this.foundTaskTags == null) {
4263 this.foundTaskTags = new char[5][];
4264 this.foundTaskMessages = new char[5][];
4265 this.foundTaskPriorities = new char[5][];
4266 this.foundTaskPositions = new int[5][];
4267 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4268 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4269 this.foundTaskCount);
4270 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4271 this.foundTaskCount);
4272 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4273 this.foundTaskCount);
4274 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4275 this.foundTaskCount);
4278 priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4280 this.foundTaskTags[this.foundTaskCount] = tag;
4281 this.foundTaskPriorities[this.foundTaskCount] = priority;
4282 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4283 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4284 this.foundTaskCount++;
4285 i += tagLength - 1; // will be incremented when looping
4291 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4292 // retrieve message start and end positions
4293 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4294 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4295 // at most beginning of next task
4296 if (max_value < msgStart) {
4297 max_value = msgStart; // would only occur if tag is before EOF.
4301 for (int j = msgStart; j < max_value; j++) {
4302 if ((c = src[j]) == '\n' || c == '\r') {
4308 for (int j = max_value; j > msgStart; j--) {
4309 if ((c = src[j]) == '*') {
4317 if (msgStart == end)
4320 while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4322 while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4324 // update the end position of the task
4325 this.foundTaskPositions[i][1] = end;
4326 // get the message source
4327 final int messageLength = end - msgStart + 1;
4328 char[] message = new char[messageLength];
4329 System.arraycopy(src, msgStart, message, 0, messageLength);
4330 this.foundTaskMessages[i] = message;
// check presence of task: tags
4335 // public void checkTaskTag(int commentStart, int commentEnd) {
4336 // // only look for newer task: tags
4337 // if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4340 // int foundTaskIndex = this.foundTaskCount;
4341 // nextChar: for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
4342 // char[] tag = null;
4343 // char[] priority = null;
4344 // // check for tag occurrence
4345 // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4346 // tag = this.taskTags[itag];
4347 // priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4348 // int tagLength = tag.length;
4349 // for (int t = 0; t < tagLength; t++) {
4350 // if (this.source[i + t] != tag[t])
4351 // continue nextTag;
4353 // if (this.foundTaskTags == null) {
4354 // this.foundTaskTags = new char[5][];
4355 // this.foundTaskMessages = new char[5][];
4356 // this.foundTaskPriorities = new char[5][];
4357 // this.foundTaskPositions = new int[5][];
4358 // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4359 // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4360 // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4361 // this.foundTaskCount);
4362 // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4363 // this.foundTaskCount);
4364 // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4365 // this.foundTaskCount);
4367 // this.foundTaskTags[this.foundTaskCount] = tag;
4368 // this.foundTaskPriorities[this.foundTaskCount] = priority;
4369 // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4370 // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4371 // this.foundTaskCount++;
4372 // i += tagLength - 1; // will be incremented when looping
4375 // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4376 // // retrieve message start and end positions
4377 // int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4378 // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4379 // // at most beginning of next task
4380 // if (max_value < msgStart)
4381 // max_value = msgStart; // would only occur if tag is before EOF.
4384 // for (int j = msgStart; j < max_value; j++) {
4385 // if ((c = this.source[j]) == '\n' || c == '\r') {
4391 // for (int j = max_value; j > msgStart; j--) {
4392 // if ((c = this.source[j]) == '*') {
4400 // if (msgStart == end)
4401 // continue; // empty
4402 // // trim the message
4403 // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4405 // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4407 // // update the end position of the task
4408 // this.foundTaskPositions[i][1] = end;
4409 // // get the message source
4410 // final int messageLength = end - msgStart + 1;
4411 // char[] message = new char[messageLength];
4412 // System.arraycopy(source, msgStart, message, 0, messageLength);
4413 // this.foundTaskMessages[i] = message;