1 /***********************************************************************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3 * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4 * and is available at http://www.eclipse.org/legal/cpl-v05.html
6 * Contributors: IBM Corporation - initial API and implementation
7 **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
13 import java.util.Stack;
15 import net.sourceforge.phpdt.core.compiler.CharOperation;
16 import net.sourceforge.phpdt.core.compiler.IScanner;
17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
20 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
22 public class Scanner implements IScanner, ITerminalSymbols {
  /*
   * The APIs are:
   * - getNextToken(), which returns the type of the current token (this value is not memorized by the scanner);
   * - getCurrentTokenSource(), which provides the token's "REAL" source (i.e. all unicode escapes have been
   *   transformed into the correct char);
   * - sourceStart, which gives the start position in the stream;
   * - currentPosition - 1, which gives the sourceEnd position in the stream.
   */
private boolean assertMode;

public boolean useAssertAsAnIndentifier = false;

// flag indicating whether the processed source contains occurrences of the keyword assert
public boolean containsAssertKeyword = false;

public boolean recordLineSeparator;

public boolean ignorePHPOneLiner = false;

public boolean phpMode = false;

public Stack encapsedStringStack = null;

// last character read from the source
public char currentCharacter;

// start of the current token
public int startPosition;

// index of the next character to read
public int currentPosition;

public int initialPosition, eofPosition;

// after this position, EOF is generated instead of real tokens from the source
public boolean tokenizeComments;

public boolean tokenizeWhiteSpace;

public boolean tokenizeStrings;

// the source should be viewed as a window (i.e. a part)
// of an entire, very large stream
public char[] withoutUnicodeBuffer;

public int withoutUnicodePtr;

// when == 0 ==> no unicode in the current token
public boolean unicodeAsBackSlash = false;

public boolean scanningFloatLiteral = false;

// support for /** comments
public int[] commentStops = new int[10];

public int[] commentStarts = new int[10];

public int commentPtr = -1; // no comment test with commentPtr value -1

protected int lastCommentLinePosition = -1;

// diet parsing support - jump over some method body when requested
public boolean diet = false;

// support for the poor-line-debuggers ....
// remember the position of the cr/lf
public int[] lineEnds = new int[250];

public int linePtr = -1;

public boolean wasAcr = false;
94 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
96 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
98 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
100 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
102 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
104 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
106 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
108 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
110 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
112 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
114 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
116 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
118 //----------------optimized identifier managment------------------
119 static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
120 charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
121 charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
122 charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
123 charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
124 charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
125 charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
126 charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
127 charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
129 static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
131 'c' }, charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' },
132 charArray_vf = new char[] { '$', 'f' }, charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' },
133 charArray_vi = new char[] { '$', 'i' }, charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' },
134 charArray_vl = new char[] { '$', 'l' }, charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' },
135 charArray_vo = new char[] { '$', 'o' }, charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' },
136 charArray_vr = new char[] { '$', 'r' }, charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' },
137 charArray_vu = new char[] { '$', 'u' }, charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' },
138 charArray_vx = new char[] { '$', 'x' }, charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
140 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
142 static final int TableSize = 30, InternalTableSize = 6;
145 public static final int OptimizedLength = 6;
148 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
150 // support for detecting non-externalized string literals
151 int currentLineNr = -1;
153 int previousLineNr = -1;
155 NLSLine currentLine = null;
157 List lines = new ArrayList();
159 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
161 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
163 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
165 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
167 public StringLiteral[] nonNLSStrings = null;
169 public boolean checkNonExternalizedStringLiterals = true;
171 public boolean wasNonExternalizedStringLiteral = false;
// Instance initializer: point every slot of charArray_length at the shared placeholder array.
{
  for (int i = 0; i < OptimizedLength; i++) {
    for (int j = 0; j < TableSize; j++) {
      for (int k = 0; k < InternalTableSize; k++) {
        charArray_length[i][j][k] = initCharArray;
      }
    }
  }
}
182 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
184 public static final int RoundBracket = 0;
186 public static final int SquareBracket = 1;
188 public static final int CurlyBracket = 2;
190 public static final int BracketKinds = 3;
193 public char[][] foundTaskTags = null;
195 public char[][] foundTaskMessages;
197 public char[][] foundTaskPriorities = null;
199 public int[][] foundTaskPositions;
201 public int foundTaskCount = 0;
203 public char[][] taskTags = null;
205 public char[][] taskPriorities = null;
207 public boolean isTaskCaseSensitive = true;
209 public static final boolean DEBUG = false;
211 public static final boolean TRACE = false;
213 public ICompilationUnit compilationUnit = null;
216 * Determines if the specified character is permissible as the first character in a PHP identifier or variable
218 * The '$' character for PHP variables is regarded as a correct first character !
221 public static boolean isPHPIdentOrVarStart(char ch) {
222 return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
226 * Determines if the specified character is permissible as the first character in a PHP identifier.
228 * The '$' character for PHP variables isn't regarded as the first character !
230 public static boolean isPHPIdentifierStart(char ch) {
231 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
235 * Determines if the specified character may be part of a PHP identifier as other than the first character
237 public static boolean isPHPIdentifierPart(char ch) {
238 return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
241 public final boolean atEnd() {
242 // This code is not relevant if source is
243 // Only a part of the real stream input
244 return source.length == currentPosition;
247 public char[] getCurrentIdentifierSource() {
248 //return the token REAL source (aka unicodes are precomputed)
250 // if (withoutUnicodePtr != 0)
251 // //0 is used as a fast test flag so the real first char is in position 1
253 // withoutUnicodeBuffer,
255 // result = new char[withoutUnicodePtr],
257 // withoutUnicodePtr);
259 int length = currentPosition - startPosition;
260 switch (length) { // see OptimizedLength
262 return optimizedCurrentTokenSource1();
264 return optimizedCurrentTokenSource2();
266 return optimizedCurrentTokenSource3();
268 return optimizedCurrentTokenSource4();
270 return optimizedCurrentTokenSource5();
272 return optimizedCurrentTokenSource6();
275 System.arraycopy(source, startPosition, result = new char[length], 0, length);
280 public int getCurrentTokenEndPosition() {
281 return this.currentPosition - 1;
284 public final char[] getCurrentTokenSource() {
285 // Return the token REAL source (aka unicodes are precomputed)
287 // if (withoutUnicodePtr != 0)
288 // // 0 is used as a fast test flag so the real first char is in position 1
290 // withoutUnicodeBuffer,
292 // result = new char[withoutUnicodePtr],
294 // withoutUnicodePtr);
297 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
302 public final char[] getCurrentTokenSource(int startPos) {
303 // Return the token REAL source (aka unicodes are precomputed)
305 // if (withoutUnicodePtr != 0)
306 // // 0 is used as a fast test flag so the real first char is in position 1
308 // withoutUnicodeBuffer,
310 // result = new char[withoutUnicodePtr],
312 // withoutUnicodePtr);
315 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
320 public final char[] getCurrentTokenSourceString() {
321 //return the token REAL source (aka unicodes are precomputed).
322 //REMOVE the two " that are at the beginning and the end.
324 if (withoutUnicodePtr != 0)
325 //0 is used as a fast test flag so the real first char is in position 1
326 System.arraycopy(withoutUnicodeBuffer, 2,
327 //2 is 1 (real start) + 1 (to jump over the ")
328 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
331 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
336 public final char[] getRawTokenSourceEnd() {
337 int length = this.eofPosition - this.currentPosition - 1;
338 char[] sourceEnd = new char[length];
339 System.arraycopy(this.source, this.currentPosition, sourceEnd, 0, length);
343 public int getCurrentTokenStartPosition() {
344 return this.startPosition;
347 public final char[] getCurrentStringLiteralSource() {
348 // Return the token REAL source (aka unicodes are precomputed)
349 if (startPosition + 1 >= currentPosition) {
354 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
359 public final char[] getCurrentStringLiteralSource(int startPos) {
360 // Return the token REAL source (aka unicodes are precomputed)
363 System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
369 * Search the source position corresponding to the end of a given line number
371 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
373 * In case the given line number is inconsistent, answers -1.
375 public final int getLineEnd(int lineNumber) {
376 if (lineEnds == null)
378 if (lineNumber >= lineEnds.length)
382 if (lineNumber == lineEnds.length - 1)
384 return lineEnds[lineNumber - 1];
385 // next line start one character behind the lineEnd of the previous line
389 * Search the source position corresponding to the beginning of a given line number
391 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
393 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
395 * In case the given line number is inconsistent, answers -1.
397 public final int getLineStart(int lineNumber) {
398 if (lineEnds == null)
400 if (lineNumber >= lineEnds.length)
405 return initialPosition;
406 return lineEnds[lineNumber - 2] + 1;
407 // next line start one character behind the lineEnd of the previous line
410 public final boolean getNextChar(char testedChar) {
412 //handle the case of unicode.
413 //when a unicode appears then we must use a buffer that holds char
415 //At the end of this method currentCharacter holds the new visited char
416 //and currentPosition points right next after it
417 //Both previous lines are true if the currentCharacter is == to the
419 //On false, no side effect has occured.
420 //ALL getNextChar.... ARE OPTIMIZED COPIES
421 int temp = currentPosition;
423 currentCharacter = source[currentPosition++];
424 // if (((currentCharacter = source[currentPosition++]) == '\\')
425 // && (source[currentPosition] == 'u')) {
426 // //-------------unicode traitement ------------
427 // int c1, c2, c3, c4;
428 // int unicodeSize = 6;
429 // currentPosition++;
430 // while (source[currentPosition] == 'u') {
431 // currentPosition++;
435 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
437 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
439 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
441 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
443 // currentPosition = temp;
447 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
448 // if (currentCharacter != testedChar) {
449 // currentPosition = temp;
452 // unicodeAsBackSlash = currentCharacter == '\\';
454 // //need the unicode buffer
455 // if (withoutUnicodePtr == 0) {
456 // //buffer all the entries that have been left aside....
457 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
461 // withoutUnicodeBuffer,
463 // withoutUnicodePtr);
465 // //fill the buffer with the char
466 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
469 // } //-------------end unicode traitement--------------
471 if (currentCharacter != testedChar) {
472 currentPosition = temp;
475 unicodeAsBackSlash = false;
476 // if (withoutUnicodePtr != 0)
477 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
480 } catch (IndexOutOfBoundsException e) {
481 unicodeAsBackSlash = false;
482 currentPosition = temp;
487 public final int getNextChar(char testedChar1, char testedChar2) {
488 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
489 //test can be done with (x==0) for the first and (x>0) for the second
490 //handle the case of unicode.
491 //when a unicode appears then we must use a buffer that holds char
493 //At the end of this method currentCharacter holds the new visited char
494 //and currentPosition points right next after it
495 //Both previous lines are true if the currentCharacter is == to the
497 //On false, no side effect has occured.
498 //ALL getNextChar.... ARE OPTIMIZED COPIES
499 int temp = currentPosition;
502 currentCharacter = source[currentPosition++];
503 // if (((currentCharacter = source[currentPosition++]) == '\\')
504 // && (source[currentPosition] == 'u')) {
505 // //-------------unicode traitement ------------
506 // int c1, c2, c3, c4;
507 // int unicodeSize = 6;
508 // currentPosition++;
509 // while (source[currentPosition] == 'u') {
510 // currentPosition++;
514 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
516 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
518 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
520 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
522 // currentPosition = temp;
526 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
527 // if (currentCharacter == testedChar1)
529 // else if (currentCharacter == testedChar2)
532 // currentPosition = temp;
536 // //need the unicode buffer
537 // if (withoutUnicodePtr == 0) {
538 // //buffer all the entries that have been left aside....
539 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
543 // withoutUnicodeBuffer,
545 // withoutUnicodePtr);
547 // //fill the buffer with the char
548 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
550 // } //-------------end unicode traitement--------------
552 if (currentCharacter == testedChar1)
554 else if (currentCharacter == testedChar2)
557 currentPosition = temp;
560 // if (withoutUnicodePtr != 0)
561 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
564 } catch (IndexOutOfBoundsException e) {
565 currentPosition = temp;
570 public final boolean getNextCharAsDigit() {
572 //handle the case of unicode.
573 //when a unicode appears then we must use a buffer that holds char
575 //At the end of this method currentCharacter holds the new visited char
576 //and currentPosition points right next after it
577 //Both previous lines are true if the currentCharacter is a digit
578 //On false, no side effect has occured.
579 //ALL getNextChar.... ARE OPTIMIZED COPIES
580 int temp = currentPosition;
582 currentCharacter = source[currentPosition++];
583 // if (((currentCharacter = source[currentPosition++]) == '\\')
584 // && (source[currentPosition] == 'u')) {
585 // //-------------unicode traitement ------------
586 // int c1, c2, c3, c4;
587 // int unicodeSize = 6;
588 // currentPosition++;
589 // while (source[currentPosition] == 'u') {
590 // currentPosition++;
594 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
596 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
598 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
600 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
602 // currentPosition = temp;
606 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
607 // if (!Character.isDigit(currentCharacter)) {
608 // currentPosition = temp;
612 // //need the unicode buffer
613 // if (withoutUnicodePtr == 0) {
614 // //buffer all the entries that have been left aside....
615 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
619 // withoutUnicodeBuffer,
621 // withoutUnicodePtr);
623 // //fill the buffer with the char
624 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
626 // } //-------------end unicode traitement--------------
628 if (!Character.isDigit(currentCharacter)) {
629 currentPosition = temp;
632 // if (withoutUnicodePtr != 0)
633 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
636 } catch (IndexOutOfBoundsException e) {
637 currentPosition = temp;
642 public final boolean getNextCharAsDigit(int radix) {
644 //handle the case of unicode.
645 //when a unicode appears then we must use a buffer that holds char
647 //At the end of this method currentCharacter holds the new visited char
648 //and currentPosition points right next after it
649 //Both previous lines are true if the currentCharacter is a digit base on
651 //On false, no side effect has occured.
652 //ALL getNextChar.... ARE OPTIMIZED COPIES
653 int temp = currentPosition;
655 currentCharacter = source[currentPosition++];
656 // if (((currentCharacter = source[currentPosition++]) == '\\')
657 // && (source[currentPosition] == 'u')) {
658 // //-------------unicode traitement ------------
659 // int c1, c2, c3, c4;
660 // int unicodeSize = 6;
661 // currentPosition++;
662 // while (source[currentPosition] == 'u') {
663 // currentPosition++;
667 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
669 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
671 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
673 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
675 // currentPosition = temp;
679 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
680 // if (Character.digit(currentCharacter, radix) == -1) {
681 // currentPosition = temp;
685 // //need the unicode buffer
686 // if (withoutUnicodePtr == 0) {
687 // //buffer all the entries that have been left aside....
688 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
692 // withoutUnicodeBuffer,
694 // withoutUnicodePtr);
696 // //fill the buffer with the char
697 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
699 // } //-------------end unicode traitement--------------
701 if (Character.digit(currentCharacter, radix) == -1) {
702 currentPosition = temp;
705 // if (withoutUnicodePtr != 0)
706 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
709 } catch (IndexOutOfBoundsException e) {
710 currentPosition = temp;
715 public boolean getNextCharAsJavaIdentifierPart() {
717 //handle the case of unicode.
718 //when a unicode appears then we must use a buffer that holds char
720 //At the end of this method currentCharacter holds the new visited char
721 //and currentPosition points right next after it
722 //Both previous lines are true if the currentCharacter is a
723 // JavaIdentifierPart
724 //On false, no side effect has occured.
725 //ALL getNextChar.... ARE OPTIMIZED COPIES
726 int temp = currentPosition;
728 currentCharacter = source[currentPosition++];
729 // if (((currentCharacter = source[currentPosition++]) == '\\')
730 // && (source[currentPosition] == 'u')) {
731 // //-------------unicode traitement ------------
732 // int c1, c2, c3, c4;
733 // int unicodeSize = 6;
734 // currentPosition++;
735 // while (source[currentPosition] == 'u') {
736 // currentPosition++;
740 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
742 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
744 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
746 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
748 // currentPosition = temp;
752 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
753 // if (!isPHPIdentifierPart(currentCharacter)) {
754 // currentPosition = temp;
758 // //need the unicode buffer
759 // if (withoutUnicodePtr == 0) {
760 // //buffer all the entries that have been left aside....
761 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
765 // withoutUnicodeBuffer,
767 // withoutUnicodePtr);
769 // //fill the buffer with the char
770 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
772 // } //-------------end unicode traitement--------------
774 if (!isPHPIdentifierPart(currentCharacter)) {
775 currentPosition = temp;
778 // if (withoutUnicodePtr != 0)
779 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
782 } catch (IndexOutOfBoundsException e) {
783 currentPosition = temp;
788 public int getCastOrParen() {
789 int tempPosition = currentPosition;
790 char tempCharacter = currentCharacter;
791 int tempToken = TokenNameLPAREN;
792 boolean found = false;
793 StringBuffer buf = new StringBuffer();
796 currentCharacter = source[currentPosition++];
797 } while (currentCharacter == ' ' || currentCharacter == '\t');
798 while ((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
799 buf.append(currentCharacter);
800 currentCharacter = source[currentPosition++];
802 if (buf.length() >= 3 && buf.length() <= 7) {
803 char[] data = buf.toString().toCharArray();
805 switch (data.length) {
808 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
810 tempToken = TokenNameintCAST;
815 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
817 tempToken = TokenNameboolCAST;
820 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
822 tempToken = TokenNamedoubleCAST;
828 if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
829 && (data[++index] == 'y')) {
831 tempToken = TokenNamearrayCAST;
834 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
835 && (data[++index] == 't')) {
837 tempToken = TokenNameunsetCAST;
840 if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
841 && (data[++index] == 't')) {
843 tempToken = TokenNamedoubleCAST;
849 // object string double
850 if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
851 && (data[++index] == 'c') && (data[++index] == 't')) {
853 tempToken = TokenNameobjectCAST;
856 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
857 && (data[++index] == 'n') && (data[++index] == 'g')) {
859 tempToken = TokenNamestringCAST;
862 if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
863 && (data[++index] == 'l') && (data[++index] == 'e')) {
865 tempToken = TokenNamedoubleCAST;
872 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
873 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
875 tempToken = TokenNameboolCAST;
878 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
879 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
881 tempToken = TokenNameintCAST;
887 while (currentCharacter == ' ' || currentCharacter == '\t') {
888 currentCharacter = source[currentPosition++];
890 if (currentCharacter == ')') {
895 } catch (IndexOutOfBoundsException e) {
897 currentCharacter = tempCharacter;
898 currentPosition = tempPosition;
899 return TokenNameLPAREN;
902 public void consumeStringInterpolated() throws InvalidInputException {
904 // consume next character
905 unicodeAsBackSlash = false;
906 currentCharacter = source[currentPosition++];
907 // if (((currentCharacter = source[currentPosition++]) == '\\')
908 // && (source[currentPosition] == 'u')) {
909 // getNextUnicodeChar();
911 // if (withoutUnicodePtr != 0) {
912 // withoutUnicodeBuffer[++withoutUnicodePtr] =
916 while (currentCharacter != '`') {
917 /** ** in PHP \r and \n are valid in string literals *** */
918 // if ((currentCharacter == '\n')
919 // || (currentCharacter == '\r')) {
920 // // relocate if finding another quote fairly close: thus unicode
921 // '/u000D' will be fully consumed
922 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
923 // if (currentPosition + lookAhead == source.length)
925 // if (source[currentPosition + lookAhead] == '\n')
927 // if (source[currentPosition + lookAhead] == '\"') {
928 // currentPosition += lookAhead + 1;
932 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
934 if (currentCharacter == '\\') {
935 int escapeSize = currentPosition;
936 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
937 //scanEscapeCharacter make a side effect on this value and we need
938 // the previous value few lines down this one
939 scanDoubleQuotedEscapeCharacter();
940 escapeSize = currentPosition - escapeSize;
941 if (withoutUnicodePtr == 0) {
942 //buffer all the entries that have been left aside....
943 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
944 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
945 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
946 } else { //overwrite the / in the buffer
947 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
948 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
949 // where only one is correct
953 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
954 if (recordLineSeparator) {
958 // consume next character
959 unicodeAsBackSlash = false;
960 currentCharacter = source[currentPosition++];
961 // if (((currentCharacter = source[currentPosition++]) == '\\')
962 // && (source[currentPosition] == 'u')) {
963 // getNextUnicodeChar();
965 if (withoutUnicodePtr != 0) {
966 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
970 } catch (IndexOutOfBoundsException e) {
971 // reset end position for error reporting
972 currentPosition -= 2;
973 throw new InvalidInputException(UNTERMINATED_STRING);
974 } catch (InvalidInputException e) {
975 if (e.getMessage().equals(INVALID_ESCAPE)) {
976 // relocate if finding another quote fairly close: thus unicode
977 // '/u000D' will be fully consumed
978 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
979 if (currentPosition + lookAhead == source.length)
981 if (source[currentPosition + lookAhead] == '\n')
983 if (source[currentPosition + lookAhead] == '`') {
984 currentPosition += lookAhead + 1;
991 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
992 // //$NON-NLS-?$ where ? is an
994 if (currentLine == null) {
995 currentLine = new NLSLine();
996 lines.add(currentLine);
998 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Scans the remainder of a single-quoted string. Characters are read from source at
 * currentPosition until a single quote character has been read.
 * A backslash is handed to scanSingleQuotedEscapeCharacter() and the resulting
 * currentCharacter is stored in withoutUnicodeBuffer. Carriage returns and line feeds inside
 * the string are passed to pushLineSeparator() when recordLineSeparator is set.
 * When checkNonExternalizedStringLiterals is set, a StringLiteral spanning startPosition to
 * currentPosition - 1 is added to currentLine (created and registered in lines on demand).
 *
 * @throws InvalidInputException with UNTERMINATED_STRING when the end of source is reached
 *         before the closing quote. NOTE(review): the INVALID_ESCAPE handler presumably
 *         rethrows after repositioning, but that statement is not visible in this listing.
 */
1002 public void consumeStringConstant() throws InvalidInputException {
1004 // consume next character
1005 unicodeAsBackSlash = false;
1006 currentCharacter = source[currentPosition++];
1007 // if (((currentCharacter = source[currentPosition++]) == '\\')
1008 // && (source[currentPosition] == 'u')) {
1009 // getNextUnicodeChar();
1011 // if (withoutUnicodePtr != 0) {
1012 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1013 // currentCharacter;
1016 while (currentCharacter != '\'') {
1017 /** ** in PHP \r and \n are valid in string literals *** */
1018 // if ((currentCharacter == '\n')
1019 // || (currentCharacter == '\r')) {
1020 // // relocate if finding another quote fairly close: thus unicode
1021 // '/u000D' will be fully consumed
1022 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1023 // if (currentPosition + lookAhead == source.length)
1025 // if (source[currentPosition + lookAhead] == '\n')
1027 // if (source[currentPosition + lookAhead] == '\"') {
1028 // currentPosition += lookAhead + 1;
1032 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1034 if (currentCharacter == '\\') {
// Remember where the escape starts so its length can be computed once it has been scanned.
1035 int escapeSize = currentPosition;
1036 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1037 //scanEscapeCharacter make a side effect on this value and we need
1038 // the previous value few lines down this one
1039 scanSingleQuotedEscapeCharacter();
1040 escapeSize = currentPosition - escapeSize;
1041 if (withoutUnicodePtr == 0) {
1042 //buffer all the entries that have been left aside....
// The copy starts at buffer index 1; currentCharacter is then appended after the copied prefix.
1043 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1044 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1045 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1046 } else { //overwrite the / in the buffer
1047 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1048 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1049 // where only one is correct
1050 withoutUnicodePtr--;
1053 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
// Line breaks are legal inside the string; they are only recorded for line bookkeeping.
1054 if (recordLineSeparator) {
1055 pushLineSeparator();
1058 // consume next character
1059 unicodeAsBackSlash = false;
1060 currentCharacter = source[currentPosition++];
1061 // if (((currentCharacter = source[currentPosition++]) == '\\')
1062 // && (source[currentPosition] == 'u')) {
1063 // getNextUnicodeChar();
1065 if (withoutUnicodePtr != 0) {
1066 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1070 } catch (IndexOutOfBoundsException e) {
// Reading past the end of source means the closing quote was never found.
1071 // reset end position for error reporting
1072 currentPosition -= 2;
1073 throw new InvalidInputException(UNTERMINATED_STRING);
1074 } catch (InvalidInputException e) {
1075 if (e.getMessage().equals(INVALID_ESCAPE)) {
1076 // relocate if finding another quote fairly close: thus unicode
1077 // '/u000D' will be fully consumed
// Looks at most 50 characters ahead for a closing single quote and moves just past it.
// NOTE(review): the statements guarded by the end-of-source and line-feed checks are not visible here.
1078 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1079 if (currentPosition + lookAhead == source.length)
1081 if (source[currentPosition + lookAhead] == '\n')
1083 if (source[currentPosition + lookAhead] == '\'') {
1084 currentPosition += lookAhead + 1;
1091 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1092 // //$NON-NLS-?$ where ? is an
// The literal is recorded on the current NLS line, which is created lazily.
1094 if (currentLine == null) {
1095 currentLine = new NLSLine();
1096 lines.add(currentLine);
1098 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Scans the remainder of a double-quoted string. Characters are read from source at
 * currentPosition until a double quote character has been read.
 * A backslash is handed to scanDoubleQuotedEscapeCharacter() and the resulting
 * currentCharacter is stored in withoutUnicodeBuffer. Carriage returns and line feeds inside
 * the string are passed to pushLineSeparator() when recordLineSeparator is set.
 * When checkNonExternalizedStringLiterals is set, a StringLiteral spanning startPosition to
 * currentPosition - 1 is added to currentLine (created and registered in lines on demand).
 *
 * @throws InvalidInputException with UNTERMINATED_STRING when the end of source is reached
 *         before the closing quote. NOTE(review): the INVALID_ESCAPE handler presumably
 *         rethrows after repositioning, but that statement is not visible in this listing.
 */
1102 public void consumeStringLiteral() throws InvalidInputException {
1104 // consume next character
1105 unicodeAsBackSlash = false;
1106 currentCharacter = source[currentPosition++];
1107 // if (((currentCharacter = source[currentPosition++]) == '\\')
1108 // && (source[currentPosition] == 'u')) {
1109 // getNextUnicodeChar();
1111 // if (withoutUnicodePtr != 0) {
1112 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1113 // currentCharacter;
1116 while (currentCharacter != '"') {
1117 /** ** in PHP \r and \n are valid in string literals *** */
1118 // if ((currentCharacter == '\n')
1119 // || (currentCharacter == '\r')) {
1120 // // relocate if finding another quote fairly close: thus unicode
1121 // '/u000D' will be fully consumed
1122 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1123 // if (currentPosition + lookAhead == source.length)
1125 // if (source[currentPosition + lookAhead] == '\n')
1127 // if (source[currentPosition + lookAhead] == '\"') {
1128 // currentPosition += lookAhead + 1;
1132 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1134 if (currentCharacter == '\\') {
// Remember where the escape starts so its length can be computed once it has been scanned.
1135 int escapeSize = currentPosition;
1136 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1137 //scanEscapeCharacter make a side effect on this value and we need
1138 // the previous value few lines down this one
1139 scanDoubleQuotedEscapeCharacter();
1140 escapeSize = currentPosition - escapeSize;
1141 if (withoutUnicodePtr == 0) {
1142 //buffer all the entries that have been left aside....
// The copy starts at buffer index 1; currentCharacter is then appended after the copied prefix.
1143 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1144 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1145 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1146 } else { //overwrite the / in the buffer
1147 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1148 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1149 // where only one is correct
1150 withoutUnicodePtr--;
1153 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
// Line breaks are legal inside the string; they are only recorded for line bookkeeping.
1154 if (recordLineSeparator) {
1155 pushLineSeparator();
1158 // consume next character
1159 unicodeAsBackSlash = false;
1160 currentCharacter = source[currentPosition++];
1161 // if (((currentCharacter = source[currentPosition++]) == '\\')
1162 // && (source[currentPosition] == 'u')) {
1163 // getNextUnicodeChar();
1165 if (withoutUnicodePtr != 0) {
1166 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1170 } catch (IndexOutOfBoundsException e) {
// Reading past the end of source means the closing quote was never found.
1171 // reset end position for error reporting
1172 currentPosition -= 2;
1173 throw new InvalidInputException(UNTERMINATED_STRING);
1174 } catch (InvalidInputException e) {
1175 if (e.getMessage().equals(INVALID_ESCAPE)) {
1176 // relocate if finding another quote fairly close: thus unicode
1177 // '/u000D' will be fully consumed
// Looks at most 50 characters ahead for a closing double quote and moves just past it.
// NOTE(review): the statements guarded by the end-of-source and line-feed checks are not visible here.
1178 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1179 if (currentPosition + lookAhead == source.length)
1181 if (source[currentPosition + lookAhead] == '\n')
1183 if (source[currentPosition + lookAhead] == '\"') {
1184 currentPosition += lookAhead + 1;
1191 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1192 // //$NON-NLS-?$ where ? is an
// The literal is recorded on the current NLS line, which is created lazily.
1194 if (currentLine == null) {
1195 currentLine = new NLSLine();
1196 lines.add(currentLine);
1198 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Scans and returns the next token, starting at currentPosition.
 * <p>
 * Visible flow, in order: inlined HTML is delegated to getInlinedHTML(); wasAcr and
 * withoutUnicodePtr are reset; the top of encapsedStringStack (if any) selects between
 * scanning the body of an encapsed string, scanning inside an interpolated expression
 * (marker character $), and the regular token switch. The regular path skips white space,
 * records line separators and comments, and returns one of the TokenName constants.
 * <p>
 * NOTE(review): several guard conditions and all case labels are missing from this listing,
 * so the notes added below describe only statements that are actually visible.
 *
 * @return the token kind. TokenNameEOF is returned when currentPosition passes eofPosition,
 *         and by the final return that follows the IndexOutOfBoundsException handler.
 * @throws InvalidInputException for example with UNTERMINATED_COMMENT when a block comment
 *         runs to the end of source, or with the message Ctrl-Z
 */
1202 public int getNextToken() throws InvalidInputException {
// NOTE(review): presumably guarded by a check that the scanner is outside PHP mode - the guard is not visible.
1204 return getInlinedHTML(currentPosition);
1207 this.wasAcr = false;
// NOTE(review): presumably only done in a diet-parsing mode - the guard is not visible.
1209 jumpOverMethodBody();
1211 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1215 withoutUnicodePtr = 0;
1216 //start with a new token
// A blank encapsedChar means no encapsed string is open; otherwise it is the stack top.
1217 char encapsedChar = ' ';
1218 if (!encapsedStringStack.isEmpty()) {
1219 encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
1221 if (encapsedChar != '$' && encapsedChar != ' ') {
// Inside the body of an encapsed string: the stack top is the delimiter character itself.
1222 currentCharacter = source[currentPosition++];
1223 if (currentCharacter == encapsedChar) {
// The delimiter was read immediately, so the matching closing token is returned.
1224 switch (currentCharacter) {
1226 return TokenNameEncapsedString0;
1228 return TokenNameEncapsedString1;
1230 return TokenNameEncapsedString2;
// Otherwise plain string content is consumed until the delimiter is read or a branch below returns.
1233 while (currentCharacter != encapsedChar) {
1234 /** ** in PHP \r and \n are valid in string literals *** */
1235 switch (currentCharacter) {
1237 int escapeSize = currentPosition;
1238 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1239 //scanEscapeCharacter make a side effect on this value and
1240 // we need the previous value few lines down this one
1241 scanDoubleQuotedEscapeCharacter();
1242 escapeSize = currentPosition - escapeSize;
1243 if (withoutUnicodePtr == 0) {
1244 //buffer all the entries that have been left aside....
1245 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1246 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1247 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1248 } else { //overwrite the / in the buffer
1249 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1250 if (backSlashAsUnicodeInString) { //there are TWO \ in
1251 withoutUnicodePtr--;
1257 if (recordLineSeparator) {
1258 pushLineSeparator();
// An identifier start or an opening brace follows: the $ marker is pushed and the text so far is returned.
1262 if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
1264 encapsedStringStack.push(new Character('$'));
1265 return TokenNameSTRING;
1269 if (source[currentPosition] == '$') { // CURLY_OPEN
1271 encapsedStringStack.push(new Character('$'));
1272 return TokenNameSTRING;
1275 // consume next character
1276 unicodeAsBackSlash = false;
1277 currentCharacter = source[currentPosition++];
1278 if (withoutUnicodePtr != 0) {
1279 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1284 return TokenNameSTRING;
1286 // ---------Consume white space and handles startPosition---------
1287 int whiteStart = currentPosition;
1288 startPosition = currentPosition;
1289 currentCharacter = source[currentPosition++];
1290 if (encapsedChar == '$') {
// Inside an interpolated expression (marker $): the tokens returned come from this separate switch.
1291 switch (currentCharacter) {
1293 currentCharacter = source[currentPosition++];
1294 return TokenNameSTRING;
1296 if (encapsedChar == '$') {
1297 if (getNextChar('$'))
1298 return TokenNameLBRACE_DOLLAR;
1300 return TokenNameLBRACE;
1302 return TokenNameRBRACE;
1304 return TokenNameLBRACKET;
1306 return TokenNameRBRACKET;
1308 if (tokenizeStrings) {
1309 consumeStringConstant();
1310 return TokenNameStringSingleQuote;
1312 return TokenNameEncapsedString1;
1314 return TokenNameEncapsedString2;
1316 if (tokenizeStrings) {
1317 consumeStringInterpolated();
1318 return TokenNameStringInterpolated;
1320 return TokenNameEncapsedString0;
1322 if (getNextChar('>'))
1323 return TokenNameMINUS_GREATER;
1324 return TokenNameSTRING;
1326 if (currentCharacter == '$') {
// On failure the position is restored so the $ is reported as plain string content.
1327 int oldPosition = currentPosition;
1329 currentCharacter = source[currentPosition++];
1330 if (currentCharacter == '{') {
1331 return TokenNameDOLLAR_LBRACE;
1333 if (isPHPIdentifierStart(currentCharacter)) {
1334 return scanIdentifierOrKeyword(true);
1336 currentPosition = oldPosition;
1337 return TokenNameSTRING;
1339 } catch (IndexOutOfBoundsException e) {
1340 currentPosition = oldPosition;
1341 return TokenNameSTRING;
1344 if (isPHPIdentifierStart(currentCharacter))
1345 return scanIdentifierOrKeyword(false);
1346 if (Character.isDigit(currentCharacter))
1347 return scanNumber(false);
1348 return TokenNameERROR;
1351 // boolean isWhiteSpace;
// Regular path: skip white space, moving startPosition along with the scan position.
1353 while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1354 startPosition = currentPosition;
1355 currentCharacter = source[currentPosition++];
1356 // if (((currentCharacter = source[currentPosition++]) == '\\')
1357 // && (source[currentPosition] == 'u')) {
1358 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1360 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1361 checkNonExternalizeString();
1362 if (recordLineSeparator) {
1363 pushLineSeparator();
1368 // isWhiteSpace = (currentCharacter == ' ')
1369 // || Character.isWhitespace(currentCharacter);
1372 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1373 // reposition scanner in case we are interested by spaces as tokens
1375 startPosition = whiteStart;
1376 return TokenNameWHITESPACE;
1378 //little trick to get out in the middle of a source computation
1379 if (currentPosition > eofPosition)
1380 return TokenNameEOF;
1381 // ---------Identify the next token-------------
// NOTE(review): the case labels of this switch are not visible; each group below is one case body.
1382 switch (currentCharacter) {
1384 return getCastOrParen();
1386 return TokenNameRPAREN;
1388 return TokenNameLBRACE;
1390 return TokenNameRBRACE;
1392 return TokenNameLBRACKET;
1394 return TokenNameRBRACKET;
1396 return TokenNameSEMICOLON;
1398 return TokenNameCOMMA;
1400 if (getNextChar('='))
1401 return TokenNameDOT_EQUAL;
1402 if (getNextCharAsDigit())
1403 return scanNumber(true);
1404 return TokenNameDOT;
1407 if ((test = getNextChar('+', '=')) == 0)
1408 return TokenNamePLUS_PLUS;
1410 return TokenNamePLUS_EQUAL;
1411 return TokenNamePLUS;
1415 if ((test = getNextChar('-', '=')) == 0)
1416 return TokenNameMINUS_MINUS;
1418 return TokenNameMINUS_EQUAL;
1419 if (getNextChar('>'))
1420 return TokenNameMINUS_GREATER;
1421 return TokenNameMINUS;
1424 if (getNextChar('='))
1425 return TokenNameTWIDDLE_EQUAL;
1426 return TokenNameTWIDDLE;
1428 if (getNextChar('=')) {
1429 if (getNextChar('=')) {
1430 return TokenNameNOT_EQUAL_EQUAL;
1432 return TokenNameNOT_EQUAL;
1434 return TokenNameNOT;
1436 if (getNextChar('='))
1437 return TokenNameMULTIPLY_EQUAL;
1438 return TokenNameMULTIPLY;
1440 if (getNextChar('='))
1441 return TokenNameREMAINDER_EQUAL;
1442 return TokenNameREMAINDER;
// The position is saved so that a lone less-than sign can be returned without consuming more input.
1444 int oldPosition = currentPosition;
1446 currentCharacter = source[currentPosition++];
1447 } catch (IndexOutOfBoundsException e) {
1448 currentPosition = oldPosition;
1449 return TokenNameLESS;
1451 switch (currentCharacter) {
1453 return TokenNameLESS_EQUAL;
1455 return TokenNameNOT_EQUAL;
1457 if (getNextChar('='))
1458 return TokenNameLEFT_SHIFT_EQUAL;
1459 if (getNextChar('<')) {
// Heredoc start: skip white space, then read the identifier that names the closing tag.
1460 currentCharacter = source[currentPosition++];
1461 while (Character.isWhitespace(currentCharacter)) {
1462 currentCharacter = source[currentPosition++];
1464 int heredocStart = currentPosition - 1;
1465 int heredocLength = 0;
1466 if (isPHPIdentifierStart(currentCharacter)) {
1467 currentCharacter = source[currentPosition++];
1469 return TokenNameERROR;
1471 while (isPHPIdentifierPart(currentCharacter)) {
1472 currentCharacter = source[currentPosition++];
1474 heredocLength = currentPosition - heredocStart - 1;
1475 // heredoc end-tag determination
1476 boolean endTag = true;
1479 ch = source[currentPosition++];
1480 if (ch == '\r' || ch == '\n') {
1481 if (recordLineSeparator) {
1482 pushLineSeparator();
// After a line break the following characters are compared with the heredoc identifier.
1486 for (int i = 0; i < heredocLength; i++) {
1487 if (source[currentPosition + i] != source[heredocStart + i]) {
1493 currentPosition += heredocLength - 1;
1494 currentCharacter = source[currentPosition++];
1495 break; // do...while loop
1501 return TokenNameHEREDOC;
1503 return TokenNameLEFT_SHIFT;
1505 currentPosition = oldPosition;
1506 return TokenNameLESS;
1510 if ((test = getNextChar('=', '>')) == 0)
1511 return TokenNameGREATER_EQUAL;
1513 if ((test = getNextChar('=', '>')) == 0)
1514 return TokenNameRIGHT_SHIFT_EQUAL;
1515 return TokenNameRIGHT_SHIFT;
1517 return TokenNameGREATER;
1520 if (getNextChar('=')) {
1521 if (getNextChar('=')) {
1522 return TokenNameEQUAL_EQUAL_EQUAL;
1524 return TokenNameEQUAL_EQUAL;
1526 if (getNextChar('>'))
1527 return TokenNameEQUAL_GREATER;
1528 return TokenNameEQUAL;
1531 if ((test = getNextChar('&', '=')) == 0)
1532 return TokenNameAND_AND;
1534 return TokenNameAND_EQUAL;
1535 return TokenNameAND;
1539 if ((test = getNextChar('|', '=')) == 0)
1540 return TokenNameOR_OR;
1542 return TokenNameOR_EQUAL;
1546 if (getNextChar('='))
1547 return TokenNameXOR_EQUAL;
1548 return TokenNameXOR;
// A following greater-than sign hands control back to inlined HTML scanning.
1550 if (getNextChar('>')) {
1552 if (currentPosition == source.length) {
1554 return TokenNameINLINE_HTML;
1556 return getInlinedHTML(currentPosition - 2);
1558 return TokenNameQUESTION;
1560 if (getNextChar(':'))
1561 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1562 return TokenNameCOLON;
1566 consumeStringConstant();
1567 return TokenNameStringSingleQuote;
1569 if (tokenizeStrings) {
1570 consumeStringLiteral();
1571 return TokenNameStringDoubleQuote;
1573 return TokenNameEncapsedString2;
1575 if (tokenizeStrings) {
1576 consumeStringInterpolated();
1577 return TokenNameStringInterpolated;
1579 return TokenNameEncapsedString0;
// Shared entry for slash and hash: divide-assign, line comment, block comment or plain divide.
1582 char startChar = currentCharacter;
1583 if (getNextChar('=') && startChar == '/') {
1584 return TokenNameDIVIDE_EQUAL;
1587 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1589 this.lastCommentLinePosition = this.currentPosition;
1590 int endPositionForLineComment = 0;
1591 try { //get the next char
1592 currentCharacter = source[currentPosition++];
1593 // if (((currentCharacter = source[currentPosition++])
1595 // && (source[currentPosition] == 'u')) {
1596 // //-------------unicode traitement ------------
1597 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1598 // currentPosition++;
1599 // while (source[currentPosition] == 'u') {
1600 // currentPosition++;
1603 // Character.getNumericValue(source[currentPosition++]))
1607 // Character.getNumericValue(source[currentPosition++]))
1611 // Character.getNumericValue(source[currentPosition++]))
1615 // Character.getNumericValue(source[currentPosition++]))
1619 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1621 // currentCharacter =
1622 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1625 //handle the \\u case manually into comment
1626 // if (currentCharacter == '\\') {
1627 // if (source[currentPosition] == '\\')
1628 // currentPosition++;
1629 // } //jump over the \\
1630 boolean isUnicode = false;
// The line comment runs to the next line break, unless a question mark followed by greater-than ends it first.
1631 while (currentCharacter != '\r' && currentCharacter != '\n') {
1632 this.lastCommentLinePosition = this.currentPosition;
1633 if (currentCharacter == '?') {
1634 if (getNextChar('>')) {
1635 startPosition = currentPosition - 2;
1637 return TokenNameINLINE_HTML;
1642 currentCharacter = source[currentPosition++];
1643 // if (((currentCharacter = source[currentPosition++])
1645 // && (source[currentPosition] == 'u')) {
1646 // isUnicode = true;
1647 // //-------------unicode traitement ------------
1648 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1649 // currentPosition++;
1650 // while (source[currentPosition] == 'u') {
1651 // currentPosition++;
1654 // Character.getNumericValue(source[currentPosition++]))
1658 // Character.getNumericValue(
1659 // source[currentPosition++]))
1663 // Character.getNumericValue(
1664 // source[currentPosition++]))
1668 // Character.getNumericValue(
1669 // source[currentPosition++]))
1673 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1675 // currentCharacter =
1676 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1679 //handle the \\u case manually into comment
1680 // if (currentCharacter == '\\') {
1681 // if (source[currentPosition] == '\\')
1682 // currentPosition++;
1683 // } //jump over the \\
// NOTE(review): the two assignments below are presumably alternative branches keyed on isUnicode - the condition is not visible.
1686 endPositionForLineComment = currentPosition - 6;
1688 endPositionForLineComment = currentPosition - 1;
1690 // recordComment(false);
1691 recordComment(TokenNameCOMMENT_LINE);
1692 if (this.taskTags != null)
1693 checkTaskTag(this.startPosition, this.currentPosition);
1694 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1695 checkNonExternalizeString();
1696 if (recordLineSeparator) {
1698 pushUnicodeLineSeparator();
1700 pushLineSeparator();
1706 if (tokenizeComments) {
1708 currentPosition = endPositionForLineComment;
1709 // reset one character behind
1711 return TokenNameCOMMENT_LINE;
1713 } catch (IndexOutOfBoundsException e) { //an eof will them
1715 if (tokenizeComments) {
1717 // reset one character behind
1718 return TokenNameCOMMENT_LINE;
1724 //traditional and annotation comment
// star records whether the previously read character was an asterisk, so the closing pair can be detected.
1725 boolean isJavadoc = false, star = false;
1726 // consume next character
1727 unicodeAsBackSlash = false;
1728 currentCharacter = source[currentPosition++];
1729 // if (((currentCharacter = source[currentPosition++]) ==
1731 // && (source[currentPosition] == 'u')) {
1732 // getNextUnicodeChar();
1734 // if (withoutUnicodePtr != 0) {
1735 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1736 // currentCharacter;
1739 if (currentCharacter == '*') {
1743 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1744 checkNonExternalizeString();
1745 if (recordLineSeparator) {
1746 pushLineSeparator();
1751 try { //get the next char
1752 currentCharacter = source[currentPosition++];
1753 // if (((currentCharacter = source[currentPosition++])
1755 // && (source[currentPosition] == 'u')) {
1756 // //-------------unicode traitement ------------
1757 // getNextUnicodeChar();
1759 //handle the \\u case manually into comment
1760 // if (currentCharacter == '\\') {
1761 // if (source[currentPosition] == '\\')
1762 // currentPosition++;
1763 // //jump over the \\
1765 // empty comment is not a javadoc /**/
1766 if (currentCharacter == '/') {
1769 //loop until end of comment */
1770 while ((currentCharacter != '/') || (!star)) {
1771 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1772 checkNonExternalizeString();
1773 if (recordLineSeparator) {
1774 pushLineSeparator();
1779 star = currentCharacter == '*';
1781 currentCharacter = source[currentPosition++];
1782 // if (((currentCharacter = source[currentPosition++])
1784 // && (source[currentPosition] == 'u')) {
1785 // //-------------unicode traitement ------------
1786 // getNextUnicodeChar();
1788 //handle the \\u case manually into comment
1789 // if (currentCharacter == '\\') {
1790 // if (source[currentPosition] == '\\')
1791 // currentPosition++;
1792 // } //jump over the \\
1794 //recordComment(isJavadoc);
// NOTE(review): presumably the PHPDOC variant is chosen when isJavadoc is set - the condition is not visible.
1796 recordComment(TokenNameCOMMENT_PHPDOC);
1798 recordComment(TokenNameCOMMENT_BLOCK);
1801 if (tokenizeComments) {
1803 return TokenNameCOMMENT_PHPDOC;
1804 return TokenNameCOMMENT_BLOCK;
1807 if (this.taskTags != null) {
1808 checkTaskTag(this.startPosition, this.currentPosition);
1810 } catch (IndexOutOfBoundsException e) {
1811 // reset end position for error reporting
1812 currentPosition -= 2;
1813 throw new InvalidInputException(UNTERMINATED_COMMENT);
1817 return TokenNameDIVIDE;
1821 return TokenNameEOF;
1822 //the atEnd may not be <currentPosition == source.length> if
1823 // source is only some part of a real (external) stream
1824 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// Fallback group: a variable after a dollar sign, an identifier or keyword, a number, otherwise an error token.
1826 if (currentCharacter == '$') {
1827 int oldPosition = currentPosition;
1829 currentCharacter = source[currentPosition++];
1830 if (isPHPIdentifierStart(currentCharacter)) {
1831 return scanIdentifierOrKeyword(true);
1833 currentPosition = oldPosition;
1834 return TokenNameDOLLAR;
1836 } catch (IndexOutOfBoundsException e) {
1837 currentPosition = oldPosition;
1838 return TokenNameDOLLAR;
1841 if (isPHPIdentifierStart(currentCharacter))
1842 return scanIdentifierOrKeyword(false);
1843 if (Character.isDigit(currentCharacter))
1844 return scanNumber(false);
1845 return TokenNameERROR;
1848 } //-----------------end switch while try--------------------
1849 catch (IndexOutOfBoundsException e) {
// NOTE(review): the handler body is not visible in this listing.
1852 return TokenNameEOF;
/**
 * Obtains the next token of inlined HTML by delegating to getInlinedHTMLToken(start).
 * The branch taken for TokenNameINLINE_HTML contains only commented-out HTML tag checking
 * code in this listing.
 * NOTE(review): the return statement is not visible here; presumably the token obtained
 * from getInlinedHTMLToken is returned unchanged - confirm.
 *
 * @param start position passed on to getInlinedHTMLToken
 * @throws InvalidInputException declared because getInlinedHTMLToken declares it
 */
1855 private int getInlinedHTML(int start) throws InvalidInputException {
1856 int token = getInlinedHTMLToken(start);
1857 if (token == TokenNameINLINE_HTML) {
// Disabled experiment: walk the HTML tags of the inlined section and match start tags against end tags.
1858 // Stack stack = new Stack();
1859 // // scan html for errors
1860 // Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
1861 // int lastPHPEndPos=0;
1862 // for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
1863 // Tag tag=(Tag)i.next();
1865 // if (tag instanceof StartTag) {
1866 // StartTag startTag=(StartTag)tag;
1867 // // System.out.println("startTag: "+tag);
1868 // if (startTag.isServerTag()) {
1869 // // TODO : what to do with a server tag ?
1871 // // do whatever with HTML start tag
1872 // // use startTag.getElement() to find the element corresponding
1873 // // to this start tag which may be useful if you implement code
1875 // stack.push(startTag);
1878 // EndTag endTag=(EndTag)tag;
1879 // StartTag stag = (StartTag) stack.peek();
1880 //// System.out.println("endTag: "+tag);
1881 // // do whatever with HTML end tag.
1890 * @throws InvalidInputException
// Scans inlined HTML starting at currentPosition and looks for a PHP opening tag.
// startPosition is set to the start argument. TokenNameEOF is returned, after clamping
// currentPosition to source.length, when the scan position is already past the end of source;
// every other visible return yields TokenNameINLINE_HTML.
// NOTE(review): the loop header and the statements that switch the scanner into PHP mode are
// not visible in this listing, so the notes below cover only the visible statements.
1892 private int getInlinedHTMLToken(int start) throws InvalidInputException {
1893 if (currentPosition > source.length) {
1894 currentPosition = source.length;
1895 return TokenNameEOF;
1897 startPosition = start;
1900 currentCharacter = source[currentPosition++];
1901 if (currentCharacter == '<') {
1902 if (getNextChar('?')) {
// A less-than sign followed by a question mark was read; the next character selects the tag form.
1903 currentCharacter = source[currentPosition++];
1904 if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
1905 if (currentCharacter != '=') { // <?=
// With ignorePHPOneLiner set, the look-ahead helper decides whether the HTML token is returned here.
1909 if (ignorePHPOneLiner) {
1910 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1912 return TokenNameINLINE_HTML;
1916 return TokenNameINLINE_HTML;
1919 // boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
// After the first letter p, the letters h and p are matched in either case.
1921 int test = getNextChar('H', 'h');
1923 test = getNextChar('P', 'p');
1926 if (ignorePHPOneLiner) {
1927 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1929 return TokenNameINLINE_HTML;
1933 return TokenNameINLINE_HTML;
// Line breaks inside the HTML are recorded for line bookkeeping.
1941 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1942 if (recordLineSeparator) {
1943 pushLineSeparator();
1948 } //-----------------while--------------------
1950 return TokenNameINLINE_HTML;
1951 } //-----------------try--------------------
1952 catch (IndexOutOfBoundsException e) {
// End of source reached while scanning: the HTML token starts at the start argument.
1953 startPosition = start;
1957 return TokenNameINLINE_HTML;
// Looks ahead from currentPosition using a local cursor and tracks whether the cursor is
// inside a single-quoted or a double-quoted string. A quote preceded by a backslash does not
// close the string it is in.
// Returns TokenNameEOF, used as a dummy value, after moving currentPosition to the cursor when
// a character preceded by a question mark is matched; the other visible returns yield
// TokenNameINLINE_HTML. Running past the end of source also moves currentPosition to the
// cursor and returns TokenNameINLINE_HTML.
// NOTE(review): the case labels are not visible in this listing, so the characters that
// trigger each branch are presumed from the surrounding conditions - confirm.
1963 private int lookAheadLinePHPTag() {
1964 // check if the PHP is only in this line (for CodeFormatter)
1965 int currentPositionInLine = currentPosition;
1966 char previousCharInLine = ' ';
1967 char currentCharInLine = ' ';
1968 boolean singleQuotedStringActive = false;
1969 boolean doubleQuotedStringActive = false;
1972 // look ahead in this line
1974 previousCharInLine = currentCharInLine;
1975 currentCharInLine = source[currentPositionInLine++];
1976 switch (currentCharInLine) {
1978 if (previousCharInLine == '?') {
1979 // update the scanner's current Position in the source
1980 currentPosition = currentPositionInLine;
1981 // use as "dummy" token
1982 return TokenNameEOF;
// Double quote handling: close an open string unless escaped, or open one outside a single-quoted string.
1986 if (doubleQuotedStringActive) {
1987 if (previousCharInLine != '\\') {
1988 doubleQuotedStringActive = false;
1991 if (!singleQuotedStringActive) {
1992 doubleQuotedStringActive = true;
// Single quote handling mirrors the double quote handling above.
1997 if (singleQuotedStringActive) {
1998 if (previousCharInLine != '\\') {
1999 singleQuotedStringActive = false;
2002 if (!doubleQuotedStringActive) {
2003 singleQuotedStringActive = true;
2009 return TokenNameINLINE_HTML;
// The remaining branches only return when the cursor is outside both kinds of string.
2011 if (!singleQuotedStringActive && !doubleQuotedStringActive) {
2013 return TokenNameINLINE_HTML;
2017 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2019 return TokenNameINLINE_HTML;
2023 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2025 return TokenNameINLINE_HTML;
2030 } catch (IndexOutOfBoundsException e) {
2032 currentPosition = currentPositionInLine;
2033 return TokenNameINLINE_HTML;
2037 // public final void getNextUnicodeChar()
2038 // throws IndexOutOfBoundsException, InvalidInputException {
2040 // //handle the case of unicode.
2041 // //when a unicode appears then we must use a buffer that holds char
2043 // //At the end of this method currentCharacter holds the new visited char
2044 // //and currentPosition points right next after it
2046 // //ALL getNextChar.... ARE OPTIMIZED COPIES
2048 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2049 // currentPosition++;
2050 // while (source[currentPosition] == 'u') {
2051 // currentPosition++;
2055 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2057 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2059 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2061 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2063 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2065 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2066 // //need the unicode buffer
2067 // if (withoutUnicodePtr == 0) {
2068 // //buffer all the entries that have been left aside....
2069 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2070 // System.arraycopy(
2073 // withoutUnicodeBuffer,
2075 // withoutUnicodePtr);
2077 // //fill the buffer with the char
2078 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2080 // unicodeAsBackSlash = currentCharacter == '\\';
2083 * Tokenize a method body, assuming that curly brackets are properly balanced.
2085 public final void jumpOverMethodBody() {
2086 this.wasAcr = false;
2089 while (true) { //loop for jumping over comments
2090 // ---------Consume white space and handles startPosition---------
2091 boolean isWhiteSpace;
2093 startPosition = currentPosition;
2094 currentCharacter = source[currentPosition++];
2095 // if (((currentCharacter = source[currentPosition++]) == '\\')
2096 // && (source[currentPosition] == 'u')) {
2097 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2099 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2100 pushLineSeparator();
2101 isWhiteSpace = Character.isWhitespace(currentCharacter);
2103 } while (isWhiteSpace);
2104 // -------consume token until } is found---------
2105 switch (currentCharacter) {
2116 test = getNextChar('\\');
2119 scanDoubleQuotedEscapeCharacter();
2120 } catch (InvalidInputException ex) {
2124 // try { // consume next character
2125 unicodeAsBackSlash = false;
2126 currentCharacter = source[currentPosition++];
2127 // if (((currentCharacter = source[currentPosition++]) == '\\')
2128 // && (source[currentPosition] == 'u')) {
2129 // getNextUnicodeChar();
2131 if (withoutUnicodePtr != 0) {
2132 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2135 // } catch (InvalidInputException ex) {
2143 // try { // consume next character
2144 unicodeAsBackSlash = false;
2145 currentCharacter = source[currentPosition++];
2146 // if (((currentCharacter = source[currentPosition++]) == '\\')
2147 // && (source[currentPosition] == 'u')) {
2148 // getNextUnicodeChar();
2150 if (withoutUnicodePtr != 0) {
2151 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2154 // } catch (InvalidInputException ex) {
2156 while (currentCharacter != '"') {
2157 if (currentCharacter == '\r') {
2158 if (source[currentPosition] == '\n')
2161 // the string cannot go further that the line
2163 if (currentCharacter == '\n') {
2165 // the string cannot go further that the line
2167 if (currentCharacter == '\\') {
2169 scanDoubleQuotedEscapeCharacter();
2170 } catch (InvalidInputException ex) {
2174 // try { // consume next character
2175 unicodeAsBackSlash = false;
2176 currentCharacter = source[currentPosition++];
2177 // if (((currentCharacter = source[currentPosition++]) == '\\')
2178 // && (source[currentPosition] == 'u')) {
2179 // getNextUnicodeChar();
2181 if (withoutUnicodePtr != 0) {
2182 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2185 // } catch (InvalidInputException ex) {
2188 } catch (IndexOutOfBoundsException e) {
2194 if ((test = getNextChar('/', '*')) == 0) {
2198 currentCharacter = source[currentPosition++];
2199 // if (((currentCharacter = source[currentPosition++]) ==
2201 // && (source[currentPosition] == 'u')) {
2202 // //-------------unicode traitement ------------
2203 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2204 // currentPosition++;
2205 // while (source[currentPosition] == 'u') {
2206 // currentPosition++;
2209 // Character.getNumericValue(source[currentPosition++]))
2213 // Character.getNumericValue(source[currentPosition++]))
2217 // Character.getNumericValue(source[currentPosition++]))
2221 // Character.getNumericValue(source[currentPosition++]))
2224 // //error don't care of the value
2225 // currentCharacter = 'A';
2226 // } //something different from \n and \r
2228 // currentCharacter =
2229 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2232 while (currentCharacter != '\r' && currentCharacter != '\n') {
2234 currentCharacter = source[currentPosition++];
2235 // if (((currentCharacter = source[currentPosition++])
2237 // && (source[currentPosition] == 'u')) {
2238 // //-------------unicode traitement ------------
2239 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2240 // currentPosition++;
2241 // while (source[currentPosition] == 'u') {
2242 // currentPosition++;
2245 // Character.getNumericValue(source[currentPosition++]))
2249 // Character.getNumericValue(source[currentPosition++]))
2253 // Character.getNumericValue(source[currentPosition++]))
2257 // Character.getNumericValue(source[currentPosition++]))
2260 // //error don't care of the value
2261 // currentCharacter = 'A';
2262 // } //something different from \n and \r
2264 // currentCharacter =
2265 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2269 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2270 pushLineSeparator();
2271 } catch (IndexOutOfBoundsException e) {
2272 } //an eof will them be generated
2276 //traditional and annotation comment
2277 boolean star = false;
2278 // try { // consume next character
2279 unicodeAsBackSlash = false;
2280 currentCharacter = source[currentPosition++];
2281 // if (((currentCharacter = source[currentPosition++]) == '\\')
2282 // && (source[currentPosition] == 'u')) {
2283 // getNextUnicodeChar();
2285 if (withoutUnicodePtr != 0) {
2286 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2289 // } catch (InvalidInputException ex) {
2291 if (currentCharacter == '*') {
2294 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2295 pushLineSeparator();
2296 try { //get the next char
2297 currentCharacter = source[currentPosition++];
2298 // if (((currentCharacter = source[currentPosition++]) ==
2300 // && (source[currentPosition] == 'u')) {
2301 // //-------------unicode traitement ------------
2302 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2303 // currentPosition++;
2304 // while (source[currentPosition] == 'u') {
2305 // currentPosition++;
2308 // Character.getNumericValue(source[currentPosition++]))
2312 // Character.getNumericValue(source[currentPosition++]))
2316 // Character.getNumericValue(source[currentPosition++]))
2320 // Character.getNumericValue(source[currentPosition++]))
2323 // //error don't care of the value
2324 // currentCharacter = 'A';
2325 // } //something different from * and /
2327 // currentCharacter =
2328 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2331 //loop until end of comment */
2332 while ((currentCharacter != '/') || (!star)) {
2333 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2334 pushLineSeparator();
2335 star = currentCharacter == '*';
2337 currentCharacter = source[currentPosition++];
2338 // if (((currentCharacter = source[currentPosition++])
2340 // && (source[currentPosition] == 'u')) {
2341 // //-------------unicode traitement ------------
2342 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2343 // currentPosition++;
2344 // while (source[currentPosition] == 'u') {
2345 // currentPosition++;
2348 // Character.getNumericValue(source[currentPosition++]))
2352 // Character.getNumericValue(source[currentPosition++]))
2356 // Character.getNumericValue(source[currentPosition++]))
2360 // Character.getNumericValue(source[currentPosition++]))
2363 // //error don't care of the value
2364 // currentCharacter = 'A';
2365 // } //something different from * and /
2367 // currentCharacter =
2368 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2372 } catch (IndexOutOfBoundsException e) {
2380 if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
2382 scanIdentifierOrKeyword((currentCharacter == '$'));
2383 } catch (InvalidInputException ex) {
2388 if (Character.isDigit(currentCharacter)) {
2391 } catch (InvalidInputException ex) {
2398 //-----------------end switch while try--------------------
2399 } catch (IndexOutOfBoundsException e) {
2400 } catch (InvalidInputException e) {
2405 // public final boolean jumpOverUnicodeWhiteSpace()
2406 // throws InvalidInputException {
2408 // //handle the case of unicode. Jump over the next whiteSpace
2409 // //making startPosition pointing on the next available char
2410 // //On false, the currentCharacter is filled up with a potential
2414 // this.wasAcr = false;
2415 // int c1, c2, c3, c4;
2416 // int unicodeSize = 6;
2417 // currentPosition++;
2418 // while (source[currentPosition] == 'u') {
2419 // currentPosition++;
2423 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2425 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2427 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2429 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2431 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2434 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2435 // if (recordLineSeparator
2436 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2437 // pushLineSeparator();
2438 // if (Character.isWhitespace(currentCharacter))
2441 // //buffer the new char which is not a white space
2442 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2443 // //withoutUnicodePtr == 1 is true here
2445 // } catch (IndexOutOfBoundsException e) {
2446 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2449 public final int[] getLineEnds() {
2450 //return a bounded copy of this.lineEnds
2452 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2456 public char[] getSource() {
2460 public static boolean isIdentifierOrKeyword(int token) {
2461 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
// Returns the source of a one-character token as a char[].
// Reads the single character at source[startPosition]; the last visible statement builds a
// fresh one-element array from it.
// NOTE(review): the numbering jumps from 2467 to 2522, so the lines in between (presumably the
// dispatch that hands out pre-built arrays, as the comments below describe) are absent here.
2464 final char[] optimizedCurrentTokenSource1() {
2465 //always return the same char[], built only once
2466 //optimization at no speed cost for 99.5 % of the single-char identifiers
2467 char charOne = source[startPosition];
2522 return new char[] { charOne };
// Returns the source of a two-character token as a char[], reusing a previously built array
// when one is available.
// c0 and c1 are the characters at source[startPosition] and source[startPosition + 1].
// NOTE(review): short lines (declarations, the condition and case labels guarding the
// charArray_v? returns, 'return' statements and closing braces) are absent from this listing.
2526 final char[] optimizedCurrentTokenSource2() {
2528 c0 = source[startPosition];
2529 c1 = source[startPosition + 1];
2531 //always return the same char[], built only once
2532 //optimization at no speed cost for 99.5 % of the single-char identifiers
// One pre-built array per letter a..z; which (c0, c1) pair selects each is not visible here.
2535 return charArray_va;
2537 return charArray_vb;
2539 return charArray_vc;
2541 return charArray_vd;
2543 return charArray_ve;
2545 return charArray_vf;
2547 return charArray_vg;
2549 return charArray_vh;
2551 return charArray_vi;
2553 return charArray_vj;
2555 return charArray_vk;
2557 return charArray_vl;
2559 return charArray_vm;
2561 return charArray_vn;
2563 return charArray_vo;
2565 return charArray_vp;
2567 return charArray_vq;
2569 return charArray_vr;
2571 return charArray_vs;
2573 return charArray_vt;
2575 return charArray_vu;
2577 return charArray_vv;
2579 return charArray_vw;
2581 return charArray_vx;
2583 return charArray_vy;
2585 return charArray_vz;
2588 //try to return the same char[], built only once
// Bucket selection: both characters are folded into an index modulo TableSize.
2589 int hash = ((c0 << 6) + c1) % TableSize;
2590 char[][] table = charArray_length[0][hash];
// First scan: walk the bucket up to InternalTableSize looking for an entry equal to (c0, c1).
2592 while (++i < InternalTableSize) {
2593 char[] charArray = table[i];
2594 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2597 //---------other side---------
// Second scan: walk the slots up to and including newEntry2.
2599 int max = newEntry2;
2600 while (++i <= max) {
2601 char[] charArray = table[i];
2602 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2605 //--------add the entry-------
// Miss: advance the write slot (the overflow handling for InternalTableSize is on a line not
// shown) and store a newly built two-character array there.
2606 if (++max >= InternalTableSize)
2609 table[max] = (r = new char[] { c0, c1 });
2614 final char[] optimizedCurrentTokenSource3() {
2615 //try to return the same char[] build only once
2617 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2619 char[][] table = charArray_length[1][hash];
2621 while (++i < InternalTableSize) {
2622 char[] charArray = table[i];
2623 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2626 //---------other side---------
2628 int max = newEntry3;
2629 while (++i <= max) {
2630 char[] charArray = table[i];
2631 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2634 //--------add the entry-------
2635 if (++max >= InternalTableSize)
2638 table[max] = (r = new char[] { c0, c1, c2 });
2643 final char[] optimizedCurrentTokenSource4() {
2644 //try to return the same char[] build only once
2645 char c0, c1, c2, c3;
2646 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2647 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2649 char[][] table = charArray_length[2][(int) hash];
2651 while (++i < InternalTableSize) {
2652 char[] charArray = table[i];
2653 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2656 //---------other side---------
2658 int max = newEntry4;
2659 while (++i <= max) {
2660 char[] charArray = table[i];
2661 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2664 //--------add the entry-------
2665 if (++max >= InternalTableSize)
2668 table[max] = (r = new char[] { c0, c1, c2, c3 });
2673 final char[] optimizedCurrentTokenSource5() {
2674 //try to return the same char[] build only once
2675 char c0, c1, c2, c3, c4;
2676 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2677 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2679 char[][] table = charArray_length[3][(int) hash];
2681 while (++i < InternalTableSize) {
2682 char[] charArray = table[i];
2683 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2686 //---------other side---------
2688 int max = newEntry5;
2689 while (++i <= max) {
2690 char[] charArray = table[i];
2691 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2694 //--------add the entry-------
2695 if (++max >= InternalTableSize)
2698 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2703 final char[] optimizedCurrentTokenSource6() {
2704 //try to return the same char[] build only once
2705 char c0, c1, c2, c3, c4, c5;
2706 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2707 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2708 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2710 char[][] table = charArray_length[4][(int) hash];
2712 while (++i < InternalTableSize) {
2713 char[] charArray = table[i];
2714 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2715 && (c5 == charArray[5]))
2718 //---------other side---------
2720 int max = newEntry6;
2721 while (++i <= max) {
2722 char[] charArray = table[i];
2723 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2724 && (c5 == charArray[5]))
2727 //--------add the entry-------
2728 if (++max >= InternalTableSize)
2731 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2736 public final void pushLineSeparator() throws InvalidInputException {
2737 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2738 final int INCREMENT = 250;
2739 if (this.checkNonExternalizedStringLiterals) {
2740 // reinitialize the current line for non externalize strings purpose
2743 //currentCharacter is at position currentPosition-1
2745 if (currentCharacter == '\r') {
2746 int separatorPos = currentPosition - 1;
2747 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2749 //System.out.println("CR-" + separatorPos);
2751 lineEnds[++linePtr] = separatorPos;
2752 } catch (IndexOutOfBoundsException e) {
2753 //linePtr value is correct
2754 int oldLength = lineEnds.length;
2755 int[] old = lineEnds;
2756 lineEnds = new int[oldLength + INCREMENT];
2757 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2758 lineEnds[linePtr] = separatorPos;
2760 // look-ahead for merged cr+lf
2762 if (source[currentPosition] == '\n') {
2763 //System.out.println("look-ahead LF-" + currentPosition);
2764 lineEnds[linePtr] = currentPosition;
2770 } catch (IndexOutOfBoundsException e) {
2775 if (currentCharacter == '\n') {
2776 //must merge eventual cr followed by lf
2777 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2778 //System.out.println("merge LF-" + (currentPosition - 1));
2779 lineEnds[linePtr] = currentPosition - 1;
2781 int separatorPos = currentPosition - 1;
2782 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2784 // System.out.println("LF-" + separatorPos);
2786 lineEnds[++linePtr] = separatorPos;
2787 } catch (IndexOutOfBoundsException e) {
2788 //linePtr value is correct
2789 int oldLength = lineEnds.length;
2790 int[] old = lineEnds;
2791 lineEnds = new int[oldLength + INCREMENT];
2792 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2793 lineEnds[linePtr] = separatorPos;
2801 public final void pushUnicodeLineSeparator() {
2802 // isUnicode means that the \r or \n has been read as a unicode character
2803 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2804 final int INCREMENT = 250;
2805 //currentCharacter is at position currentPosition-1
2806 if (this.checkNonExternalizedStringLiterals) {
2807 // reinitialize the current line for non externalize strings purpose
2811 if (currentCharacter == '\r') {
2812 int separatorPos = currentPosition - 6;
2813 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2815 //System.out.println("CR-" + separatorPos);
2817 lineEnds[++linePtr] = separatorPos;
2818 } catch (IndexOutOfBoundsException e) {
2819 //linePtr value is correct
2820 int oldLength = lineEnds.length;
2821 int[] old = lineEnds;
2822 lineEnds = new int[oldLength + INCREMENT];
2823 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2824 lineEnds[linePtr] = separatorPos;
2826 // look-ahead for merged cr+lf
2827 if (source[currentPosition] == '\n') {
2828 //System.out.println("look-ahead LF-" + currentPosition);
2829 lineEnds[linePtr] = currentPosition;
2837 if (currentCharacter == '\n') {
2838 //must merge eventual cr followed by lf
2839 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2840 //System.out.println("merge LF-" + (currentPosition - 1));
2841 lineEnds[linePtr] = currentPosition - 6;
2843 int separatorPos = currentPosition - 6;
2844 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2846 // System.out.println("LF-" + separatorPos);
2848 lineEnds[++linePtr] = separatorPos;
2849 } catch (IndexOutOfBoundsException e) {
2850 //linePtr value is correct
2851 int oldLength = lineEnds.length;
2852 int[] old = lineEnds;
2853 lineEnds = new int[oldLength + INCREMENT];
2854 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2855 lineEnds[linePtr] = separatorPos;
2863 public void recordComment(int token) {
2865 int stopPosition = this.currentPosition;
2867 case TokenNameCOMMENT_LINE:
2868 stopPosition = -this.lastCommentLinePosition;
2870 case TokenNameCOMMENT_BLOCK:
2871 stopPosition = -this.currentPosition;
2875 // a new comment is recorded
2876 int length = this.commentStops.length;
2877 if (++this.commentPtr >= length) {
2878 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2879 //grows the positions buffers too
2880 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2882 this.commentStops[this.commentPtr] = stopPosition;
2883 this.commentStarts[this.commentPtr] = this.startPosition;
2886 // public final void recordComment(boolean isJavadoc) {
2887 // // a new annotation comment is recorded
2889 // commentStops[++commentPtr] = isJavadoc
2890 // ? currentPosition
2891 // : -currentPosition;
2892 // } catch (IndexOutOfBoundsException e) {
2893 // int oldStackLength = commentStops.length;
2894 // int[] oldStack = commentStops;
2895 // commentStops = new int[oldStackLength + 30];
2896 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2897 // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2898 // //grows the positions buffers too
2899 // int[] old = commentStarts;
2900 // commentStarts = new int[oldStackLength + 30];
2901 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2903 // //the buffer is of a correct size here
2904 // commentStarts[commentPtr] = startPosition;
// Repositions the scanner so that scanning can restart from 'begin'.
// begin: position assigned to initialPosition, startPosition and currentPosition.
// end:   last position to scan; eofPosition becomes end + 1, except that Integer.MAX_VALUE is
//        stored unchanged so the increment cannot overflow.
// The recorded comments are discarded (commentPtr goes back to -1).
// NOTE(review): the numbering skips 2908, so one short line after the first comment is absent
// from this listing.
2906 public void resetTo(int begin, int end) {
2907 //reset the scanner to a given position where it may rescan again
2909 initialPosition = startPosition = currentPosition = begin;
2910 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2911 commentPtr = -1; // reset comment stack
// Handles the character following a backslash inside a single-quoted string.
// Reads the next character straight from source (advancing currentPosition) and replaces
// currentCharacter with a quote or a backslash depending on what was read.
// NOTE(review): the case labels, 'break' statements and any statement after the last assignment
// are absent from this listing, so which input maps to which assignment cannot be confirmed.
2914 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2915 // the string with "\\u" is a legal string of two chars \ and u
2916 //thus we use a direct access to the source (for regular cases).
// The unicode-aware path below is disabled; only the direct read is active.
2917 // if (unicodeAsBackSlash) {
2918 // // consume next character
2919 // unicodeAsBackSlash = false;
2920 // if (((currentCharacter = source[currentPosition++]) == '\\')
2921 // && (source[currentPosition] == 'u')) {
2922 // getNextUnicodeChar();
2924 // if (withoutUnicodePtr != 0) {
2925 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2929 currentCharacter = source[currentPosition++];
2930 switch (currentCharacter) {
2932 currentCharacter = '\'';
2935 currentCharacter = '\\';
2938 currentCharacter = '\\';
// Handles the character following a backslash inside a double-quoted string.
// Reads the next character straight from source (advancing currentPosition) and replaces
// currentCharacter with the character the escape stands for: tab, line feed, carriage return,
// double quote, single quote, backslash or dollar sign, or the value of an octal escape of up
// to three digits. An InvalidInputException(INVALID_ESCAPE) is raised on a path of the octal
// branch.
// NOTE(review): the case labels, 'break' statements, the "ignore last character" statements and
// the closing braces are absent from this listing; the branch conditions described here are
// taken from the surviving inline comments.
2943 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2944 // the string with "\\u" is a legal string of two chars \ and u
2945 //thus we use a direct access to the source (for regular cases).
// The unicode-aware path below is disabled; only the direct read is active.
2946 // if (unicodeAsBackSlash) {
2947 // // consume next character
2948 // unicodeAsBackSlash = false;
2949 // if (((currentCharacter = source[currentPosition++]) == '\\')
2950 // && (source[currentPosition] == 'u')) {
2951 // getNextUnicodeChar();
2953 // if (withoutUnicodePtr != 0) {
2954 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2958 currentCharacter = source[currentPosition++];
2959 switch (currentCharacter) {
// The backspace and form-feed translations are commented out.
2961 // currentCharacter = '\b';
2964 currentCharacter = '\t';
2967 currentCharacter = '\n';
2970 // currentCharacter = '\f';
2973 currentCharacter = '\r';
2976 currentCharacter = '\"';
2979 currentCharacter = '\'';
2982 currentCharacter = '\\';
2985 currentCharacter = '$';
2988 // -----------octal escape--------------
2990 // OctalDigit OctalDigit
2991 // ZeroToThree OctalDigit OctalDigit
// 'number' accumulates the escape value, one octal digit (0..7) at a time.
2992 int number = Character.getNumericValue(currentCharacter);
2993 if (number >= 0 && number <= 7) {
// True when the first digit is 4..7; per the comments below, a third digit is then not used.
2994 boolean zeroToThreeNot = number > 3;
2995 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2996 int digit = Character.getNumericValue(currentCharacter);
2997 if (digit >= 0 && digit <= 7) {
2998 number = (number * 8) + digit;
2999 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3000 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3001 // Digit --> ignore last character
3004 digit = Character.getNumericValue(currentCharacter);
3005 if (digit >= 0 && digit <= 7) {
3006 // has read \ZeroToThree OctalDigit OctalDigit
3007 number = (number * 8) + digit;
3008 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3009 // --> ignore last character
3013 } else { // has read \OctalDigit NonDigit--> ignore last
3017 } else { // has read \OctalDigit NonOctalDigit--> ignore last
3021 } else { // has read \OctalDigit --> ignore last character
3025 throw new InvalidInputException(INVALID_ESCAPE);
// The accumulated octal value becomes the escaped character.
3026 currentCharacter = (char) number;
3029 // throw new InvalidInputException(INVALID_ESCAPE);
3033 // public int scanIdentifierOrKeyword() throws InvalidInputException {
3034 // return scanIdentifierOrKeyword( false );
3036 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
3038 //first dispatch on the first char.
3039 //then the length. If there are several
3040 //keywors with the same length AND the same first char, then do another
3041 //disptach on the second char :-)...cool....but fast !
3042 useAssertAsAnIndentifier = false;
3043 while (getNextCharAsJavaIdentifierPart()) {
3047 // if (new String(getCurrentTokenSource()).equals("$this")) {
3048 // return TokenNamethis;
3050 return TokenNameVariable;
3055 // if (withoutUnicodePtr == 0)
3056 //quick test on length == 1 but not on length > 12 while most identifier
3057 //have a length which is <= 12...but there are lots of identifier with
3060 if ((length = currentPosition - startPosition) == 1)
3061 return TokenNameIdentifier;
3063 data = new char[length];
3064 index = startPosition;
3065 for (int i = 0; i < length; i++) {
3066 data[i] = Character.toLowerCase(source[index + i]);
3070 // if ((length = withoutUnicodePtr) == 1)
3071 // return TokenNameIdentifier;
3072 // // data = withoutUnicodeBuffer;
3073 // data = new char[withoutUnicodeBuffer.length];
3074 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3075 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3079 firstLetter = data[index];
3080 switch (firstLetter) {
3085 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3086 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3087 return TokenNameFILE;
3088 index = 0; //__LINE__
3089 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3090 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3091 return TokenNameLINE;
3095 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3096 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3097 return TokenNameCLASS_C;
3101 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3102 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3103 && (data[++index] == '_'))
3104 return TokenNameMETHOD_C;
3108 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3109 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3110 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3111 return TokenNameFUNC_C;
3114 return TokenNameIdentifier;
3116 // as and array abstract
3120 if ((data[++index] == 's')) {
3123 return TokenNameIdentifier;
3127 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3128 return TokenNameand;
3130 return TokenNameIdentifier;
3134 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3135 return TokenNamearray;
3137 return TokenNameIdentifier;
3139 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3140 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3141 return TokenNameabstract;
3143 return TokenNameIdentifier;
3145 return TokenNameIdentifier;
3151 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3152 return TokenNamebreak;
3154 return TokenNameIdentifier;
3156 return TokenNameIdentifier;
3159 //case catch class clone const continue
3162 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3163 return TokenNamecase;
3165 return TokenNameIdentifier;
3167 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3168 return TokenNamecatch;
3170 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3171 return TokenNameclass;
3173 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3174 return TokenNameclone;
3176 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3177 return TokenNameconst;
3179 return TokenNameIdentifier;
3181 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3182 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3183 return TokenNamecontinue;
3185 return TokenNameIdentifier;
3187 return TokenNameIdentifier;
3190 // declare default do die
3191 // TODO delete define ==> no keyword !
3194 if ((data[++index] == 'o'))
3197 return TokenNameIdentifier;
3199 // if ((data[++index] == 'e')
3200 // && (data[++index] == 'f')
3201 // && (data[++index] == 'i')
3202 // && (data[++index] == 'n')
3203 // && (data[++index] == 'e'))
3204 // return TokenNamedefine;
3206 // return TokenNameIdentifier;
3208 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3209 && (data[++index] == 'r') && (data[++index] == 'e'))
3210 return TokenNamedeclare;
3212 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3213 && (data[++index] == 'l') && (data[++index] == 't'))
3214 return TokenNamedefault;
3216 return TokenNameIdentifier;
3218 return TokenNameIdentifier;
3221 //echo else exit elseif extends eval
3224 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3225 return TokenNameecho;
3226 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3227 return TokenNameelse;
3228 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3229 return TokenNameexit;
3230 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3231 return TokenNameeval;
3233 return TokenNameIdentifier;
3236 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3237 return TokenNameendif;
3238 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3239 return TokenNameempty;
3241 return TokenNameIdentifier;
3244 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3245 && (data[++index] == 'r'))
3246 return TokenNameendfor;
3247 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3248 && (data[++index] == 'f'))
3249 return TokenNameelseif;
3251 return TokenNameIdentifier;
3253 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3254 && (data[++index] == 'd') && (data[++index] == 's'))
3255 return TokenNameextends;
3257 return TokenNameIdentifier;
3260 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3261 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3262 return TokenNameendwhile;
3264 return TokenNameIdentifier;
3267 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3268 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3269 return TokenNameendswitch;
3271 return TokenNameIdentifier;
3274 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3275 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3276 && (data[++index] == 'e'))
3277 return TokenNameenddeclare;
3279 if ((data[++index] == 'n') // endforeach
3280 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3281 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3282 return TokenNameendforeach;
3284 return TokenNameIdentifier;
3286 return TokenNameIdentifier;
3289 //for false final function
3292 if ((data[++index] == 'o') && (data[++index] == 'r'))
3293 return TokenNamefor;
3295 return TokenNameIdentifier;
3297 // if ((data[++index] == 'a') && (data[++index] == 'l')
3298 // && (data[++index] == 's') && (data[++index] == 'e'))
3299 // return TokenNamefalse;
3300 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3301 return TokenNamefinal;
3303 return TokenNameIdentifier;
3306 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3307 && (data[++index] == 'c') && (data[++index] == 'h'))
3308 return TokenNameforeach;
3310 return TokenNameIdentifier;
3313 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3314 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3315 return TokenNamefunction;
3317 return TokenNameIdentifier;
3319 return TokenNameIdentifier;
3324 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3325 && (data[++index] == 'l')) {
3326 return TokenNameglobal;
3329 return TokenNameIdentifier;
3331 //if int isset include include_once instanceof interface implements
3334 if (data[++index] == 'f')
3337 return TokenNameIdentifier;
3339 // if ((data[++index] == 'n') && (data[++index] == 't'))
3340 // return TokenNameint;
3342 // return TokenNameIdentifier;
3344 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3345 return TokenNameisset;
3347 return TokenNameIdentifier;
3349 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3350 && (data[++index] == 'd') && (data[++index] == 'e'))
3351 return TokenNameinclude;
3353 return TokenNameIdentifier;
3356 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3357 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3358 return TokenNameinterface;
3360 return TokenNameIdentifier;
3363 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3364 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3365 && (data[++index] == 'f'))
3366 return TokenNameinstanceof;
3367 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3368 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3369 && (data[++index] == 's'))
3370 return TokenNameimplements;
3372 return TokenNameIdentifier;
3374 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3375 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3376 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3377 return TokenNameinclude_once;
3379 return TokenNameIdentifier;
3381 return TokenNameIdentifier;
3386 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3387 return TokenNamelist;
3390 return TokenNameIdentifier;
3395 if ((data[++index] == 'e') && (data[++index] == 'w'))
3396 return TokenNamenew;
3398 return TokenNameIdentifier;
3400 // if ((data[++index] == 'u') && (data[++index] == 'l')
3401 // && (data[++index] == 'l'))
3402 // return TokenNamenull;
3404 // return TokenNameIdentifier;
3406 return TokenNameIdentifier;
3411 if (data[++index] == 'r') {
3415 // if (length == 12) {
3416 // if ((data[++index] == 'l')
3417 // && (data[++index] == 'd')
3418 // && (data[++index] == '_')
3419 // && (data[++index] == 'f')
3420 // && (data[++index] == 'u')
3421 // && (data[++index] == 'n')
3422 // && (data[++index] == 'c')
3423 // && (data[++index] == 't')
3424 // && (data[++index] == 'i')
3425 // && (data[++index] == 'o')
3426 // && (data[++index] == 'n')) {
3427 // return TokenNameold_function;
3430 return TokenNameIdentifier;
3432 // print public private protected
3435 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3436 return TokenNameprint;
3438 return TokenNameIdentifier;
3440 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3441 && (data[++index] == 'c')) {
3442 return TokenNamepublic;
3444 return TokenNameIdentifier;
3446 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3447 && (data[++index] == 't') && (data[++index] == 'e')) {
3448 return TokenNameprivate;
3450 return TokenNameIdentifier;
3452 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3453 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3454 return TokenNameprotected;
3456 return TokenNameIdentifier;
3458 return TokenNameIdentifier;
3460 //return require require_once
3462 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3463 && (data[++index] == 'n')) {
3464 return TokenNamereturn;
3466 } else if (length == 7) {
3467 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3468 && (data[++index] == 'r') && (data[++index] == 'e')) {
3469 return TokenNamerequire;
3471 } else if (length == 12) {
3472 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3473 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3474 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3475 return TokenNamerequire_once;
3478 return TokenNameIdentifier;
3483 if (data[++index] == 't')
3484 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3485 return TokenNamestatic;
3487 return TokenNameIdentifier;
3488 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3489 && (data[++index] == 'h'))
3490 return TokenNameswitch;
3492 return TokenNameIdentifier;
3494 return TokenNameIdentifier;
3500 if ((data[++index] == 'r') && (data[++index] == 'y'))
3501 return TokenNametry;
3503 return TokenNameIdentifier;
3505 // if ((data[++index] == 'r') && (data[++index] == 'u')
3506 // && (data[++index] == 'e'))
3507 // return TokenNametrue;
3509 // return TokenNameIdentifier;
3511 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3512 return TokenNamethrow;
3514 return TokenNameIdentifier;
3516 return TokenNameIdentifier;
3522 if ((data[++index] == 's') && (data[++index] == 'e'))
3523 return TokenNameuse;
3525 return TokenNameIdentifier;
3527 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3528 return TokenNameunset;
3530 return TokenNameIdentifier;
3532 return TokenNameIdentifier;
3538 if ((data[++index] == 'a') && (data[++index] == 'r'))
3539 return TokenNamevar;
3541 return TokenNameIdentifier;
3543 return TokenNameIdentifier;
3549 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3550 return TokenNamewhile;
3552 return TokenNameIdentifier;
3553 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3554 // (data[++index]=='e') && (data[++index]=='f')&&
3555 // (data[++index]=='p'))
3556 //return TokenNamewidefp ;
3558 //return TokenNameIdentifier;
3560 return TokenNameIdentifier;
3566 if ((data[++index] == 'o') && (data[++index] == 'r'))
3567 return TokenNamexor;
3569 return TokenNameIdentifier;
3571 return TokenNameIdentifier;
3574 return TokenNameIdentifier;
/**
 * Scans a numeric literal that starts at the current character and classifies it as an
 * integer or a double literal.
 *
 * Visible cases: a leading '0' followed by x/X is read as a hexadecimal integer; a leading '0'
 * followed by more digits is an integer unless a d/D suffix or a '.' turns it into a double;
 * otherwise digits, an optional '.' part (only when dotPrefix is false) and an optional exponent
 * are read.
 *
 * NOTE(review): this listing has lost short lines (loop bodies, else branches, closing braces and
 * probably the statements that set 'floating'); confirm details against the complete file.
 * NOTE(review): a trailing d/D suffix is accepted as in Java; confirm this is intended for PHP.
 *
 * @param dotPrefix true when the number was introduced by a '.'; 'floating' then starts out true
 *          and no further '.' is accepted in the decimal path
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException INVALID_HEXA when 0x/0X is not followed by a hexadecimal digit,
 *           INVALID_FLOAT when an exponent marker (with optional sign) is not followed by a digit
 */
3578 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3579 //when entering this method the currentCharacter is the first
3580 //digit of the number, i.e. it may be preceded by a . when
3582 boolean floating = dotPrefix;
3583 if ((!dotPrefix) && (currentCharacter == '0')) {
3584 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3585 //force the first char of the hexa number to exist...
3586 // consume next character
3587 unicodeAsBackSlash = false;
3588 currentCharacter = source[currentPosition++];
3589 // if (((currentCharacter = source[currentPosition++]) == '\\')
3590 // && (source[currentPosition] == 'u')) {
3591 // getNextUnicodeChar();
3593 // if (withoutUnicodePtr != 0) {
3594 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Character.digit(..., 16) returns -1 for anything that is not a hexadecimal digit
3597 if (Character.digit(currentCharacter, 16) == -1)
3598 throw new InvalidInputException(INVALID_HEXA);
3600 while (getNextCharAsDigit(16)) {
3603 // if (getNextChar('l', 'L') >= 0)
3604 // return TokenNameLongLiteral;
3606 return TokenNameIntegerLiteral;
3608 //there is x or X in the number
3609 //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3610 // 00078.0 is true !!!!! crazy language
3611 if (getNextCharAsDigit()) {
3612 //-------------potential octal-----------------
3613 while (getNextCharAsDigit()) {
3616 // if (getNextChar('l', 'L') >= 0) {
3617 // return TokenNameLongLiteral;
3620 // if (getNextChar('f', 'F') >= 0) {
3621 // return TokenNameFloatingPointLiteral;
3623 if (getNextChar('d', 'D') >= 0) {
3624 return TokenNameDoubleLiteral;
3625 } else { //make the distinction between octal and float ....
3626 if (getNextChar('.')) { //bingo ! ....
3627 while (getNextCharAsDigit()) {
3630 if (getNextChar('e', 'E') >= 0) {
3631 // consume next character
3632 unicodeAsBackSlash = false;
3633 currentCharacter = source[currentPosition++];
3634 // if (((currentCharacter = source[currentPosition++]) == '\\')
3635 // && (source[currentPosition] == 'u')) {
3636 // getNextUnicodeChar();
3638 // if (withoutUnicodePtr != 0) {
3639 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// optional sign of the exponent
3642 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3643 // consume next character
3644 unicodeAsBackSlash = false;
3645 currentCharacter = source[currentPosition++];
3646 // if (((currentCharacter = source[currentPosition++]) == '\\')
3647 // && (source[currentPosition] == 'u')) {
3648 // getNextUnicodeChar();
3650 // if (withoutUnicodePtr != 0) {
3651 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3652 // currentCharacter;
// the exponent needs at least one digit
3656 if (!Character.isDigit(currentCharacter))
3657 throw new InvalidInputException(INVALID_FLOAT);
3658 while (getNextCharAsDigit()) {
3662 // if (getNextChar('f', 'F') >= 0)
3663 // return TokenNameFloatingPointLiteral;
3664 getNextChar('d', 'D'); //jump over potential d or D
3665 return TokenNameDoubleLiteral;
3667 return TokenNameIntegerLiteral;
3674 while (getNextCharAsDigit()) {
3677 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3678 // return TokenNameLongLiteral;
3679 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3680 while (getNextCharAsDigit()) {
3685 //if floating is true both exponent and suffix may be optional
3686 if (getNextChar('e', 'E') >= 0) {
3688 // consume next character
3689 unicodeAsBackSlash = false;
3690 currentCharacter = source[currentPosition++];
3691 // if (((currentCharacter = source[currentPosition++]) == '\\')
3692 // && (source[currentPosition] == 'u')) {
3693 // getNextUnicodeChar();
3695 // if (withoutUnicodePtr != 0) {
3696 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3699 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3702 unicodeAsBackSlash = false;
3703 currentCharacter = source[currentPosition++];
3704 // if (((currentCharacter = source[currentPosition++]) == '\\')
3705 // && (source[currentPosition] == 'u')) {
3706 // getNextUnicodeChar();
3708 // if (withoutUnicodePtr != 0) {
3709 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3713 if (!Character.isDigit(currentCharacter))
3714 throw new InvalidInputException(INVALID_FLOAT);
3715 while (getNextCharAsDigit()) {
3719 if (getNextChar('d', 'D') >= 0)
3720 return TokenNameDoubleLiteral;
3721 // if (getNextChar('f', 'F') >= 0)
3722 // return TokenNameFloatingPointLiteral;
3723 //the long flag has been tested before
3724 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3728 * Search the line number corresponding to a specific position
// Looks up the line that contains the given source position by comparing it with entries of the
// lineEnds table; only the first linePtr + 1 entries are considered.
// g and d start as the first and last index of that range and m is the probed index - presumably a
// binary search, but the loop header and the returned values are not visible in this listing
// (TODO confirm, including whether the result is 1-based).
3731 public final int getLineNumber(int position) {
// no line-end table has been recorded
3732 if (lineEnds == null)
3734 int length = linePtr + 1;
3737 int g = 0, d = length - 1;
3741 if (position < lineEnds[m]) {
3743 } else if (position > lineEnds[m]) {
3749 if (position < lineEnds[m]) {
// Setter taking the PHP-mode flag. The body is not visible in this listing; presumably it assigns
// the phpMode field - TODO confirm.
3755 public void setPHPMode(boolean mode) {
// Convenience overload: installs the given source with no compilation unit by delegating to
// setSource(null, source).
3759 public final void setSource(char[] source) {
3760 setSource(null, source);
/**
 * Installs a new source buffer and resets the scanning state that depends on it.
 *
 * A null source is replaced by an empty array. initialPosition and currentPosition restart at 0,
 * containsAssertKeyword is cleared, withoutUnicodeBuffer is reallocated with the length of the
 * source and encapsedStringStack is replaced by a fresh Stack.
 *
 * NOTE(review): some lines of this method are not visible in this listing; other fields may be
 * reset as well.
 *
 * @param compilationUnit stored in this.compilationUnit; may be null (the one-argument overload
 *          passes null)
 * @param source the characters to scan, or null for an empty source
 */
3763 public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3764 //the source-buffer is set to sourceString
3765 this.compilationUnit = compilationUnit;
3766 if (source == null) {
3767 this.source = new char[0];
3769 this.source = source;
3772 initialPosition = currentPosition = 0;
3773 containsAssertKeyword = false;
// sized from this.source so that a null argument yields an empty buffer instead of a NullPointerException
3774 withoutUnicodeBuffer = new char[this.source.length];
3775 encapsedStringStack = new Stack();
/**
 * Textual rendering of the scanner state: the source with the current token framed by marker lines.
 *
 * Returns "EOF\n\n" + source when startPosition equals the source length, and a "behind the EOF"
 * message + source when currentPosition is past the end of the source. Otherwise the source is
 * split into the text before startPosition, the token text from startPosition up to (not
 * including) currentPosition, and the text from currentPosition on.
 *
 * NOTE(review): the tail of the return expression is not visible in this listing.
 */
3778 public String toString() {
3779 if (startPosition == source.length)
3780 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3781 if (currentPosition > source.length)
3782 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3783 char front[] = new char[startPosition];
3784 System.arraycopy(source, 0, front, 0, startPosition);
// simplifies to currentPosition - startPosition
3785 int middleLength = (currentPosition - 1) - startPosition + 1;
3787 if (middleLength > -1) {
3788 middle = new char[middleLength];
3789 System.arraycopy(source, startPosition, middle, 0, middleLength);
3791 middle = new char[0];
// NOTE(review): 'end' is allocated with source.length - currentPosition + 1 elements but only
// source.length - currentPosition chars are copied into it, so its last element stays '\0' -
// confirm whether that trailing NUL in the rendered text is intended.
3793 char end[] = new char[source.length - (currentPosition - 1)];
3794 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3795 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3796 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable description of a token kind.
 *
 * Keywords and operators map to a fixed string. Token kinds that carry text (inline HTML,
 * identifier, variable, number and string literals, heredoc, whitespace, comments) are rendered as
 * Kind(current token source). Any value without a case of its own is rendered as
 * "not-a-token(" + act + ") " followed by the current token source.
 *
 * NOTE(review): a number of case labels and return statements of this switch are not visible in
 * this listing; confirm the full mapping against the complete file.
 *
 * @param act the token kind to describe (a TokenNameXxx constant)
 * @return the description of that token kind
 */
3800 public final String toStringAction(int act) {
3802 case TokenNameERROR:
3803 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3805 case TokenNameINLINE_HTML:
3806 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3807 case TokenNameIdentifier:
3808 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3809 case TokenNameVariable:
3810 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3811 case TokenNameabstract:
3812 return "abstract"; //$NON-NLS-1$
3814 return "AND"; //$NON-NLS-1$
3815 case TokenNamearray:
3816 return "array"; //$NON-NLS-1$
3818 return "as"; //$NON-NLS-1$
3819 case TokenNamebreak:
3820 return "break"; //$NON-NLS-1$
3822 return "case"; //$NON-NLS-1$
3823 case TokenNameclass:
3824 return "class"; //$NON-NLS-1$
3825 case TokenNamecatch:
3826 return "catch"; //$NON-NLS-1$
3827 case TokenNameclone:
3830 case TokenNameconst:
3833 case TokenNamecontinue:
3834 return "continue"; //$NON-NLS-1$
3835 case TokenNamedefault:
3836 return "default"; //$NON-NLS-1$
3837 // case TokenNamedefine :
3838 // return "define"; //$NON-NLS-1$
3840 return "do"; //$NON-NLS-1$
3842 return "echo"; //$NON-NLS-1$
3844 return "else"; //$NON-NLS-1$
3845 case TokenNameelseif:
3846 return "elseif"; //$NON-NLS-1$
3847 case TokenNameendfor:
3848 return "endfor"; //$NON-NLS-1$
3849 case TokenNameendforeach:
3850 return "endforeach"; //$NON-NLS-1$
3851 case TokenNameendif:
3852 return "endif"; //$NON-NLS-1$
3853 case TokenNameendswitch:
3854 return "endswitch"; //$NON-NLS-1$
3855 case TokenNameendwhile:
3856 return "endwhile"; //$NON-NLS-1$
3859 case TokenNameextends:
3860 return "extends"; //$NON-NLS-1$
3861 // case TokenNamefalse :
3862 // return "false"; //$NON-NLS-1$
3863 case TokenNamefinal:
3864 return "final"; //$NON-NLS-1$
3866 return "for"; //$NON-NLS-1$
3867 case TokenNameforeach:
3868 return "foreach"; //$NON-NLS-1$
3869 case TokenNamefunction:
3870 return "function"; //$NON-NLS-1$
3871 case TokenNameglobal:
3872 return "global"; //$NON-NLS-1$
3874 return "if"; //$NON-NLS-1$
3875 case TokenNameimplements:
3876 return "implements"; //$NON-NLS-1$
3877 case TokenNameinclude:
3878 return "include"; //$NON-NLS-1$
3879 case TokenNameinclude_once:
3880 return "include_once"; //$NON-NLS-1$
3881 case TokenNameinstanceof:
3882 return "instanceof"; //$NON-NLS-1$
3883 case TokenNameinterface:
3884 return "interface"; //$NON-NLS-1$
3885 case TokenNameisset:
3886 return "isset"; //$NON-NLS-1$
3888 return "list"; //$NON-NLS-1$
3890 return "new"; //$NON-NLS-1$
3891 // case TokenNamenull :
3892 // return "null"; //$NON-NLS-1$
3894 return "OR"; //$NON-NLS-1$
3895 case TokenNameprint:
3896 return "print"; //$NON-NLS-1$
3897 case TokenNameprivate:
3898 return "private"; //$NON-NLS-1$
3899 case TokenNameprotected:
3900 return "protected"; //$NON-NLS-1$
3901 case TokenNamepublic:
3902 return "public"; //$NON-NLS-1$
3903 case TokenNamerequire:
3904 return "require"; //$NON-NLS-1$
3905 case TokenNamerequire_once:
3906 return "require_once"; //$NON-NLS-1$
3907 case TokenNamereturn:
3908 return "return"; //$NON-NLS-1$
3909 case TokenNamestatic:
3910 return "static"; //$NON-NLS-1$
3911 case TokenNameswitch:
3912 return "switch"; //$NON-NLS-1$
3913 // case TokenNametrue :
3914 // return "true"; //$NON-NLS-1$
3915 case TokenNameunset:
3916 return "unset"; //$NON-NLS-1$
3918 return "var"; //$NON-NLS-1$
3919 case TokenNamewhile:
3920 return "while"; //$NON-NLS-1$
3922 return "XOR"; //$NON-NLS-1$
3923 // case TokenNamethis :
3924 // return "$this"; //$NON-NLS-1$
3925 case TokenNameIntegerLiteral:
3926 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3927 case TokenNameDoubleLiteral:
3928 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3929 case TokenNameStringDoubleQuote:
3930 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3931 case TokenNameStringSingleQuote:
3932 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3933 case TokenNameStringInterpolated:
3934 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3935 case TokenNameEncapsedString0:
3936 return "`"; //$NON-NLS-1$
3937 case TokenNameEncapsedString1:
3938 return "\'"; //$NON-NLS-1$
3939 case TokenNameEncapsedString2:
3940 return "\""; //$NON-NLS-1$
3941 case TokenNameSTRING:
3942 return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3943 case TokenNameHEREDOC:
3944 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3945 case TokenNamePLUS_PLUS:
3946 return "++"; //$NON-NLS-1$
3947 case TokenNameMINUS_MINUS:
3948 return "--"; //$NON-NLS-1$
3949 case TokenNameEQUAL_EQUAL:
3950 return "=="; //$NON-NLS-1$
3951 case TokenNameEQUAL_EQUAL_EQUAL:
3952 return "==="; //$NON-NLS-1$
3953 case TokenNameEQUAL_GREATER:
3954 return "=>"; //$NON-NLS-1$
3955 case TokenNameLESS_EQUAL:
3956 return "<="; //$NON-NLS-1$
3957 case TokenNameGREATER_EQUAL:
3958 return ">="; //$NON-NLS-1$
3959 case TokenNameNOT_EQUAL:
3960 return "!="; //$NON-NLS-1$
3961 case TokenNameNOT_EQUAL_EQUAL:
3962 return "!=="; //$NON-NLS-1$
3963 case TokenNameLEFT_SHIFT:
3964 return "<<"; //$NON-NLS-1$
3965 case TokenNameRIGHT_SHIFT:
3966 return ">>"; //$NON-NLS-1$
3967 case TokenNamePLUS_EQUAL:
3968 return "+="; //$NON-NLS-1$
3969 case TokenNameMINUS_EQUAL:
3970 return "-="; //$NON-NLS-1$
3971 case TokenNameMULTIPLY_EQUAL:
3972 return "*="; //$NON-NLS-1$
3973 case TokenNameDIVIDE_EQUAL:
3974 return "/="; //$NON-NLS-1$
3975 case TokenNameAND_EQUAL:
3976 return "&="; //$NON-NLS-1$
3977 case TokenNameOR_EQUAL:
3978 return "|="; //$NON-NLS-1$
3979 case TokenNameXOR_EQUAL:
3980 return "^="; //$NON-NLS-1$
3981 case TokenNameREMAINDER_EQUAL:
3982 return "%="; //$NON-NLS-1$
3983 case TokenNameDOT_EQUAL:
3984 return ".="; //$NON-NLS-1$
3985 case TokenNameLEFT_SHIFT_EQUAL:
3986 return "<<="; //$NON-NLS-1$
3987 case TokenNameRIGHT_SHIFT_EQUAL:
3988 return ">>="; //$NON-NLS-1$
3989 case TokenNameOR_OR:
3990 return "||"; //$NON-NLS-1$
3991 case TokenNameAND_AND:
3992 return "&&"; //$NON-NLS-1$
3994 return "+"; //$NON-NLS-1$
3995 case TokenNameMINUS:
3996 return "-"; //$NON-NLS-1$
3997 case TokenNameMINUS_GREATER:
4000 return "!"; //$NON-NLS-1$
4001 case TokenNameREMAINDER:
4002 return "%"; //$NON-NLS-1$
4004 return "^"; //$NON-NLS-1$
4006 return "&"; //$NON-NLS-1$
4007 case TokenNameMULTIPLY:
4008 return "*"; //$NON-NLS-1$
4010 return "|"; //$NON-NLS-1$
4011 case TokenNameTWIDDLE:
4012 return "~"; //$NON-NLS-1$
4013 case TokenNameTWIDDLE_EQUAL:
4014 return "~="; //$NON-NLS-1$
4015 case TokenNameDIVIDE:
4016 return "/"; //$NON-NLS-1$
4017 case TokenNameGREATER:
4018 return ">"; //$NON-NLS-1$
4020 return "<"; //$NON-NLS-1$
4021 case TokenNameLPAREN:
4022 return "("; //$NON-NLS-1$
4023 case TokenNameRPAREN:
4024 return ")"; //$NON-NLS-1$
4025 case TokenNameLBRACE:
4026 return "{"; //$NON-NLS-1$
4027 case TokenNameRBRACE:
4028 return "}"; //$NON-NLS-1$
4029 case TokenNameLBRACKET:
4030 return "["; //$NON-NLS-1$
4031 case TokenNameRBRACKET:
4032 return "]"; //$NON-NLS-1$
4033 case TokenNameSEMICOLON:
4034 return ";"; //$NON-NLS-1$
4035 case TokenNameQUESTION:
4036 return "?"; //$NON-NLS-1$
4037 case TokenNameCOLON:
4038 return ":"; //$NON-NLS-1$
4039 case TokenNameCOMMA:
4040 return ","; //$NON-NLS-1$
4042 return "."; //$NON-NLS-1$
4043 case TokenNameEQUAL:
4044 return "="; //$NON-NLS-1$
4047 case TokenNameDOLLAR:
4049 case TokenNameDOLLAR_LBRACE:
4051 case TokenNameLBRACE_DOLLAR:
4054 return "EOF"; //$NON-NLS-1$
4055 case TokenNameWHITESPACE:
4056 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4057 case TokenNameCOMMENT_LINE:
4058 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4059 case TokenNameCOMMENT_BLOCK:
4060 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4061 case TokenNameCOMMENT_PHPDOC:
4062 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4063 // case TokenNameHTML :
4064 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
4067 return "__FILE__"; //$NON-NLS-1$
4069 return "__LINE__"; //$NON-NLS-1$
4070 case TokenNameCLASS_C:
4071 return "__CLASS__"; //$NON-NLS-1$
4072 case TokenNameMETHOD_C:
4073 return "__METHOD__"; //$NON-NLS-1$
4074 case TokenNameFUNC_C:
4075 return "__FUNCTION__"; //$NON-NLS-1$
4076 case TokenNameboolCAST:
4077 return "( bool )"; //$NON-NLS-1$
4078 case TokenNameintCAST:
4079 return "( int )"; //$NON-NLS-1$
4080 case TokenNamedoubleCAST:
4081 return "( double )"; //$NON-NLS-1$
4082 case TokenNameobjectCAST:
4083 return "( object )"; //$NON-NLS-1$
4084 case TokenNamestringCAST:
4085 return "( string )"; //$NON-NLS-1$
// fallback for values without a case of their own
// NOTE(review): boxing through new Integer(act) is unnecessary for string concatenation; plain act
// would yield the same text.
4087 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
// Creates a scanner with checkNonExternalizedStringLiterals switched off; delegates to the
// three-argument constructor.
4095 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
4096 this(tokenizeComments, tokenizeWhiteSpace, false);
// Creates a scanner with assertMode switched off; delegates to the four-argument constructor.
4099 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
4100 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
// Creates a scanner without string tokenizing and without task tags: delegates to the full
// constructor with tokenizeStrings = false, taskTags = null, taskPriorities = null and
// isTaskCaseSensitive = true.
4103 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
4104 boolean assertMode) {
4105 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null, true);
/**
 * Creates a fully configured scanner. Every argument is stored in the field of the same name;
 * eofPosition starts at Integer.MAX_VALUE and encapsedStringStack stays null until a source is set.
 *
 * @param tokenizeComments stored in this.tokenizeComments
 * @param tokenizeWhiteSpace stored in this.tokenizeWhiteSpace
 * @param checkNonExternalizedStringLiterals stored in this.checkNonExternalizedStringLiterals
 * @param assertMode stored in this.assertMode
 * @param tokenizeStrings stored in this.tokenizeStrings
 * @param taskTags the tags that checkTaskTag searches for in comments; may be null
 * @param taskPriorities priorities matched to taskTags by index; may be null or shorter than taskTags
 * @param isTaskCaseSensitive when true, checkTaskTag requires tag characters to match exactly;
 *          when false, characters are compared after lower-casing
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
  this.eofPosition = Integer.MAX_VALUE;
  this.tokenizeComments = tokenizeComments;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeStrings = tokenizeStrings;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.assertMode = assertMode;
  this.encapsedStringStack = null;
  this.taskTags = taskTags;
  this.taskPriorities = taskPriorities;
  // This argument used to be accepted but never stored, so the caller's choice had no effect on
  // the field that checkTaskTag reads.
  this.isTaskCaseSensitive = isTaskCaseSensitive;
}
// Runs the NLS tag parsing (parseTags) on currentLine.
// NOTE(review): the statement guarded by the null check is not visible in this listing; presumably
// an early return when no line is recorded - confirm.
4121 private void checkNonExternalizeString() throws InvalidInputException {
4122 if (currentLine == null)
4124 parseTags(currentLine);
/**
 * Reads the tags of the form TAG_PREFIX index TAG_POSTFIX from the current token source and then
 * collects the string literals of the line that are still non-null into this.nonNLSStrings.
 *
 * The index found in a tag is one-based and is converted to a zero-based position; an index that
 * is not a number is turned into -1. When no literal remains, nonNLSStrings is set to null;
 * otherwise wasNonExternalizedStringLiteral is set and the array is shrunk to the number of
 * literals collected.
 *
 * NOTE(review): several lines are not visible in this listing, including the loop header over the
 * tags and what is done with an entry for which line.exists(i) holds - confirm against the full file.
 *
 * @param line the literals recorded for the current line
 */
4127 private void parseTags(NLSLine line) throws InvalidInputException {
4128 String s = new String(getCurrentTokenSource());
4129 int pos = s.indexOf(TAG_PREFIX);
4130 int lineLength = line.size();
4132 int start = pos + TAG_PREFIX_LENGTH;
// NOTE(review): indexOf returns -1 when TAG_POSTFIX is missing after the prefix; substring(start, -1)
// would then throw StringIndexOutOfBoundsException - confirm whether such input can reach here.
4133 int end = s.indexOf(TAG_POSTFIX, start);
4134 String index = s.substring(start, end);
4137 i = Integer.parseInt(index) - 1;
4138 // Tags are one based not zero based.
4139 } catch (NumberFormatException e) {
4140 i = -1; // we don't want to consider this as a valid NLS tag
4142 if (line.exists(i)) {
// continue the search behind the tag just handled
4145 pos = s.indexOf(TAG_PREFIX, start);
// sized for the worst case in which every literal of the line is kept
4147 this.nonNLSStrings = new StringLiteral[lineLength];
4148 int nonNLSCounter = 0;
4149 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4150 StringLiteral literal = (StringLiteral) iterator.next();
4151 if (literal != null) {
4152 this.nonNLSStrings[nonNLSCounter++] = literal;
4155 if (nonNLSCounter == 0) {
4156 this.nonNLSStrings = null;
4160 this.wasNonExternalizedStringLiteral = true;
4161 if (nonNLSCounter != lineLength) {
// shrink the array to exactly the literals that were collected
4162 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
/**
 * Reads the character of an escape sequence from the source and replaces currentCharacter with the
 * character it stands for.
 *
 * The visible results of the single-character escapes are \b, \t, \n, \f, \r, double quote, single
 * quote and backslash. Otherwise the character is tried as the start of an octal escape of up to
 * three octal digits, whose value becomes currentCharacter.
 *
 * NOTE(review): the case labels, the pointer adjustments of the "ignore last character" branches
 * and the condition of the first INVALID_ESCAPE throw are not visible in this listing - confirm
 * against the full file.
 *
 * @throws InvalidInputException INVALID_ESCAPE for an escape that is not accepted
 */
4167 public final void scanEscapeCharacter() throws InvalidInputException {
4168 // the string with "\\u" is a legal string of two chars \ and u
4169 //thus we use a direct access to the source (for regular cases).
4170 if (unicodeAsBackSlash) {
4171 // consume next character
4172 unicodeAsBackSlash = false;
4173 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4174 // (source[currentPosition] == 'u')) {
4175 // getNextUnicodeChar();
4177 if (withoutUnicodePtr != 0) {
4178 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
4182 currentCharacter = source[currentPosition++];
4183 switch (currentCharacter) {
4185 currentCharacter = '\b';
4188 currentCharacter = '\t';
4191 currentCharacter = '\n';
4194 currentCharacter = '\f';
4197 currentCharacter = '\r';
4200 currentCharacter = '\"';
4203 currentCharacter = '\'';
4206 currentCharacter = '\\';
4209 // -----------octal escape--------------
4211 // OctalDigit OctalDigit
4212 // ZeroToThree OctalDigit OctalDigit
4213 int number = Character.getNumericValue(currentCharacter);
4214 if (number >= 0 && number <= 7) {
// a first digit above 3 rules out a third octal digit (see the branch comments below)
4215 boolean zeroToThreeNot = number > 3;
4216 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4217 int digit = Character.getNumericValue(currentCharacter);
4218 if (digit >= 0 && digit <= 7) {
4219 number = (number * 8) + digit;
4220 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4221 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4222 // Digit --> ignore last character
4225 digit = Character.getNumericValue(currentCharacter);
4226 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4227 // OctalDigit OctalDigit
4228 number = (number * 8) + digit;
4229 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4230 // --> ignore last character
4234 } else { // has read \OctalDigit NonDigit--> ignore last
4238 } else { // has read \OctalDigit NonOctalDigit--> ignore last
4242 } else { // has read \OctalDigit --> ignore last character
4246 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the escaped character
4247 currentCharacter = (char) number;
4249 throw new InvalidInputException(INVALID_ESCAPE);
//check presence of task: tags
//TODO (frederic) see if we need to take unicode characters into account...
/**
 * Searches the comment between commentStart and commentEnd for the configured task tags and records
 * every hit in foundTaskTags, foundTaskPriorities, foundTaskPositions and foundTaskMessages.
 *
 * First pass: starting two characters into the comment, each position is tried against every tag
 * unless the previous character is '@'. A tag that starts with an identifier character is rejected
 * when an identifier character precedes it, and a tag that ends with one is checked against an
 * identifier character following it. Characters are compared exactly, or after lower-casing when
 * isTaskCaseSensitive is false. The result arrays start with 5 slots and double when full.
 *
 * Second pass: for every task found by this call, the message is taken from the text after the
 * tag, limited by the start of the next task or the end of the comment, trimmed of whitespace, and
 * stored together with the updated end position.
 *
 * NOTE(review): short lines (continue/break statements, some declarations, closing braces) are not
 * visible in this listing - confirm details against the full file.
 * NOTE(review): this.taskTags is dereferenced without a visible null check, while the shorter
 * constructors pass null for it - confirm that callers guard this.
 *
 * @param commentStart index of the first character of the comment in the source
 * @param commentEnd index at which the search stops (exclusive in the first pass)
 */
4255 public void checkTaskTag(int commentStart, int commentEnd) {
4256 char[] src = this.source;
4258 // only look for newer task: tags
4259 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// tasks found by this call start at this index; the second pass below only handles those
4262 int foundTaskIndex = this.foundTaskCount;
4263 char previous = src[commentStart + 1]; // should be '*' or '/'
4264 nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4266 char[] priority = null;
4267 // check for tag occurrence only if not ambiguous with javadoc tag
4268 if (previous != '@') {
4269 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4270 tag = this.taskTags[itag];
4271 int tagLength = tag.length;
4275 // ensure tag is not leaded with letter if tag starts with a letter
4276 if (Scanner.isPHPIdentifierStart(tag[0])) {
4277 if (Scanner.isPHPIdentifierPart(previous)) {
4282 for (int t = 0; t < tagLength; t++) {
4285 if (x >= this.eofPosition || x >= commentEnd)
4287 if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4288 if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case insensitive check
4293 // ensure tag is not followed with letter if tag finishes with a letter
4294 if (i + tagLength < commentEnd && Scanner.isPHPIdentifierPart(src[i + tagLength - 1])) {
4295 if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
// allocate the result arrays on first use, double them when they are full
4298 if (this.foundTaskTags == null) {
4299 this.foundTaskTags = new char[5][];
4300 this.foundTaskMessages = new char[5][];
4301 this.foundTaskPriorities = new char[5][];
4302 this.foundTaskPositions = new int[5][];
4303 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4304 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4305 this.foundTaskCount);
4306 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4307 this.foundTaskCount);
4308 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4309 this.foundTaskCount);
4310 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4311 this.foundTaskCount);
// a tag without a matching priority entry gets a null priority
4314 priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4316 this.foundTaskTags[this.foundTaskCount] = tag;
4317 this.foundTaskPriorities[this.foundTaskCount] = priority;
4318 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
// placeholder; replaced in the second pass when a non-empty message is found
4319 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4320 this.foundTaskCount++;
4321 i += tagLength - 1; // will be incremented when looping
4327 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4328 // retrieve message start and end positions
4329 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4330 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4331 // at most beginning of next task
4332 if (max_value < msgStart) {
4333 max_value = msgStart; // would only occur if tag is before EOF.
// look for a line break after the tag
4337 for (int j = msgStart; j < max_value; j++) {
4338 if ((c = src[j]) == '\n' || c == '\r') {
// scan backwards for a '*'
4344 for (int j = max_value; j > msgStart; j--) {
4345 if ((c = src[j]) == '*') {
4353 if (msgStart == end)
// NOTE(review): both trim loops read src[...] before testing msgStart <= end, so the array is
// indexed once before the range test can stop the loop - confirm this cannot leave the array bounds.
4356 while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4358 while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4360 // update the end position of the task
4361 this.foundTaskPositions[i][1] = end;
4362 // get the message source
4363 final int messageLength = end - msgStart + 1;
4364 char[] message = new char[messageLength];
4365 System.arraycopy(src, msgStart, message, 0, messageLength);
4366 this.foundTaskMessages[i] = message;
4370 // check presence of task: tags
4371 // public void checkTaskTag(int commentStart, int commentEnd) {
4372 // // only look for newer task: tags
4373 // if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4376 // int foundTaskIndex = this.foundTaskCount;
4377 // nextChar: for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
4378 // char[] tag = null;
4379 // char[] priority = null;
4380 // // check for tag occurrence
4381 // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4382 // tag = this.taskTags[itag];
4383 // priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4384 // int tagLength = tag.length;
4385 // for (int t = 0; t < tagLength; t++) {
4386 // if (this.source[i + t] != tag[t])
4387 // continue nextTag;
4389 // if (this.foundTaskTags == null) {
4390 // this.foundTaskTags = new char[5][];
4391 // this.foundTaskMessages = new char[5][];
4392 // this.foundTaskPriorities = new char[5][];
4393 // this.foundTaskPositions = new int[5][];
4394 // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4395 // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4396 // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4397 // this.foundTaskCount);
4398 // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4399 // this.foundTaskCount);
4400 // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4401 // this.foundTaskCount);
4403 // this.foundTaskTags[this.foundTaskCount] = tag;
4404 // this.foundTaskPriorities[this.foundTaskCount] = priority;
4405 // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4406 // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4407 // this.foundTaskCount++;
4408 // i += tagLength - 1; // will be incremented when looping
4411 // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4412 // // retrieve message start and end positions
4413 // int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4414 // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4415 // // at most beginning of next task
4416 // if (max_value < msgStart)
4417 // max_value = msgStart; // would only occur if tag is before EOF.
4420 // for (int j = msgStart; j < max_value; j++) {
4421 // if ((c = this.source[j]) == '\n' || c == '\r') {
4427 // for (int j = max_value; j > msgStart; j--) {
4428 // if ((c = this.source[j]) == '*') {
4436 // if (msgStart == end)
4437 // continue; // empty
4438 // // trim the message
4439 // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4441 // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4443 // // update the end position of the task
4444 // this.foundTaskPositions[i][1] = end;
4445 // // get the message source
4446 // final int messageLength = end - msgStart + 1;
4447 // char[] message = new char[messageLength];
4448 // System.arraycopy(source, msgStart, message, 0, messageLength);
4449 // this.foundTaskMessages[i] = message;