1 /***********************************************************************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3 * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4 * and is available at http://www.eclipse.org/legal/cpl-v05.html
6 * Contributors: IBM Corporation - initial API and implementation
7 **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
13 import java.util.Stack;
15 import net.sourceforge.phpdt.core.compiler.CharOperation;
16 import net.sourceforge.phpdt.core.compiler.IScanner;
17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
20 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
22 public class Scanner implements IScanner, ITerminalSymbols {
/*
 * APIs are:
 * - getNextToken(), which returns the current type of the token (this value is not memorized by the scanner)
 * - getCurrentTokenSource(), which provides the token "REAL" source (aka all unicode have been transformed into a correct char)
 * - sourceStart gives the position into the stream
 * - currentPosition-1 gives the sourceEnd position into the stream
 */
29 private boolean assertMode;
31 public boolean useAssertAsAnIndentifier = false;
33 //flag indicating if processed source contains occurrences of keyword assert
34 public boolean containsAssertKeyword = false;
36 public boolean recordLineSeparator;
38 public boolean ignorePHPOneLiner = false;
40 public boolean phpMode = false;
42 public Stack encapsedStringStack = null;
44 public char currentCharacter;
46 public int startPosition;
48 public int currentPosition;
50 public int initialPosition, eofPosition;
// after this position, EOF tokens are generated instead of real tokens from the source
54 public boolean tokenizeComments;
56 public boolean tokenizeWhiteSpace;
58 public boolean tokenizeStrings;
//source should be viewed as a window (aka a part)
//of an entire very large stream
65 public char[] withoutUnicodeBuffer;
67 public int withoutUnicodePtr;
69 //when == 0 ==> no unicode in the current token
70 public boolean unicodeAsBackSlash = false;
72 public boolean scanningFloatLiteral = false;
74 //support for /** comments
75 public int[] commentStops = new int[10];
77 public int[] commentStarts = new int[10];
79 public int commentPtr = -1; // no comment test with commentPtr value -1
81 protected int lastCommentLinePosition = -1;
83 //diet parsing support - jump over some method body when requested
84 public boolean diet = false;
86 //support for the poor-line-debuggers ....
87 //remember the position of the cr/lf
88 public int[] lineEnds = new int[250];
90 public int linePtr = -1;
92 public boolean wasAcr = false;
94 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
96 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
98 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
100 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
102 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
104 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
106 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
108 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
110 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
112 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
114 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
116 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
//----------------optimized identifier management------------------
119 static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
120 charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
121 charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
122 charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
123 charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
124 charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
125 charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
126 charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
127 charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
129 static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
131 'c' }, charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' },
132 charArray_vf = new char[] { '$', 'f' }, charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' },
133 charArray_vi = new char[] { '$', 'i' }, charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' },
134 charArray_vl = new char[] { '$', 'l' }, charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' },
135 charArray_vo = new char[] { '$', 'o' }, charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' },
136 charArray_vr = new char[] { '$', 'r' }, charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' },
137 charArray_vu = new char[] { '$', 'u' }, charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' },
138 charArray_vx = new char[] { '$', 'x' }, charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
140 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
142 static final int TableSize = 30, InternalTableSize = 6;
145 public static final int OptimizedLength = 6;
148 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
150 // support for detecting non-externalized string literals
151 int currentLineNr = -1;
153 int previousLineNr = -1;
155 NLSLine currentLine = null;
157 List lines = new ArrayList();
159 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
161 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
163 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
165 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
167 public StringLiteral[] nonNLSStrings = null;
169 public boolean checkNonExternalizedStringLiterals = true;
171 public boolean wasNonExternalizedStringLiteral = false;
// Instance initializer: pre-fills every slot of the optimized identifier cache with the shared
// placeholder entry, so that later look-ups never meet a null array.
{
  // OptimizedLength is the size of the first dimension of charArray_length
  for (int i = 0; i < OptimizedLength; i++) {
    for (int j = 0; j < TableSize; j++) {
      for (int k = 0; k < InternalTableSize; k++) {
        charArray_length[i][j][k] = initCharArray;
      }
    }
  }
}
182 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
184 public static final int RoundBracket = 0;
186 public static final int SquareBracket = 1;
188 public static final int CurlyBracket = 2;
190 public static final int BracketKinds = 3;
193 public char[][] foundTaskTags = null;
195 public char[][] foundTaskMessages;
197 public char[][] foundTaskPriorities = null;
199 public int[][] foundTaskPositions;
201 public int foundTaskCount = 0;
203 public char[][] taskTags = null;
205 public char[][] taskPriorities = null;
207 public boolean isTaskCaseSensitive = true;
209 public static final boolean DEBUG = false;
211 public static final boolean TRACE = false;
213 public ICompilationUnit compilationUnit = null;
216 * Determines if the specified character is permissible as the first character in a PHP identifier or
219 * The '$' character for PHP variables is regarded as a correct first character !
222 public static boolean isPHPIdentOrVarStart(char ch) {
223 return Character.isLetter(ch) || (ch == '$') ||(ch == '_') || (0x7F <= ch && ch <= 0xFF);
227 * Determines if the specified character is permissible as the first character in a PHP identifier.
229 * The '$' character for PHP variables isn't regarded as the first character !
231 public static boolean isPHPIdentifierStart(char ch) {
232 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
236 * Determines if the specified character may be part of a PHP identifier as other than the first character
238 public static boolean isPHPIdentifierPart(char ch) {
239 return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
242 public final boolean atEnd() {
243 // This code is not relevant if source is
244 // Only a part of the real stream input
245 return source.length == currentPosition;
248 public char[] getCurrentIdentifierSource() {
249 //return the token REAL source (aka unicodes are precomputed)
251 // if (withoutUnicodePtr != 0)
252 // //0 is used as a fast test flag so the real first char is in position 1
254 // withoutUnicodeBuffer,
256 // result = new char[withoutUnicodePtr],
258 // withoutUnicodePtr);
260 int length = currentPosition - startPosition;
261 switch (length) { // see OptimizedLength
263 return optimizedCurrentTokenSource1();
265 return optimizedCurrentTokenSource2();
267 return optimizedCurrentTokenSource3();
269 return optimizedCurrentTokenSource4();
271 return optimizedCurrentTokenSource5();
273 return optimizedCurrentTokenSource6();
276 System.arraycopy(source, startPosition, result = new char[length], 0, length);
281 public int getCurrentTokenEndPosition() {
282 return this.currentPosition - 1;
285 public final char[] getCurrentTokenSource() {
286 // Return the token REAL source (aka unicodes are precomputed)
288 // if (withoutUnicodePtr != 0)
289 // // 0 is used as a fast test flag so the real first char is in position 1
291 // withoutUnicodeBuffer,
293 // result = new char[withoutUnicodePtr],
295 // withoutUnicodePtr);
298 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
303 public final char[] getCurrentTokenSource(int startPos) {
304 // Return the token REAL source (aka unicodes are precomputed)
306 // if (withoutUnicodePtr != 0)
307 // // 0 is used as a fast test flag so the real first char is in position 1
309 // withoutUnicodeBuffer,
311 // result = new char[withoutUnicodePtr],
313 // withoutUnicodePtr);
316 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
321 public final char[] getCurrentTokenSourceString() {
322 //return the token REAL source (aka unicodes are precomputed).
323 //REMOVE the two " that are at the beginning and the end.
325 if (withoutUnicodePtr != 0)
326 //0 is used as a fast test flag so the real first char is in position 1
327 System.arraycopy(withoutUnicodeBuffer, 2,
328 //2 is 1 (real start) + 1 (to jump over the ")
329 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
332 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
337 public final char[] getRawTokenSourceEnd() {
338 int length = this.eofPosition - this.currentPosition - 1;
339 char[] sourceEnd = new char[length];
340 System.arraycopy(this.source, this.currentPosition, sourceEnd, 0, length);
343 public int getCurrentTokenStartPosition() {
344 return this.startPosition;
347 public final char[] getCurrentStringLiteralSource() {
348 // Return the token REAL source (aka unicodes are precomputed)
349 if (startPosition + 1 >= currentPosition) {
354 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
359 public final char[] getCurrentStringLiteralSource(int startPos) {
360 // Return the token REAL source (aka unicodes are precomputed)
363 System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
369 * Search the source position corresponding to the end of a given line number
371 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
373 * In case the given line number is inconsistent, answers -1.
375 public final int getLineEnd(int lineNumber) {
376 if (lineEnds == null)
378 if (lineNumber >= lineEnds.length)
382 if (lineNumber == lineEnds.length - 1)
384 return lineEnds[lineNumber - 1];
385 // next line start one character behind the lineEnd of the previous line
389 * Search the source position corresponding to the beginning of a given line number
391 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
393 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
395 * In case the given line number is inconsistent, answers -1.
397 public final int getLineStart(int lineNumber) {
398 if (lineEnds == null)
400 if (lineNumber >= lineEnds.length)
405 return initialPosition;
406 return lineEnds[lineNumber - 2] + 1;
407 // next line start one character behind the lineEnd of the previous line
410 public final boolean getNextChar(char testedChar) {
412 //handle the case of unicode.
413 //when a unicode appears then we must use a buffer that holds char
415 //At the end of this method currentCharacter holds the new visited char
416 //and currentPosition points right next after it
417 //Both previous lines are true if the currentCharacter is == to the
419 //On false, no side effect has occured.
420 //ALL getNextChar.... ARE OPTIMIZED COPIES
421 int temp = currentPosition;
423 currentCharacter = source[currentPosition++];
424 // if (((currentCharacter = source[currentPosition++]) == '\\')
425 // && (source[currentPosition] == 'u')) {
426 // //-------------unicode traitement ------------
427 // int c1, c2, c3, c4;
428 // int unicodeSize = 6;
429 // currentPosition++;
430 // while (source[currentPosition] == 'u') {
431 // currentPosition++;
435 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
437 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
439 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
441 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
443 // currentPosition = temp;
447 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
448 // if (currentCharacter != testedChar) {
449 // currentPosition = temp;
452 // unicodeAsBackSlash = currentCharacter == '\\';
454 // //need the unicode buffer
455 // if (withoutUnicodePtr == 0) {
456 // //buffer all the entries that have been left aside....
457 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
461 // withoutUnicodeBuffer,
463 // withoutUnicodePtr);
465 // //fill the buffer with the char
466 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
469 // } //-------------end unicode traitement--------------
471 if (currentCharacter != testedChar) {
472 currentPosition = temp;
475 unicodeAsBackSlash = false;
476 // if (withoutUnicodePtr != 0)
477 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
480 } catch (IndexOutOfBoundsException e) {
481 unicodeAsBackSlash = false;
482 currentPosition = temp;
487 public final int getNextChar(char testedChar1, char testedChar2) {
488 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
489 //test can be done with (x==0) for the first and (x>0) for the second
490 //handle the case of unicode.
491 //when a unicode appears then we must use a buffer that holds char
493 //At the end of this method currentCharacter holds the new visited char
494 //and currentPosition points right next after it
495 //Both previous lines are true if the currentCharacter is == to the
497 //On false, no side effect has occured.
498 //ALL getNextChar.... ARE OPTIMIZED COPIES
499 int temp = currentPosition;
502 currentCharacter = source[currentPosition++];
503 // if (((currentCharacter = source[currentPosition++]) == '\\')
504 // && (source[currentPosition] == 'u')) {
505 // //-------------unicode traitement ------------
506 // int c1, c2, c3, c4;
507 // int unicodeSize = 6;
508 // currentPosition++;
509 // while (source[currentPosition] == 'u') {
510 // currentPosition++;
514 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
516 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
518 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
520 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
522 // currentPosition = temp;
526 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
527 // if (currentCharacter == testedChar1)
529 // else if (currentCharacter == testedChar2)
532 // currentPosition = temp;
536 // //need the unicode buffer
537 // if (withoutUnicodePtr == 0) {
538 // //buffer all the entries that have been left aside....
539 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
543 // withoutUnicodeBuffer,
545 // withoutUnicodePtr);
547 // //fill the buffer with the char
548 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
550 // } //-------------end unicode traitement--------------
552 if (currentCharacter == testedChar1)
554 else if (currentCharacter == testedChar2)
557 currentPosition = temp;
560 // if (withoutUnicodePtr != 0)
561 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
564 } catch (IndexOutOfBoundsException e) {
565 currentPosition = temp;
570 public final boolean getNextCharAsDigit() {
572 //handle the case of unicode.
573 //when a unicode appears then we must use a buffer that holds char
575 //At the end of this method currentCharacter holds the new visited char
576 //and currentPosition points right next after it
577 //Both previous lines are true if the currentCharacter is a digit
578 //On false, no side effect has occured.
579 //ALL getNextChar.... ARE OPTIMIZED COPIES
580 int temp = currentPosition;
582 currentCharacter = source[currentPosition++];
583 // if (((currentCharacter = source[currentPosition++]) == '\\')
584 // && (source[currentPosition] == 'u')) {
585 // //-------------unicode traitement ------------
586 // int c1, c2, c3, c4;
587 // int unicodeSize = 6;
588 // currentPosition++;
589 // while (source[currentPosition] == 'u') {
590 // currentPosition++;
594 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
596 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
598 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
600 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
602 // currentPosition = temp;
606 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
607 // if (!Character.isDigit(currentCharacter)) {
608 // currentPosition = temp;
612 // //need the unicode buffer
613 // if (withoutUnicodePtr == 0) {
614 // //buffer all the entries that have been left aside....
615 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
619 // withoutUnicodeBuffer,
621 // withoutUnicodePtr);
623 // //fill the buffer with the char
624 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
626 // } //-------------end unicode traitement--------------
628 if (!Character.isDigit(currentCharacter)) {
629 currentPosition = temp;
632 // if (withoutUnicodePtr != 0)
633 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
636 } catch (IndexOutOfBoundsException e) {
637 currentPosition = temp;
642 public final boolean getNextCharAsDigit(int radix) {
644 //handle the case of unicode.
645 //when a unicode appears then we must use a buffer that holds char
647 //At the end of this method currentCharacter holds the new visited char
648 //and currentPosition points right next after it
649 //Both previous lines are true if the currentCharacter is a digit base on
651 //On false, no side effect has occured.
652 //ALL getNextChar.... ARE OPTIMIZED COPIES
653 int temp = currentPosition;
655 currentCharacter = source[currentPosition++];
656 // if (((currentCharacter = source[currentPosition++]) == '\\')
657 // && (source[currentPosition] == 'u')) {
658 // //-------------unicode traitement ------------
659 // int c1, c2, c3, c4;
660 // int unicodeSize = 6;
661 // currentPosition++;
662 // while (source[currentPosition] == 'u') {
663 // currentPosition++;
667 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
669 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
671 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
673 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
675 // currentPosition = temp;
679 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
680 // if (Character.digit(currentCharacter, radix) == -1) {
681 // currentPosition = temp;
685 // //need the unicode buffer
686 // if (withoutUnicodePtr == 0) {
687 // //buffer all the entries that have been left aside....
688 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
692 // withoutUnicodeBuffer,
694 // withoutUnicodePtr);
696 // //fill the buffer with the char
697 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
699 // } //-------------end unicode traitement--------------
701 if (Character.digit(currentCharacter, radix) == -1) {
702 currentPosition = temp;
705 // if (withoutUnicodePtr != 0)
706 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
709 } catch (IndexOutOfBoundsException e) {
710 currentPosition = temp;
715 public boolean getNextCharAsJavaIdentifierPart() {
717 //handle the case of unicode.
718 //when a unicode appears then we must use a buffer that holds char
720 //At the end of this method currentCharacter holds the new visited char
721 //and currentPosition points right next after it
722 //Both previous lines are true if the currentCharacter is a
723 // JavaIdentifierPart
724 //On false, no side effect has occured.
725 //ALL getNextChar.... ARE OPTIMIZED COPIES
726 int temp = currentPosition;
728 currentCharacter = source[currentPosition++];
729 // if (((currentCharacter = source[currentPosition++]) == '\\')
730 // && (source[currentPosition] == 'u')) {
731 // //-------------unicode traitement ------------
732 // int c1, c2, c3, c4;
733 // int unicodeSize = 6;
734 // currentPosition++;
735 // while (source[currentPosition] == 'u') {
736 // currentPosition++;
740 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
742 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
744 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
746 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
748 // currentPosition = temp;
752 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
753 // if (!isPHPIdentifierPart(currentCharacter)) {
754 // currentPosition = temp;
758 // //need the unicode buffer
759 // if (withoutUnicodePtr == 0) {
760 // //buffer all the entries that have been left aside....
761 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
765 // withoutUnicodeBuffer,
767 // withoutUnicodePtr);
769 // //fill the buffer with the char
770 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
772 // } //-------------end unicode traitement--------------
774 if (!isPHPIdentifierPart(currentCharacter)) {
775 currentPosition = temp;
778 // if (withoutUnicodePtr != 0)
779 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
782 } catch (IndexOutOfBoundsException e) {
783 currentPosition = temp;
788 public int getCastOrParen() {
789 int tempPosition = currentPosition;
790 char tempCharacter = currentCharacter;
791 int tempToken = TokenNameLPAREN;
792 boolean found = false;
793 StringBuffer buf = new StringBuffer();
796 currentCharacter = source[currentPosition++];
797 } while (currentCharacter == ' ' || currentCharacter == '\t');
798 while ((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
799 buf.append(currentCharacter);
800 currentCharacter = source[currentPosition++];
802 if (buf.length() >= 3 && buf.length() <= 7) {
803 char[] data = buf.toString().toCharArray();
805 switch (data.length) {
808 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
810 tempToken = TokenNameintCAST;
815 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
817 tempToken = TokenNameboolCAST;
820 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
822 tempToken = TokenNamedoubleCAST;
828 if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
829 && (data[++index] == 'y')) {
831 tempToken = TokenNamearrayCAST;
834 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
835 && (data[++index] == 't')) {
837 tempToken = TokenNameunsetCAST;
840 if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
841 && (data[++index] == 't')) {
843 tempToken = TokenNamedoubleCAST;
849 // object string double
850 if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
851 && (data[++index] == 'c') && (data[++index] == 't')) {
853 tempToken = TokenNameobjectCAST;
856 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
857 && (data[++index] == 'n') && (data[++index] == 'g')) {
859 tempToken = TokenNamestringCAST;
862 if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
863 && (data[++index] == 'l') && (data[++index] == 'e')) {
865 tempToken = TokenNamedoubleCAST;
872 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
873 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
875 tempToken = TokenNameboolCAST;
878 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
879 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
881 tempToken = TokenNameintCAST;
887 while (currentCharacter == ' ' || currentCharacter == '\t') {
888 currentCharacter = source[currentPosition++];
890 if (currentCharacter == ')') {
895 } catch (IndexOutOfBoundsException e) {
897 currentCharacter = tempCharacter;
898 currentPosition = tempPosition;
899 return TokenNameLPAREN;
902 public void consumeStringInterpolated() throws InvalidInputException {
904 // consume next character
905 unicodeAsBackSlash = false;
906 currentCharacter = source[currentPosition++];
907 // if (((currentCharacter = source[currentPosition++]) == '\\')
908 // && (source[currentPosition] == 'u')) {
909 // getNextUnicodeChar();
911 // if (withoutUnicodePtr != 0) {
912 // withoutUnicodeBuffer[++withoutUnicodePtr] =
916 while (currentCharacter != '`') {
917 /** ** in PHP \r and \n are valid in string literals *** */
918 // if ((currentCharacter == '\n')
919 // || (currentCharacter == '\r')) {
920 // // relocate if finding another quote fairly close: thus unicode
921 // '/u000D' will be fully consumed
922 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
923 // if (currentPosition + lookAhead == source.length)
925 // if (source[currentPosition + lookAhead] == '\n')
927 // if (source[currentPosition + lookAhead] == '\"') {
928 // currentPosition += lookAhead + 1;
932 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
934 if (currentCharacter == '\\') {
935 int escapeSize = currentPosition;
936 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
937 //scanEscapeCharacter make a side effect on this value and we need
938 // the previous value few lines down this one
939 scanDoubleQuotedEscapeCharacter();
940 escapeSize = currentPosition - escapeSize;
941 if (withoutUnicodePtr == 0) {
942 //buffer all the entries that have been left aside....
943 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
944 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
945 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
946 } else { //overwrite the / in the buffer
947 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
948 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
949 // where only one is correct
954 // consume next character
955 unicodeAsBackSlash = false;
956 currentCharacter = source[currentPosition++];
957 // if (((currentCharacter = source[currentPosition++]) == '\\')
958 // && (source[currentPosition] == 'u')) {
959 // getNextUnicodeChar();
961 if (withoutUnicodePtr != 0) {
962 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
966 } catch (IndexOutOfBoundsException e) {
967 // reset end position for error reporting
968 currentPosition -= 2;
969 throw new InvalidInputException(UNTERMINATED_STRING);
970 } catch (InvalidInputException e) {
971 if (e.getMessage().equals(INVALID_ESCAPE)) {
972 // relocate if finding another quote fairly close: thus unicode
973 // '/u000D' will be fully consumed
974 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
975 if (currentPosition + lookAhead == source.length)
977 if (source[currentPosition + lookAhead] == '\n')
979 if (source[currentPosition + lookAhead] == '`') {
980 currentPosition += lookAhead + 1;
987 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
988 // //$NON-NLS-?$ where ? is an
990 if (currentLine == null) {
991 currentLine = new NLSLine();
992 lines.add(currentLine);
994 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
// Scans the remainder of a single-quoted string constant.
// Characters are read from source[] at currentPosition until a closing
// single quote is seen. Backslash escapes are handed to
// scanSingleQuotedEscapeCharacter() and the token text is mirrored into
// withoutUnicodeBuffer once the first escape has been met.
// Throws InvalidInputException(UNTERMINATED_STRING) when the end of source[]
// is reached first.
// NOTE(review): several structural lines (try opener, break statements,
// closing braces, any rethrow) are not visible in this excerpt; presumably
// the opening quote has already been consumed by the caller - confirm.
998 public void consumeStringConstant() throws InvalidInputException {
1000 // consume next character
1001 unicodeAsBackSlash = false;
1002 currentCharacter = source[currentPosition++];
1003 // if (((currentCharacter = source[currentPosition++]) == '\\')
1004 // && (source[currentPosition] == 'u')) {
1005 // getNextUnicodeChar();
1007 // if (withoutUnicodePtr != 0) {
1008 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1009 // currentCharacter;
// Keep reading until the closing single quote.
1012 while (currentCharacter != '\'') {
1013 /** ** in PHP \r and \n are valid in string literals *** */
1014 // if ((currentCharacter == '\n')
1015 // || (currentCharacter == '\r')) {
1016 // // relocate if finding another quote fairly close: thus unicode
1017 // '/u000D' will be fully consumed
1018 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1019 // if (currentPosition + lookAhead == source.length)
1021 // if (source[currentPosition + lookAhead] == '\n')
1023 // if (source[currentPosition + lookAhead] == '\"') {
1024 // currentPosition += lookAhead + 1;
1028 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1030 if (currentCharacter == '\\') {
// escapeSize first remembers the position right after the backslash; once the
// escape has been scanned it becomes the number of characters consumed by it.
1031 int escapeSize = currentPosition;
1032 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1033 //scanEscapeCharacter make a side effect on this value and we need
1034 // the previous value few lines down this one
1035 scanSingleQuotedEscapeCharacter();
1036 escapeSize = currentPosition - escapeSize;
1037 if (withoutUnicodePtr == 0) {
1038 //buffer all the entries that have been left aside....
// Number of characters between startPosition and the backslash (exclusive);
// they are copied into the buffer starting at index 1.
1039 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1040 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1041 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1042 } else { //overwrite the / in the buffer
1043 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1044 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1045 // where only one is correct
1046 withoutUnicodePtr--;
1050 // consume next character
1051 unicodeAsBackSlash = false;
1052 currentCharacter = source[currentPosition++];
1053 // if (((currentCharacter = source[currentPosition++]) == '\\')
1054 // && (source[currentPosition] == 'u')) {
1055 // getNextUnicodeChar();
// Only mirror the character when buffering has already started.
1057 if (withoutUnicodePtr != 0) {
1058 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source[] means the string was never closed.
1062 } catch (IndexOutOfBoundsException e) {
1063 // reset end position for error reporting
1064 currentPosition -= 2;
1065 throw new InvalidInputException(UNTERMINATED_STRING);
1066 } catch (InvalidInputException e) {
1067 if (e.getMessage().equals(INVALID_ESCAPE)) {
1068 // relocate if finding another quote fairly close: thus unicode
1069 // '/u000D' will be fully consumed
// Looks at most 50 characters ahead for the end of source, a newline or the
// closing quote; when the quote is found currentPosition is moved just past it.
1070 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1071 if (currentPosition + lookAhead == source.length)
1073 if (source[currentPosition + lookAhead] == '\n')
1075 if (source[currentPosition + lookAhead] == '\'') {
1076 currentPosition += lookAhead + 1;
1083 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1084 // //$NON-NLS-?$ where ? is an
// Lazily create the NLS line record, then register this literal with its
// start and end positions.
1086 if (currentLine == null) {
1087 currentLine = new NLSLine();
1088 lines.add(currentLine);
1090 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
// Scans the remainder of a double-quoted string literal.
// Characters are read from source[] at currentPosition until a closing
// double quote is seen. Backslash escapes are handed to
// scanDoubleQuotedEscapeCharacter() and the token text is mirrored into
// withoutUnicodeBuffer once the first escape has been met.
// Throws InvalidInputException(UNTERMINATED_STRING) when the end of source[]
// is reached first.
// NOTE(review): several structural lines (try opener, break statements,
// closing braces, any rethrow) are not visible in this excerpt; presumably
// the opening quote has already been consumed by the caller - confirm.
1094 public void consumeStringLiteral() throws InvalidInputException {
1096 // consume next character
1097 unicodeAsBackSlash = false;
1098 currentCharacter = source[currentPosition++];
1099 // if (((currentCharacter = source[currentPosition++]) == '\\')
1100 // && (source[currentPosition] == 'u')) {
1101 // getNextUnicodeChar();
1103 // if (withoutUnicodePtr != 0) {
1104 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1105 // currentCharacter;
// Keep reading until the closing double quote.
1108 while (currentCharacter != '"') {
1109 /** ** in PHP \r and \n are valid in string literals *** */
1110 // if ((currentCharacter == '\n')
1111 // || (currentCharacter == '\r')) {
1112 // // relocate if finding another quote fairly close: thus unicode
1113 // '/u000D' will be fully consumed
1114 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1115 // if (currentPosition + lookAhead == source.length)
1117 // if (source[currentPosition + lookAhead] == '\n')
1119 // if (source[currentPosition + lookAhead] == '\"') {
1120 // currentPosition += lookAhead + 1;
1124 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1126 if (currentCharacter == '\\') {
// escapeSize first remembers the position right after the backslash; once the
// escape has been scanned it becomes the number of characters consumed by it.
1127 int escapeSize = currentPosition;
1128 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1129 //scanEscapeCharacter make a side effect on this value and we need
1130 // the previous value few lines down this one
1131 scanDoubleQuotedEscapeCharacter();
1132 escapeSize = currentPosition - escapeSize;
1133 if (withoutUnicodePtr == 0) {
1134 //buffer all the entries that have been left aside....
// Number of characters between startPosition and the backslash (exclusive);
// they are copied into the buffer starting at index 1.
1135 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1136 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1137 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1138 } else { //overwrite the / in the buffer
1139 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1140 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1141 // where only one is correct
1142 withoutUnicodePtr--;
1146 // consume next character
1147 unicodeAsBackSlash = false;
1148 currentCharacter = source[currentPosition++];
1149 // if (((currentCharacter = source[currentPosition++]) == '\\')
1150 // && (source[currentPosition] == 'u')) {
1151 // getNextUnicodeChar();
// Only mirror the character when buffering has already started.
1153 if (withoutUnicodePtr != 0) {
1154 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Reading past the end of source[] means the string was never closed.
1158 } catch (IndexOutOfBoundsException e) {
1159 // reset end position for error reporting
1160 currentPosition -= 2;
1161 throw new InvalidInputException(UNTERMINATED_STRING);
1162 } catch (InvalidInputException e) {
1163 if (e.getMessage().equals(INVALID_ESCAPE)) {
1164 // relocate if finding another quote fairly close: thus unicode
1165 // '/u000D' will be fully consumed
// Looks at most 50 characters ahead for the end of source, a newline or the
// closing quote; when the quote is found currentPosition is moved just past it.
1166 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1167 if (currentPosition + lookAhead == source.length)
1169 if (source[currentPosition + lookAhead] == '\n')
1171 if (source[currentPosition + lookAhead] == '\"') {
1172 currentPosition += lookAhead + 1;
1179 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1180 // //$NON-NLS-?$ where ? is an
// Lazily create the NLS line record, then register this literal with its
// start and end positions.
1182 if (currentLine == null) {
1183 currentLine = new NLSLine();
1184 lines.add(currentLine);
1186 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
// Returns the terminal symbol (a TokenName... constant) of the next token,
// reading from source[] at currentPosition.
// Visible stages, in order:
//   1. delegation to getInlinedHTML() / jumpOverMethodBody() (guards not visible),
//   2. scanning inside an "encapsed" string tracked on encapsedStringStack,
//   3. whitespace skipping with line-separator recording,
//   4. the main switch on currentCharacter (operators, strings, heredoc,
//      comments, variables, identifiers, numbers).
// Running off the end of source[] is reported as TokenNameEOF.
// NOTE(review): most case labels, else branches, break statements and closing
// braces are not visible in this excerpt; comments below describe only what
// the visible statements do.
1190 public int getNextToken() throws InvalidInputException {
// NOTE(review): guard not visible - presumably taken while not in PHP mode.
1192 return getInlinedHTML(currentPosition);
1195 this.wasAcr = false;
// NOTE(review): guard not visible - skips a whole method body, then reports
// EOF if the skip ran past the end of source[], otherwise a closing brace.
1197 jumpOverMethodBody();
1199 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1203 withoutUnicodePtr = 0;
1204 //start with a new token
// encapsedChar is the marker on top of encapsedStringStack, or ' ' when the
// stack is empty.
1205 char encapsedChar = ' ';
1206 if (!encapsedStringStack.isEmpty()) {
1207 encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
// Marker is a real delimiter character: scan raw string content until the
// delimiter, a backslash escape or a '$'/'{' construct is met.
1209 if (encapsedChar != '$' && encapsedChar != ' ') {
1210 currentCharacter = source[currentPosition++];
// Delimiter met immediately: report the matching EncapsedString token.
1211 if (currentCharacter == encapsedChar) {
1212 switch (currentCharacter) {
1214 return TokenNameEncapsedString0;
1216 return TokenNameEncapsedString1;
1218 return TokenNameEncapsedString2;
1221 while (currentCharacter != encapsedChar) {
1222 /** ** in PHP \r and \n are valid in string literals *** */
1223 switch (currentCharacter) {
// Escape handling, same buffering scheme as consumeStringLiteral().
1225 int escapeSize = currentPosition;
1226 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1227 //scanEscapeCharacter make a side effect on this value and
1228 // we need the previous value few lines down this one
1229 scanDoubleQuotedEscapeCharacter();
1230 escapeSize = currentPosition - escapeSize;
1231 if (withoutUnicodePtr == 0) {
1232 //buffer all the entries that have been left aside....
1233 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1234 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1235 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1236 } else { //overwrite the / in the buffer
1237 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1238 if (backSlashAsUnicodeInString) { //there are TWO \ in
1239 withoutUnicodePtr--;
// An identifier start or '{' follows: push the '$' marker so that the next
// call scans in variable mode, and return the text scanned so far.
1244 if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
1246 encapsedStringStack.push(new Character('$'));
1247 return TokenNameSTRING;
1251 if (source[currentPosition] == '$') { // CURLY_OPEN
1253 encapsedStringStack.push(new Character('$'));
1254 return TokenNameSTRING;
1257 // consume next character
1258 unicodeAsBackSlash = false;
1259 currentCharacter = source[currentPosition++];
1260 if (withoutUnicodePtr != 0) {
1261 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1266 return TokenNameSTRING;
1268 // ---------Consume white space and handles startPosition---------
1269 int whiteStart = currentPosition;
1270 startPosition = currentPosition;
1271 currentCharacter = source[currentPosition++];
// Variable mode inside an encapsed string (marker '$').
1272 if (encapsedChar == '$') {
1273 switch (currentCharacter) {
1275 currentCharacter = source[currentPosition++];
1276 return TokenNameSTRING;
1278 if (encapsedChar == '$') {
1279 if (getNextChar('$'))
1280 return TokenNameLBRACE_DOLLAR;
1282 return TokenNameLBRACE;
1284 return TokenNameRBRACE;
1286 return TokenNameLBRACKET;
1288 return TokenNameRBRACKET;
1290 if (tokenizeStrings) {
1291 consumeStringConstant();
1292 return TokenNameStringSingleQuote;
1294 return TokenNameEncapsedString1;
1296 return TokenNameEncapsedString2;
1298 if (tokenizeStrings) {
1299 consumeStringInterpolated();
1300 return TokenNameStringInterpolated;
1302 return TokenNameEncapsedString0;
1304 if (getNextChar('>'))
1305 return TokenNameMINUS_GREATER;
1306 return TokenNameSTRING;
// '$' followed by '{' or an identifier start; otherwise the position is
// restored and the '$' is reported as plain string content.
1308 if (currentCharacter == '$') {
1309 int oldPosition = currentPosition;
1311 currentCharacter = source[currentPosition++];
1312 if (currentCharacter == '{') {
1313 return TokenNameDOLLAR_LBRACE;
1315 if (isPHPIdentifierStart(currentCharacter)) {
1316 return scanIdentifierOrKeyword(true);
1318 currentPosition = oldPosition;
1319 return TokenNameSTRING;
1321 } catch (IndexOutOfBoundsException e) {
1322 currentPosition = oldPosition;
1323 return TokenNameSTRING;
1326 if (isPHPIdentifierStart(currentCharacter))
1327 return scanIdentifierOrKeyword(false);
1328 if (Character.isDigit(currentCharacter))
1329 return scanNumber(false);
1330 return TokenNameERROR;
1333 // boolean isWhiteSpace;
// Skip whitespace; each iteration moves startPosition to the next character.
1335 while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1336 startPosition = currentPosition;
1337 currentCharacter = source[currentPosition++];
1338 // if (((currentCharacter = source[currentPosition++]) == '\\')
1339 // && (source[currentPosition] == 'u')) {
1340 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1342 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1343 checkNonExternalizeString();
1344 if (recordLineSeparator) {
1345 pushLineSeparator();
1350 // isWhiteSpace = (currentCharacter == ' ')
1351 // || Character.isWhitespace(currentCharacter);
// Whitespace was consumed and the client asked for whitespace tokens.
1354 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1355 // reposition scanner in case we are interested by spaces as tokens
1357 startPosition = whiteStart;
1358 return TokenNameWHITESPACE;
1360 //little trick to get out in the middle of a source computation
1361 if (currentPosition > eofPosition)
1362 return TokenNameEOF;
1363 // ---------Identify the next token-------------
1364 switch (currentCharacter) {
1366 return getCastOrParen();
1368 return TokenNameRPAREN;
1370 return TokenNameLBRACE;
1372 return TokenNameRBRACE;
1374 return TokenNameLBRACKET;
1376 return TokenNameRBRACKET;
1378 return TokenNameSEMICOLON;
1380 return TokenNameCOMMA;
1382 if (getNextChar('='))
1383 return TokenNameDOT_EQUAL;
1384 if (getNextCharAsDigit())
1385 return scanNumber(true);
1386 return TokenNameDOT;
1389 if ((test = getNextChar('+', '=')) == 0)
1390 return TokenNamePLUS_PLUS;
1392 return TokenNamePLUS_EQUAL;
1393 return TokenNamePLUS;
1397 if ((test = getNextChar('-', '=')) == 0)
1398 return TokenNameMINUS_MINUS;
1400 return TokenNameMINUS_EQUAL;
1401 if (getNextChar('>'))
1402 return TokenNameMINUS_GREATER;
1403 return TokenNameMINUS;
1406 if (getNextChar('='))
1407 return TokenNameTWIDDLE_EQUAL;
1408 return TokenNameTWIDDLE;
1410 if (getNextChar('=')) {
1411 if (getNextChar('=')) {
1412 return TokenNameNOT_EQUAL_EQUAL;
1414 return TokenNameNOT_EQUAL;
1416 return TokenNameNOT;
1418 if (getNextChar('='))
1419 return TokenNameMULTIPLY_EQUAL;
1420 return TokenNameMULTIPLY;
1422 if (getNextChar('='))
1423 return TokenNameREMAINDER_EQUAL;
1424 return TokenNameREMAINDER;
// Less-than family: peek one character; at end of source report TokenNameLESS.
1426 int oldPosition = currentPosition;
1428 currentCharacter = source[currentPosition++];
1429 } catch (IndexOutOfBoundsException e) {
1430 currentPosition = oldPosition;
1431 return TokenNameLESS;
1433 switch (currentCharacter) {
1435 return TokenNameLESS_EQUAL;
1437 return TokenNameNOT_EQUAL;
1439 if (getNextChar('='))
1440 return TokenNameLEFT_SHIFT_EQUAL;
// A further '<' starts a heredoc: skip whitespace, read the identifier that
// serves as end tag, then scan until that identifier re-appears after a line
// break.
1441 if (getNextChar('<')) {
1442 currentCharacter = source[currentPosition++];
1443 while (Character.isWhitespace(currentCharacter)) {
1444 currentCharacter = source[currentPosition++];
1446 int heredocStart = currentPosition - 1;
1447 int heredocLength = 0;
1448 if (isPHPIdentifierStart(currentCharacter)) {
1449 currentCharacter = source[currentPosition++];
1451 return TokenNameERROR;
1453 while (isPHPIdentifierPart(currentCharacter)) {
1454 currentCharacter = source[currentPosition++];
1456 heredocLength = currentPosition - heredocStart - 1;
1457 // heredoc end-tag determination
1458 boolean endTag = true;
1461 ch = source[currentPosition++];
1462 if (ch == '\r' || ch == '\n') {
1463 if (recordLineSeparator) {
1464 pushLineSeparator();
// Compare the text after the line break with the remembered tag, char by char.
1468 for (int i = 0; i < heredocLength; i++) {
1469 if (source[currentPosition + i] != source[heredocStart + i]) {
1475 currentPosition += heredocLength - 1;
1476 currentCharacter = source[currentPosition++];
1477 break; // do...while loop
1483 return TokenNameHEREDOC;
1485 return TokenNameLEFT_SHIFT;
1487 currentPosition = oldPosition;
1488 return TokenNameLESS;
1492 if ((test = getNextChar('=', '>')) == 0)
1493 return TokenNameGREATER_EQUAL;
1495 if ((test = getNextChar('=', '>')) == 0)
1496 return TokenNameRIGHT_SHIFT_EQUAL;
1497 return TokenNameRIGHT_SHIFT;
1499 return TokenNameGREATER;
1502 if (getNextChar('=')) {
1503 if (getNextChar('=')) {
1504 return TokenNameEQUAL_EQUAL_EQUAL;
1506 return TokenNameEQUAL_EQUAL;
1508 if (getNextChar('>'))
1509 return TokenNameEQUAL_GREATER;
1510 return TokenNameEQUAL;
1513 if ((test = getNextChar('&', '=')) == 0)
1514 return TokenNameAND_AND;
1516 return TokenNameAND_EQUAL;
1517 return TokenNameAND;
1521 if ((test = getNextChar('|', '=')) == 0)
1522 return TokenNameOR_OR;
1524 return TokenNameOR_EQUAL;
1528 if (getNextChar('='))
1529 return TokenNameXOR_EQUAL;
1530 return TokenNameXOR;
// A following '>' hands control to inline-HTML scanning (two characters back
// from the current position), or ends the input when nothing follows.
1532 if (getNextChar('>')) {
1534 if (currentPosition == source.length) {
1536 return TokenNameINLINE_HTML;
1538 return getInlinedHTML(currentPosition - 2);
1540 return TokenNameQUESTION;
1542 if (getNextChar(':'))
1543 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1544 return TokenNameCOLON;
1548 consumeStringConstant();
1549 return TokenNameStringSingleQuote;
1551 if (tokenizeStrings) {
1552 consumeStringLiteral();
1553 return TokenNameStringDoubleQuote;
1555 return TokenNameEncapsedString2;
1557 if (tokenizeStrings) {
1558 consumeStringInterpolated();
1559 return TokenNameStringInterpolated;
1561 return TokenNameEncapsedString0;
// Divide operator or comment start; startChar keeps the character that opened
// this branch ('/' or '#', as tested below).
1564 char startChar = currentCharacter;
1565 if (getNextChar('=') && startChar == '/') {
1566 return TokenNameDIVIDE_EQUAL;
// Line comment: a leading '#', or getNextChar('/', '*') returning 0.
1569 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1571 this.lastCommentLinePosition = this.currentPosition;
1572 int endPositionForLineComment = 0;
1573 try { //get the next char
1574 currentCharacter = source[currentPosition++];
1575 // if (((currentCharacter = source[currentPosition++])
1577 // && (source[currentPosition] == 'u')) {
1578 // //-------------unicode traitement ------------
1579 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1580 // currentPosition++;
1581 // while (source[currentPosition] == 'u') {
1582 // currentPosition++;
1585 // Character.getNumericValue(source[currentPosition++]))
1589 // Character.getNumericValue(source[currentPosition++]))
1593 // Character.getNumericValue(source[currentPosition++]))
1597 // Character.getNumericValue(source[currentPosition++]))
1601 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1603 // currentCharacter =
1604 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1607 //handle the \\u case manually into comment
1608 // if (currentCharacter == '\\') {
1609 // if (source[currentPosition] == '\\')
1610 // currentPosition++;
1611 // } //jump over the \\
1612 boolean isUnicode = false;
// Read to the end of the line; a "?>" inside the comment ends it early and is
// reported as TokenNameINLINE_HTML.
1613 while (currentCharacter != '\r' && currentCharacter != '\n') {
1614 this.lastCommentLinePosition = this.currentPosition;
1615 if (currentCharacter == '?') {
1616 if (getNextChar('>')) {
1617 startPosition = currentPosition - 2;
1619 return TokenNameINLINE_HTML;
1624 currentCharacter = source[currentPosition++];
1625 // if (((currentCharacter = source[currentPosition++])
1627 // && (source[currentPosition] == 'u')) {
1628 // isUnicode = true;
1629 // //-------------unicode traitement ------------
1630 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1631 // currentPosition++;
1632 // while (source[currentPosition] == 'u') {
1633 // currentPosition++;
1636 // Character.getNumericValue(source[currentPosition++]))
1640 // Character.getNumericValue(
1641 // source[currentPosition++]))
1645 // Character.getNumericValue(
1646 // source[currentPosition++]))
1650 // Character.getNumericValue(
1651 // source[currentPosition++]))
1655 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1657 // currentCharacter =
1658 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1661 //handle the \\u case manually into comment
1662 // if (currentCharacter == '\\') {
1663 // if (source[currentPosition] == '\\')
1664 // currentPosition++;
1665 // } //jump over the \\
// NOTE(review): the condition choosing between the two assignments below is
// not visible; presumably it is the isUnicode flag - confirm.
1668 endPositionForLineComment = currentPosition - 6;
1670 endPositionForLineComment = currentPosition - 1;
1672 // recordComment(false);
1673 recordComment(TokenNameCOMMENT_LINE);
1674 if (this.taskTags != null)
1675 checkTaskTag(this.startPosition, this.currentPosition);
1676 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1677 checkNonExternalizeString();
1678 if (recordLineSeparator) {
1680 pushUnicodeLineSeparator();
1682 pushLineSeparator();
1688 if (tokenizeComments) {
1690 currentPosition = endPositionForLineComment;
1691 // reset one character behind
1693 return TokenNameCOMMENT_LINE;
1695 } catch (IndexOutOfBoundsException e) { //an eof will them
1697 if (tokenizeComments) {
1699 // reset one character behind
1700 return TokenNameCOMMENT_LINE;
1706 //traditional and annotation comment
// isJavadoc selects between the PHPDOC and BLOCK comment tokens further down;
// star remembers whether the previous character was '*'.
1707 boolean isJavadoc = false, star = false;
1708 // consume next character
1709 unicodeAsBackSlash = false;
1710 currentCharacter = source[currentPosition++];
1711 // if (((currentCharacter = source[currentPosition++]) ==
1713 // && (source[currentPosition] == 'u')) {
1714 // getNextUnicodeChar();
1716 // if (withoutUnicodePtr != 0) {
1717 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1718 // currentCharacter;
1721 if (currentCharacter == '*') {
1725 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1726 checkNonExternalizeString();
1727 if (recordLineSeparator) {
1728 pushLineSeparator();
1733 try { //get the next char
1734 currentCharacter = source[currentPosition++];
1735 // if (((currentCharacter = source[currentPosition++])
1737 // && (source[currentPosition] == 'u')) {
1738 // //-------------unicode traitement ------------
1739 // getNextUnicodeChar();
1741 //handle the \\u case manually into comment
1742 // if (currentCharacter == '\\') {
1743 // if (source[currentPosition] == '\\')
1744 // currentPosition++;
1745 // //jump over the \\
1747 // empty comment is not a javadoc /**/
1748 if (currentCharacter == '/') {
1751 //loop until end of comment */
1752 while ((currentCharacter != '/') || (!star)) {
1753 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1754 checkNonExternalizeString();
1755 if (recordLineSeparator) {
1756 pushLineSeparator();
1761 star = currentCharacter == '*';
1763 currentCharacter = source[currentPosition++];
1764 // if (((currentCharacter = source[currentPosition++])
1766 // && (source[currentPosition] == 'u')) {
1767 // //-------------unicode traitement ------------
1768 // getNextUnicodeChar();
1770 //handle the \\u case manually into comment
1771 // if (currentCharacter == '\\') {
1772 // if (source[currentPosition] == '\\')
1773 // currentPosition++;
1774 // } //jump over the \\
1776 //recordComment(isJavadoc);
1778 recordComment(TokenNameCOMMENT_PHPDOC);
1780 recordComment(TokenNameCOMMENT_BLOCK);
1783 if (tokenizeComments) {
1785 return TokenNameCOMMENT_PHPDOC;
1786 return TokenNameCOMMENT_BLOCK;
1789 if (this.taskTags != null) {
1790 checkTaskTag(this.startPosition, this.currentPosition);
// Source ended before the comment was closed.
1792 } catch (IndexOutOfBoundsException e) {
1793 // reset end position for error reporting
1794 currentPosition -= 2;
1795 throw new InvalidInputException(UNTERMINATED_COMMENT);
1799 return TokenNameDIVIDE;
1803 return TokenNameEOF;
1804 //the atEnd may not be <currentPosition == source.length> if
1805 // source is only some part of a real (external) stream
1806 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// '$' followed by an identifier start is scanned as a variable; otherwise the
// position is restored and a bare TokenNameDOLLAR is returned.
1808 if (currentCharacter == '$') {
1809 int oldPosition = currentPosition;
1811 currentCharacter = source[currentPosition++];
1812 if (isPHPIdentifierStart(currentCharacter)) {
1813 return scanIdentifierOrKeyword(true);
1815 currentPosition = oldPosition;
1816 return TokenNameDOLLAR;
1818 } catch (IndexOutOfBoundsException e) {
1819 currentPosition = oldPosition;
1820 return TokenNameDOLLAR;
1823 if (isPHPIdentifierStart(currentCharacter))
1824 return scanIdentifierOrKeyword(false);
1825 if (Character.isDigit(currentCharacter))
1826 return scanNumber(false);
1827 return TokenNameERROR;
1830 } //-----------------end switch while try--------------------
// Any read past the end of source[] that was not handled above ends the
// token stream.
1831 catch (IndexOutOfBoundsException e) {
1834 return TokenNameEOF;
// Obtains the next token through getInlinedHTMLToken(start).
// The only visible special handling for TokenNameINLINE_HTML is a disabled
// (commented-out) experiment that walked the HTML tags of the scanned text
// with a stack of start tags.
// NOTE(review): the end of this method (presumably returning token) is not
// visible in this excerpt.
1837 private int getInlinedHTML(int start) throws InvalidInputException {
1838 int token = getInlinedHTMLToken(start);
1839 if (token == TokenNameINLINE_HTML) {
1840 // Stack stack = new Stack();
1841 // // scan html for errors
1842 // Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
1843 // int lastPHPEndPos=0;
1844 // for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
1845 // Tag tag=(Tag)i.next();
1847 // if (tag instanceof StartTag) {
1848 // StartTag startTag=(StartTag)tag;
1849 // // System.out.println("startTag: "+tag);
1850 // if (startTag.isServerTag()) {
1851 // // TODO : what to do with a server tag ?
1853 // // do whatever with HTML start tag
1854 // // use startTag.getElement() to find the element corresponding
1855 // // to this start tag which may be useful if you implement code
1857 // stack.push(startTag);
1860 // EndTag endTag=(EndTag)tag;
1861 // StartTag stag = (StartTag) stack.peek();
1862 //// System.out.println("endTag: "+tag);
1863 // // do whatever with HTML end tag.
1872 * @throws InvalidInputException
// Scans inline HTML starting at currentPosition, looking for a PHP open tag.
// startPosition is set to the given start. The scan reads characters from
// source[]; on "<?" it checks for a following "php" (either case, letter by
// letter) or for the short form. When ignorePHPOneLiner is set,
// lookAheadLinePHPTag() is consulted before returning TokenNameINLINE_HTML.
// Returns TokenNameEOF when currentPosition is already beyond source.length.
// NOTE(review): the loop header, the position adjustments and any phpMode
// switching are not visible in this excerpt.
1874 private int getInlinedHTMLToken(int start) throws InvalidInputException {
// Already past the end: clamp the position and report end of input.
1875 if (currentPosition > source.length) {
1876 currentPosition = source.length;
1877 return TokenNameEOF;
1879 startPosition = start;
1882 currentCharacter = source[currentPosition++];
1883 if (currentCharacter == '<') {
1884 if (getNextChar('?')) {
1885 currentCharacter = source[currentPosition++];
// Not followed by 'p'/'P': treated as the short open tag.
1886 if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
1888 // (currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1890 if (ignorePHPOneLiner) {
1891 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1893 return TokenNameINLINE_HTML;
1897 return TokenNameINLINE_HTML;
1900 // boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
// Match the remaining letters 'h' and 'p' in either case.
1902 int test = getNextChar('H', 'h');
1904 test = getNextChar('P', 'p');
1907 if (ignorePHPOneLiner) {
1908 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1910 return TokenNameINLINE_HTML;
1914 return TokenNameINLINE_HTML;
1922 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1923 if (recordLineSeparator) {
1924 pushLineSeparator();
1929 } //-----------------while--------------------
1931 return TokenNameINLINE_HTML;
1932 } //-----------------try--------------------
// End of source[] reached while scanning HTML: the remainder is still
// reported as inline HTML.
1933 catch (IndexOutOfBoundsException e) {
1934 startPosition = start;
1938 return TokenNameINLINE_HTML;
// Looks ahead from currentPosition, using a local cursor, to decide whether
// the PHP section stays on the current line (see the CodeFormatter remark
// below). It tracks whether the cursor is inside a single- or double-quoted
// string; a quote preceded by a backslash does not close the string.
// Returns TokenNameEOF as a "dummy" token, after moving currentPosition behind
// the closing tag, when a character preceded by '?' is met (presumably '>').
// Returns TokenNameINLINE_HTML in the other visible exits, including running
// off the end of source[].
// NOTE(review): the loop header and all case labels are not visible in this
// excerpt; the character each branch reacts to must be confirmed.
1944 private int lookAheadLinePHPTag() {
1945 // check if the PHP is only in this line (for CodeFormatter)
1946 int currentPositionInLine = currentPosition;
1947 char previousCharInLine = ' ';
1948 char currentCharInLine = ' ';
1949 boolean singleQuotedStringActive = false;
1950 boolean doubleQuotedStringActive = false;
1953 // look ahead in this line
1955 previousCharInLine = currentCharInLine;
1956 currentCharInLine = source[currentPositionInLine++];
1957 switch (currentCharInLine) {
1959 if (previousCharInLine == '?') {
1960 // update the scanner's current Position in the source
1961 currentPosition = currentPositionInLine;
1962 // use as "dummy" token
1963 return TokenNameEOF;
// Double-quote state: close on an unescaped quote, open only when not inside
// a single-quoted string.
1967 if (doubleQuotedStringActive) {
1968 if (previousCharInLine != '\\') {
1969 doubleQuotedStringActive = false;
1972 if (!singleQuotedStringActive) {
1973 doubleQuotedStringActive = true;
// Single-quote state: mirror image of the double-quote handling above.
1978 if (singleQuotedStringActive) {
1979 if (previousCharInLine != '\\') {
1980 singleQuotedStringActive = false;
1983 if (!doubleQuotedStringActive) {
1984 singleQuotedStringActive = true;
1990 return TokenNameINLINE_HTML;
1992 if (!singleQuotedStringActive && !doubleQuotedStringActive) {
1994 return TokenNameINLINE_HTML;
// A character preceded by '/' outside of any string (presumably a comment
// start) also makes this method return TokenNameINLINE_HTML.
1998 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2000 return TokenNameINLINE_HTML;
2004 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2006 return TokenNameINLINE_HTML;
// End of source[] reached: commit the look-ahead position.
2011 } catch (IndexOutOfBoundsException e) {
2013 currentPosition = currentPositionInLine;
2014 return TokenNameINLINE_HTML;
2018 // public final void getNextUnicodeChar()
2019 // throws IndexOutOfBoundsException, InvalidInputException {
2021 // //handle the case of unicode.
2022 // //when a unicode appears then we must use a buffer that holds char
2024 // //At the end of this method currentCharacter holds the new visited char
2025 // //and currentPosition points right next after it
2027 // //ALL getNextChar.... ARE OPTIMIZED COPIES
2029 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2030 // currentPosition++;
2031 // while (source[currentPosition] == 'u') {
2032 // currentPosition++;
2036 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2038 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2040 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2042 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2044 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2046 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2047 // //need the unicode buffer
2048 // if (withoutUnicodePtr == 0) {
2049 // //buffer all the entries that have been left aside....
2050 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2051 // System.arraycopy(
2054 // withoutUnicodeBuffer,
2056 // withoutUnicodePtr);
2058 // //fill the buffer with the char
2059 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2061 // unicodeAsBackSlash = currentCharacter == '\\';
2064 * Tokenize a method body, assuming that curly brackets are properly balanced.
// Advances currentPosition over a method body without producing tokens.
// Visible behaviour: resets wasAcr, then loops: skips whitespace (recording
// line separators when recordLineSeparator is set) and dispatches on
// currentCharacter to step over escapes, double-quoted strings, line comments,
// block comments, identifiers/variables and numbers.
// IndexOutOfBoundsException and InvalidInputException are caught at the end of
// the method; no handler statements are visible for them.
// NOTE(review): case labels, the brace-depth bookkeeping and most closing
// braces are not visible in this excerpt.
2066 public final void jumpOverMethodBody() {
2067 this.wasAcr = false;
2070 while (true) { //loop for jumping over comments
2071 // ---------Consume white space and handles startPosition---------
2072 boolean isWhiteSpace;
2074 startPosition = currentPosition;
2075 currentCharacter = source[currentPosition++];
2076 // if (((currentCharacter = source[currentPosition++]) == '\\')
2077 // && (source[currentPosition] == 'u')) {
2078 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2080 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2081 pushLineSeparator();
2082 isWhiteSpace = Character.isWhitespace(currentCharacter);
2084 } while (isWhiteSpace);
2085 // -------consume token until } is found---------
2086 switch (currentCharacter) {
2097 test = getNextChar('\\');
// Escape-scanning failures are caught here; the handler body is not visible.
2100 scanDoubleQuotedEscapeCharacter();
2101 } catch (InvalidInputException ex) {
2105 // try { // consume next character
2106 unicodeAsBackSlash = false;
2107 currentCharacter = source[currentPosition++];
2108 // if (((currentCharacter = source[currentPosition++]) == '\\')
2109 // && (source[currentPosition] == 'u')) {
2110 // getNextUnicodeChar();
2112 if (withoutUnicodePtr != 0) {
2113 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2116 // } catch (InvalidInputException ex) {
2124 // try { // consume next character
2125 unicodeAsBackSlash = false;
2126 currentCharacter = source[currentPosition++];
2127 // if (((currentCharacter = source[currentPosition++]) == '\\')
2128 // && (source[currentPosition] == 'u')) {
2129 // getNextUnicodeChar();
2131 if (withoutUnicodePtr != 0) {
2132 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2135 // } catch (InvalidInputException ex) {
// Step over a double-quoted string up to its closing quote.
2137 while (currentCharacter != '"') {
2138 if (currentCharacter == '\r') {
2139 if (source[currentPosition] == '\n')
2142 // the string cannot go further that the line
2144 if (currentCharacter == '\n') {
2146 // the string cannot go further that the line
2148 if (currentCharacter == '\\') {
2150 scanDoubleQuotedEscapeCharacter();
2151 } catch (InvalidInputException ex) {
2155 // try { // consume next character
2156 unicodeAsBackSlash = false;
2157 currentCharacter = source[currentPosition++];
2158 // if (((currentCharacter = source[currentPosition++]) == '\\')
2159 // && (source[currentPosition] == 'u')) {
2160 // getNextUnicodeChar();
2162 if (withoutUnicodePtr != 0) {
2163 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2166 // } catch (InvalidInputException ex) {
2169 } catch (IndexOutOfBoundsException e) {
// Comment start: getNextChar('/', '*') returning 0 selects the line-comment
// path below.
2175 if ((test = getNextChar('/', '*')) == 0) {
2179 currentCharacter = source[currentPosition++];
2180 // if (((currentCharacter = source[currentPosition++]) ==
2182 // && (source[currentPosition] == 'u')) {
2183 // //-------------unicode traitement ------------
2184 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2185 // currentPosition++;
2186 // while (source[currentPosition] == 'u') {
2187 // currentPosition++;
2190 // Character.getNumericValue(source[currentPosition++]))
2194 // Character.getNumericValue(source[currentPosition++]))
2198 // Character.getNumericValue(source[currentPosition++]))
2202 // Character.getNumericValue(source[currentPosition++]))
2205 // //error don't care of the value
2206 // currentCharacter = 'A';
2207 // } //something different from \n and \r
2209 // currentCharacter =
2210 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// Skip the rest of the line.
2213 while (currentCharacter != '\r' && currentCharacter != '\n') {
2215 currentCharacter = source[currentPosition++];
2216 // if (((currentCharacter = source[currentPosition++])
2218 // && (source[currentPosition] == 'u')) {
2219 // //-------------unicode traitement ------------
2220 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2221 // currentPosition++;
2222 // while (source[currentPosition] == 'u') {
2223 // currentPosition++;
2226 // Character.getNumericValue(source[currentPosition++]))
2230 // Character.getNumericValue(source[currentPosition++]))
2234 // Character.getNumericValue(source[currentPosition++]))
2238 // Character.getNumericValue(source[currentPosition++]))
2241 // //error don't care of the value
2242 // currentCharacter = 'A';
2243 // } //something different from \n and \r
2245 // currentCharacter =
2246 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2250 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2251 pushLineSeparator();
2252 } catch (IndexOutOfBoundsException e) {
2253 } //an eof will then be generated
2257 //traditional and annotation comment
// star remembers whether the previous character was '*', so that the "*/"
// terminator can be recognised in the loop below.
2258 boolean star = false;
2259 // try { // consume next character
2260 unicodeAsBackSlash = false;
2261 currentCharacter = source[currentPosition++];
2262 // if (((currentCharacter = source[currentPosition++]) == '\\')
2263 // && (source[currentPosition] == 'u')) {
2264 // getNextUnicodeChar();
2266 if (withoutUnicodePtr != 0) {
2267 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2270 // } catch (InvalidInputException ex) {
2272 if (currentCharacter == '*') {
2275 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2276 pushLineSeparator();
2277 try { //get the next char
2278 currentCharacter = source[currentPosition++];
2279 // if (((currentCharacter = source[currentPosition++]) ==
2281 // && (source[currentPosition] == 'u')) {
2282 // //-------------unicode traitement ------------
2283 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2284 // currentPosition++;
2285 // while (source[currentPosition] == 'u') {
2286 // currentPosition++;
2289 // Character.getNumericValue(source[currentPosition++]))
2293 // Character.getNumericValue(source[currentPosition++]))
2297 // Character.getNumericValue(source[currentPosition++]))
2301 // Character.getNumericValue(source[currentPosition++]))
2304 // //error don't care of the value
2305 // currentCharacter = 'A';
2306 // } //something different from * and /
2308 // currentCharacter =
2309 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2312 //loop until end of comment */
2313 while ((currentCharacter != '/') || (!star)) {
2314 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2315 pushLineSeparator();
2316 star = currentCharacter == '*';
2318 currentCharacter = source[currentPosition++];
2319 // if (((currentCharacter = source[currentPosition++])
2321 // && (source[currentPosition] == 'u')) {
2322 // //-------------unicode traitement ------------
2323 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2324 // currentPosition++;
2325 // while (source[currentPosition] == 'u') {
2326 // currentPosition++;
2329 // Character.getNumericValue(source[currentPosition++]))
2333 // Character.getNumericValue(source[currentPosition++]))
2337 // Character.getNumericValue(source[currentPosition++]))
2341 // Character.getNumericValue(source[currentPosition++]))
2344 // //error don't care of the value
2345 // currentCharacter = 'A';
2346 // } //something different from * and /
2348 // currentCharacter =
2349 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2353 } catch (IndexOutOfBoundsException e) {
// Identifiers and '$' variables are scanned only to move past them.
2361 if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
2363 scanIdentifierOrKeyword((currentCharacter == '$'));
2364 } catch (InvalidInputException ex) {
2369 if (Character.isDigit(currentCharacter)) {
2372 } catch (InvalidInputException ex) {
2379 //-----------------end switch while try--------------------
2380 } catch (IndexOutOfBoundsException e) {
2381 } catch (InvalidInputException e) {
2386 // public final boolean jumpOverUnicodeWhiteSpace()
2387 // throws InvalidInputException {
2389 // //handle the case of unicode. Jump over the next whiteSpace
2390 // //making startPosition pointing on the next available char
2391 // //On false, the currentCharacter is filled up with a potential
2395 // this.wasAcr = false;
2396 // int c1, c2, c3, c4;
2397 // int unicodeSize = 6;
2398 // currentPosition++;
2399 // while (source[currentPosition] == 'u') {
2400 // currentPosition++;
2404 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2406 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2408 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2410 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2412 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2415 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2416 // if (recordLineSeparator
2417 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2418 // pushLineSeparator();
2419 // if (Character.isWhitespace(currentCharacter))
2422 // //buffer the new char which is not a white space
2423 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2424 // //withoutUnicodePtr == 1 is true here
2426 // } catch (IndexOutOfBoundsException e) {
2427 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
/**
 * Builds a copy of the recorded line-end positions trimmed to the entries in use: the first
 * linePtr + 1 elements of lineEnds are copied into a freshly allocated int array.
 * NOTE(review): presumably that trimmed copy is the return value - confirm against the full method.
 */
2430 public final int[] getLineEnds() {
2431 //return a bounded copy of this.lineEnds
2433 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
/**
 * NOTE(review): presumably returns the char[] currently being scanned (the source field), without
 * copying it - confirm against the method body.
 */
2437 public char[] getSource() {
/**
 * Tells whether a token id denotes an identifier or a keyword.
 *
 * @param token a token id as defined by the terminal symbol constants
 * @return true when token equals TokenNameIdentifier or is numerically greater than TokenNameKEYWORD
 */
2441 public static boolean isIdentifierOrKeyword(int token) {
// NOTE(review): the second test assumes every keyword constant is numbered above TokenNameKEYWORD
// and that no non-keyword token is - verify against ITerminalSymbols.
2442 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
/**
 * Produces the source of a one-character token. The character is read from source[startPosition];
 * the fallback at the end allocates a new one-element array holding it.
 *
 * @return a char[] of length 1 containing the token character
 */
2445 final char[] optimizedCurrentTokenSource1() {
2446 //return always the same char[] build only once
2447 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2448 char charOne = source[startPosition];
// fallback: no shared array was returned earlier, so allocate one for this character
2503 return new char[] { charOne };
/**
 * Produces the source of a two-character token, reusing previously built arrays where possible.
 * The two characters are read from source[startPosition] and source[startPosition + 1]. The method
 * can answer with one of the shared charArray_va .. charArray_vz constants; otherwise it looks the
 * pair up in the bucket charArray_length[0][hash], and when no entry matches it stores a new
 * two-element array in that bucket.
 *
 * @return a char[] of length 2 holding the token characters
 */
2507 final char[] optimizedCurrentTokenSource2() {
2509 c0 = source[startPosition];
2510 c1 = source[startPosition + 1];
2512 //return always the same char[] build only once
2513 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2516 return charArray_va;
2518 return charArray_vb;
2520 return charArray_vc;
2522 return charArray_vd;
2524 return charArray_ve;
2526 return charArray_vf;
2528 return charArray_vg;
2530 return charArray_vh;
2532 return charArray_vi;
2534 return charArray_vj;
2536 return charArray_vk;
2538 return charArray_vl;
2540 return charArray_vm;
2542 return charArray_vn;
2544 return charArray_vo;
2546 return charArray_vp;
2548 return charArray_vq;
2550 return charArray_vr;
2552 return charArray_vs;
2554 return charArray_vt;
2556 return charArray_vu;
2558 return charArray_vv;
2560 return charArray_vw;
2562 return charArray_vx;
2564 return charArray_vy;
2566 return charArray_vz;
2569 //try to return the same char[] build only once
// bucket index: both characters folded together, reduced modulo TableSize
2570 int hash = ((c0 << 6) + c1) % TableSize;
2571 char[][] table = charArray_length[0][hash];
// first pass over the bucket, up to its end (InternalTableSize)
2573 while (++i < InternalTableSize) {
2574 char[] charArray = table[i];
2575 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2578 //---------other side---------
// second pass, bounded by newEntry2
2580 int max = newEntry2;
2581 while (++i <= max) {
2582 char[] charArray = table[i];
2583 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2586 //--------add the entry-------
// no match in either pass: advance the slot index (checked against InternalTableSize) and store a new array
2587 if (++max >= InternalTableSize)
2590 table[max] = (r = new char[] { c0, c1 });
/**
 * Produces the source of a three-character token, reusing previously built arrays where possible.
 * The characters at startPosition .. startPosition + 2 are folded into a hash that selects a bucket
 * in charArray_length[1]; the bucket is searched in two passes and, when no entry matches, a new
 * three-element array is stored in it.
 *
 * @return a char[] of length 3 holding the token characters
 */
2595 final char[] optimizedCurrentTokenSource3() {
2596 //try to return the same char[] build only once
// the three characters are captured into c0, c1, c2 as a side effect of computing the hash
2598 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2600 char[][] table = charArray_length[1][hash];
// first pass over the bucket, up to its end (InternalTableSize)
2602 while (++i < InternalTableSize) {
2603 char[] charArray = table[i];
2604 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2607 //---------other side---------
// second pass, bounded by newEntry3
2609 int max = newEntry3;
2610 while (++i <= max) {
2611 char[] charArray = table[i];
2612 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2615 //--------add the entry-------
// no match in either pass: advance the slot index (checked against InternalTableSize) and store a new array
2616 if (++max >= InternalTableSize)
2619 table[max] = (r = new char[] { c0, c1, c2 });
/**
 * Produces the source of a four-character token, reusing previously built arrays where possible.
 * The characters at startPosition .. startPosition + 3 are folded into a long hash that, narrowed
 * to int, selects a bucket in charArray_length[2]; the bucket is searched in two passes and, when
 * no entry matches, a new four-element array is stored in it.
 *
 * @return a char[] of length 4 holding the token characters
 */
2624 final char[] optimizedCurrentTokenSource4() {
2625 //try to return the same char[] build only once
2626 char c0, c1, c2, c3;
// the leading character is widened to long before shifting; the characters are captured as a side effect
2627 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2628 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2630 char[][] table = charArray_length[2][(int) hash];
// first pass over the bucket, up to its end (InternalTableSize)
2632 while (++i < InternalTableSize) {
2633 char[] charArray = table[i];
2634 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2637 //---------other side---------
// second pass, bounded by newEntry4
2639 int max = newEntry4;
2640 while (++i <= max) {
2641 char[] charArray = table[i];
2642 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2645 //--------add the entry-------
// no match in either pass: advance the slot index (checked against InternalTableSize) and store a new array
2646 if (++max >= InternalTableSize)
2649 table[max] = (r = new char[] { c0, c1, c2, c3 });
/**
 * Produces the source of a five-character token, reusing previously built arrays where possible.
 * The characters at startPosition .. startPosition + 4 are folded into a long hash that, narrowed
 * to int, selects a bucket in charArray_length[3]; the bucket is searched in two passes and, when
 * no entry matches, a new five-element array is stored in it.
 *
 * @return a char[] of length 5 holding the token characters
 */
2654 final char[] optimizedCurrentTokenSource5() {
2655 //try to return the same char[] build only once
2656 char c0, c1, c2, c3, c4;
// the two leading characters are widened to long before shifting; the characters are captured as a side effect
2657 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2658 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2660 char[][] table = charArray_length[3][(int) hash];
// first pass over the bucket, up to its end (InternalTableSize)
2662 while (++i < InternalTableSize) {
2663 char[] charArray = table[i];
2664 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2667 //---------other side---------
// second pass, bounded by newEntry5
2669 int max = newEntry5;
2670 while (++i <= max) {
2671 char[] charArray = table[i];
2672 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2675 //--------add the entry-------
// no match in either pass: advance the slot index (checked against InternalTableSize) and store a new array
2676 if (++max >= InternalTableSize)
2679 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
/**
 * Produces the source of a six-character token, reusing previously built arrays where possible.
 * The characters at startPosition .. startPosition + 5 are folded into a long hash that, narrowed
 * to int, selects a bucket in charArray_length[4]; the bucket is searched in two passes and, when
 * no entry matches, a new six-element array is stored in it.
 *
 * @return a char[] of length 6 holding the token characters
 */
2684 final char[] optimizedCurrentTokenSource6() {
2685 //try to return the same char[] build only once
2686 char c0, c1, c2, c3, c4, c5;
// the three leading characters are widened to long before shifting (by 32, 24 and 18 bits);
// the characters are captured as a side effect of computing the hash
2687 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2688 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2689 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2691 char[][] table = charArray_length[4][(int) hash];
// first pass over the bucket, up to its end (InternalTableSize)
2693 while (++i < InternalTableSize) {
2694 char[] charArray = table[i];
2695 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2696 && (c5 == charArray[5]))
2699 //---------other side---------
// second pass, bounded by newEntry6
2701 int max = newEntry6;
2702 while (++i <= max) {
2703 char[] charArray = table[i];
2704 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2705 && (c5 == charArray[5]))
2708 //--------add the entry-------
// no match in either pass: advance the slot index (checked against InternalTableSize) and store a new array
2709 if (++max >= InternalTableSize)
2712 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
/**
 * Records the position of the line separator held in currentCharacter (which sits at
 * currentPosition - 1) in the lineEnds table.
 * <p>
 * For a carriage return the position currentPosition - 1 is appended, and when the next source
 * character is a line feed the entry just written is overwritten with currentPosition, so that a
 * CR+LF pair yields a single entry. For a line feed the last entry is overwritten with
 * currentPosition - 1 when wasAcr is set and that entry equals currentPosition - 2; otherwise
 * currentPosition - 1 is appended. When lineEnds is full the append raises
 * IndexOutOfBoundsException and the handler reallocates the table with INCREMENT extra slots.
 *
 * @throws InvalidInputException declared by the signature; none of the statements shown here throws it
 */
2717 public final void pushLineSeparator() throws InvalidInputException {
2718 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// number of slots added each time lineEnds has to grow
2719 final int INCREMENT = 250;
2720 if (this.checkNonExternalizedStringLiterals) {
2721 // reinitialize the current line for non externalize strings purpose
2724 //currentCharacter is at position currentPosition-1
2726 if (currentCharacter == '\r') {
2727 int separatorPos = currentPosition - 1;
// guard: an entry at or beyond this position is already recorded
2728 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2730 //System.out.println("CR-" + separatorPos);
// linePtr is incremented before the store is attempted, so it already names the new slot
// when the store fails on a full table
2732 lineEnds[++linePtr] = separatorPos;
2733 } catch (IndexOutOfBoundsException e) {
2734 //linePtr value is correct
2735 int oldLength = lineEnds.length;
2736 int[] old = lineEnds;
2737 lineEnds = new int[oldLength + INCREMENT];
2738 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2739 lineEnds[linePtr] = separatorPos;
2741 // look-ahead for merged cr+lf
2743 if (source[currentPosition] == '\n') {
2744 //System.out.println("look-ahead LF-" + currentPosition);
2745 lineEnds[linePtr] = currentPosition;
2751 } catch (IndexOutOfBoundsException e) {
2756 if (currentCharacter == '\n') {
2757 //must merge eventual cr followed by lf
2758 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2759 //System.out.println("merge LF-" + (currentPosition - 1));
2760 lineEnds[linePtr] = currentPosition - 1;
2762 int separatorPos = currentPosition - 1;
// guard: an entry at or beyond this position is already recorded
2763 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2765 // System.out.println("LF-" + separatorPos);
2767 lineEnds[++linePtr] = separatorPos;
2768 } catch (IndexOutOfBoundsException e) {
2769 //linePtr value is correct
2770 int oldLength = lineEnds.length;
2771 int[] old = lineEnds;
2772 lineEnds = new int[oldLength + INCREMENT];
2773 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2774 lineEnds[linePtr] = separatorPos;
/**
 * Variant of line-separator recording for a separator that was read as a unicode character (see
 * the first comment in the body). It follows the same append, merge and grow scheme on lineEnds,
 * but the recorded position is currentPosition - 6 and the CR+LF merge test compares the last
 * entry with currentPosition - 7.
 * NOTE(review): the offset 6 presumably corresponds to the length of a backslash-u escape with
 * four hex digits - confirm.
 * NOTE(review): unlike the carriage-return branch, the look-ahead after a unicode CR still stores
 * the raw currentPosition - confirm that this is intended.
 */
2782 public final void pushUnicodeLineSeparator() {
2783 // isUnicode means that the \r or \n has been read as a unicode character
2784 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// number of slots added each time lineEnds has to grow
2785 final int INCREMENT = 250;
2786 //currentCharacter is at position currentPosition-1
2787 if (this.checkNonExternalizedStringLiterals) {
2788 // reinitialize the current line for non externalize strings purpose
2792 if (currentCharacter == '\r') {
2793 int separatorPos = currentPosition - 6;
// guard: an entry at or beyond this position is already recorded
2794 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2796 //System.out.println("CR-" + separatorPos);
// linePtr is incremented before the store is attempted, so it already names the new slot
// when the store fails on a full table
2798 lineEnds[++linePtr] = separatorPos;
2799 } catch (IndexOutOfBoundsException e) {
2800 //linePtr value is correct
2801 int oldLength = lineEnds.length;
2802 int[] old = lineEnds;
2803 lineEnds = new int[oldLength + INCREMENT];
2804 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2805 lineEnds[linePtr] = separatorPos;
2807 // look-ahead for merged cr+lf
2808 if (source[currentPosition] == '\n') {
2809 //System.out.println("look-ahead LF-" + currentPosition);
2810 lineEnds[linePtr] = currentPosition;
2818 if (currentCharacter == '\n') {
2819 //must merge eventual cr followed by lf
2820 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2821 //System.out.println("merge LF-" + (currentPosition - 1));
2822 lineEnds[linePtr] = currentPosition - 6;
2824 int separatorPos = currentPosition - 6;
// guard: an entry at or beyond this position is already recorded
2825 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2827 // System.out.println("LF-" + separatorPos);
2829 lineEnds[++linePtr] = separatorPos;
2830 } catch (IndexOutOfBoundsException e) {
2831 //linePtr value is correct
2832 int oldLength = lineEnds.length;
2833 int[] old = lineEnds;
2834 lineEnds = new int[oldLength + INCREMENT];
2835 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2836 lineEnds[linePtr] = separatorPos;
/**
 * Records the comment that was just scanned in the parallel commentStarts / commentStops tables.
 * The start stored is startPosition. The stop stored is currentPosition by default,
 * -lastCommentLinePosition for TokenNameCOMMENT_LINE and -currentPosition for
 * TokenNameCOMMENT_BLOCK, so line and block comments are marked by a negated stop position.
 * Both tables are reallocated with 30 extra slots when commentPtr runs past their length.
 *
 * @param token the comment token kind that selects how the stop position is encoded
 */
2844 public void recordComment(int token) {
2846 int stopPosition = this.currentPosition;
2848 case TokenNameCOMMENT_LINE:
2849 stopPosition = -this.lastCommentLinePosition;
2851 case TokenNameCOMMENT_BLOCK:
2852 stopPosition = -this.currentPosition;
2856 // a new comment is recorded
2857 int length = this.commentStops.length;
2858 if (++this.commentPtr >= length) {
// the old array is read as the first argument before the field is reassigned to the larger one
2859 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2860 //grows the positions buffers too
2861 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2863 this.commentStops[this.commentPtr] = stopPosition;
2864 this.commentStarts[this.commentPtr] = this.startPosition;
2867 // public final void recordComment(boolean isJavadoc) {
2868 // // a new annotation comment is recorded
2870 // commentStops[++commentPtr] = isJavadoc
2871 // ? currentPosition
2872 // : -currentPosition;
2873 // } catch (IndexOutOfBoundsException e) {
2874 // int oldStackLength = commentStops.length;
2875 // int[] oldStack = commentStops;
2876 // commentStops = new int[oldStackLength + 30];
2877 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2878 // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2879 // //grows the positions buffers too
2880 // int[] old = commentStarts;
2881 // commentStarts = new int[oldStackLength + 30];
2882 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2884 // //the buffer is of a correct size here
2885 // commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning can restart inside the range given.
 * initialPosition, startPosition and currentPosition are all set to begin, the comment stack is
 * emptied (commentPtr = -1) and eofPosition is set one past end.
 *
 * @param begin position at which scanning restarts
 * @param end last position to scan; Integer.MAX_VALUE is stored unchanged so that adding one
 *        cannot overflow
 */
2887 public void resetTo(int begin, int end) {
2888 //reset the scanner to a given position where it may rescan again
2890 initialPosition = startPosition = currentPosition = begin;
2891 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2892 commentPtr = -1; // reset comment stack
/**
 * Reads the character that follows a backslash inside a single-quoted string and leaves the
 * resulting character in currentCharacter. The next source character is consumed
 * (currentPosition advances by one) and a switch on it assigns either a single quote or a
 * backslash to currentCharacter.
 * NOTE(review): two branches assign a backslash; presumably one of them is the default branch
 * for unrecognised escapes - confirm against the case labels.
 *
 * @throws InvalidInputException declared by the signature; none of the statements shown here throws it
 */
2895 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2896 // the string with "\\u" is a legal string of two chars \ and u
2897 //thus we use a direct access to the source (for regular cases).
2898 // if (unicodeAsBackSlash) {
2899 // // consume next character
2900 // unicodeAsBackSlash = false;
2901 // if (((currentCharacter = source[currentPosition++]) == '\\')
2902 // && (source[currentPosition] == 'u')) {
2903 // getNextUnicodeChar();
2905 // if (withoutUnicodePtr != 0) {
2906 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2910 currentCharacter = source[currentPosition++];
2911 switch (currentCharacter) {
2913 currentCharacter = '\'';
2916 currentCharacter = '\\';
2919 currentCharacter = '\\';
/**
 * Reads the character that follows a backslash inside a double-quoted string and leaves the
 * decoded character in currentCharacter. The next source character is consumed and a switch on
 * it assigns the decoded value: tab, line feed, carriage return, double quote, single quote,
 * backslash or dollar sign (the backspace and form-feed assignments are commented out).
 * <p>
 * The remaining branch handles octal escapes. The first digit is decoded with
 * Character.getNumericValue and accepted when it lies in 0..7; up to two further characters are
 * read and folded in as number * 8 + digit when they are octal digits. A third digit is only
 * folded in when the first digit is at most 3 (zeroToThreeNot is false). The decoded value is
 * then stored as (char) number.
 * NOTE(review): the condition guarding the throw of INVALID_ESCAPE is not documented here -
 * confirm which inputs reach it.
 *
 * @throws InvalidInputException with INVALID_ESCAPE from the octal branch
 */
2924 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2925 // the string with "\\u" is a legal string of two chars \ and u
2926 //thus we use a direct access to the source (for regular cases).
2927 // if (unicodeAsBackSlash) {
2928 // // consume next character
2929 // unicodeAsBackSlash = false;
2930 // if (((currentCharacter = source[currentPosition++]) == '\\')
2931 // && (source[currentPosition] == 'u')) {
2932 // getNextUnicodeChar();
2934 // if (withoutUnicodePtr != 0) {
2935 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2939 currentCharacter = source[currentPosition++];
2940 switch (currentCharacter) {
2942 // currentCharacter = '\b';
2945 currentCharacter = '\t';
2948 currentCharacter = '\n';
2951 // currentCharacter = '\f';
2954 currentCharacter = '\r';
2957 currentCharacter = '\"';
2960 currentCharacter = '\'';
2963 currentCharacter = '\\';
2966 currentCharacter = '$';
2969 // -----------octal escape--------------
2971 // OctalDigit OctalDigit
2972 // ZeroToThree OctalDigit OctalDigit
2973 int number = Character.getNumericValue(currentCharacter);
2974 if (number >= 0 && number <= 7) {
2975 boolean zeroToThreeNot = number > 3;
2976 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2977 int digit = Character.getNumericValue(currentCharacter);
2978 if (digit >= 0 && digit <= 7) {
2979 number = (number * 8) + digit;
2980 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2981 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2982 // Digit --> ignore last character
2985 digit = Character.getNumericValue(currentCharacter);
2986 if (digit >= 0 && digit <= 7) {
2987 // has read \ZeroToThree OctalDigit OctalDigit
2988 number = (number * 8) + digit;
2989 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2990 // --> ignore last character
2994 } else { // has read \OctalDigit NonDigit--> ignore last
2998 } else { // has read \OctalDigit NonOctalDigit--> ignore last
3002 } else { // has read \OctalDigit --> ignore last character
3006 throw new InvalidInputException(INVALID_ESCAPE);
3007 currentCharacter = (char) number;
3010 // throw new InvalidInputException(INVALID_ESCAPE);
3014 // public int scanIdentifierOrKeyword() throws InvalidInputException {
3015 // return scanIdentifierOrKeyword( false );
/**
 * Scans the rest of an identifier and classifies it.
 * <p>
 * Identifier-part characters are consumed through getNextCharAsJavaIdentifierPart(). The method
 * can answer TokenNameVariable (NOTE(review): presumably when isVariable is true - confirm). A
 * token of length one is always TokenNameIdentifier. Otherwise the token text
 * (source[startPosition .. currentPosition - 1]) is copied in lower case into a local array, so
 * keyword recognition is case-insensitive, and that array is matched against the keyword list by
 * dispatching on its first character and comparing the remaining characters one by one.
 * Anything that is not a keyword is TokenNameIdentifier.
 * <p>
 * Side effect: useAssertAsAnIndentifier is reset to false on every call.
 *
 * @param isVariable whether the caller saw a leading dollar sign
 * @return TokenNameVariable, one of the keyword token ids, or TokenNameIdentifier
 * @throws InvalidInputException declared by the signature; presumably raised by the character
 *         reading helper - confirm
 */
3017 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
3019 //first dispatch on the first char.
3020 //then the length. If there are several
3021 //keywords with the same length AND the same first char, then do another
3022 //dispatch on the second char :-)...cool....but fast !
3023 useAssertAsAnIndentifier = false;
3024 while (getNextCharAsJavaIdentifierPart()) {
3028 // if (new String(getCurrentTokenSource()).equals("$this")) {
3029 // return TokenNamethis;
3031 return TokenNameVariable;
3036 // if (withoutUnicodePtr == 0)
3037 //quick test on length == 1 but not on length > 12 while most identifier
3038 //have a length which is <= 12...but there are lots of identifier with
3041 if ((length = currentPosition - startPosition) == 1)
3042 return TokenNameIdentifier;
// data receives a lower-cased copy of the token and has exactly length elements
3044 data = new char[length];
3045 index = startPosition;
3046 for (int i = 0; i < length; i++) {
3047 data[i] = Character.toLowerCase(source[index + i]);
3051 // if ((length = withoutUnicodePtr) == 1)
3052 // return TokenNameIdentifier;
3053 // // data = withoutUnicodeBuffer;
3054 // data = new char[withoutUnicodeBuffer.length];
3055 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3056 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3060 firstLetter = data[index];
3061 switch (firstLetter) {
3066 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3067 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3068 return TokenNameFILE;
// index is rewound before the second candidate of the same length is compared
3069 index = 0; //__LINE__
3070 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3071 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3072 return TokenNameLINE;
3076 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3077 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3078 return TokenNameCLASS_C;
3082 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3083 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3084 && (data[++index] == '_'))
3085 return TokenNameMETHOD_C;
3089 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3090 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3091 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3092 return TokenNameFUNC_C;
3095 return TokenNameIdentifier;
3097 // as and array abstract
3101 if ((data[++index] == 's')) {
3104 return TokenNameIdentifier;
3108 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3109 return TokenNameand;
3111 return TokenNameIdentifier;
3115 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3116 return TokenNamearray;
3118 return TokenNameIdentifier;
3120 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3121 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3122 return TokenNameabstract;
3124 return TokenNameIdentifier;
3126 return TokenNameIdentifier;
3132 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3133 return TokenNamebreak;
3135 return TokenNameIdentifier;
3137 return TokenNameIdentifier;
3140 //case catch class clone const continue
3143 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3144 return TokenNamecase;
3146 return TokenNameIdentifier;
3148 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3149 return TokenNamecatch;
3151 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3152 return TokenNameclass;
3154 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3155 return TokenNameclone;
3157 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3158 return TokenNameconst;
3160 return TokenNameIdentifier;
3162 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3163 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3164 return TokenNamecontinue;
3166 return TokenNameIdentifier;
3168 return TokenNameIdentifier;
3171 // declare default do die
3172 // TODO delete define ==> no keyword !
3175 if ((data[++index] == 'o'))
3178 return TokenNameIdentifier;
3180 // if ((data[++index] == 'e')
3181 // && (data[++index] == 'f')
3182 // && (data[++index] == 'i')
3183 // && (data[++index] == 'n')
3184 // && (data[++index] == 'e'))
3185 // return TokenNamedefine;
3187 // return TokenNameIdentifier;
3189 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3190 && (data[++index] == 'r') && (data[++index] == 'e'))
3191 return TokenNamedeclare;
3193 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3194 && (data[++index] == 'l') && (data[++index] == 't'))
3195 return TokenNamedefault;
3197 return TokenNameIdentifier;
3199 return TokenNameIdentifier;
3202 //echo else exit elseif extends eval
// NOTE(review): the else-if fallbacks below test data[index] without rewinding index. When the
// first chain fails only after matching one or two characters (for instance on the tokens echl
// or ecls), index already points past the second character, so the later ++index reads can step
// beyond data[length - 1] and raise ArrayIndexOutOfBoundsException. Presumably index is expected
// to still name the second character here - confirm and rewind it before each alternative.
3205 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3206 return TokenNameecho;
3207 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3208 return TokenNameelse;
3209 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3210 return TokenNameexit;
3211 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3212 return TokenNameeval;
3214 return TokenNameIdentifier;
// NOTE(review): same index-reuse hazard as above for the endif / empty pair
3217 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3218 return TokenNameendif;
3219 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3220 return TokenNameempty;
3222 return TokenNameIdentifier;
// NOTE(review): same index-reuse hazard as above for the endfor / elseif pair
3225 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3226 && (data[++index] == 'r'))
3227 return TokenNameendfor;
3228 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3229 && (data[++index] == 'f'))
3230 return TokenNameelseif;
3232 return TokenNameIdentifier;
3234 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3235 && (data[++index] == 'd') && (data[++index] == 's'))
3236 return TokenNameextends;
3238 return TokenNameIdentifier;
3241 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3242 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3243 return TokenNameendwhile;
3245 return TokenNameIdentifier;
3248 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3249 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3250 return TokenNameendswitch;
3252 return TokenNameIdentifier;
3255 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3256 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3257 && (data[++index] == 'e'))
3258 return TokenNameenddeclare;
3260 if ((data[++index] == 'n') // endforeach
3261 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3262 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3263 return TokenNameendforeach;
3265 return TokenNameIdentifier;
3267 return TokenNameIdentifier;
3270 //for false final function
3273 if ((data[++index] == 'o') && (data[++index] == 'r'))
3274 return TokenNamefor;
3276 return TokenNameIdentifier;
3278 // if ((data[++index] == 'a') && (data[++index] == 'l')
3279 // && (data[++index] == 's') && (data[++index] == 'e'))
3280 // return TokenNamefalse;
3281 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3282 return TokenNamefinal;
3284 return TokenNameIdentifier;
3287 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3288 && (data[++index] == 'c') && (data[++index] == 'h'))
3289 return TokenNameforeach;
3291 return TokenNameIdentifier;
3294 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3295 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3296 return TokenNamefunction;
3298 return TokenNameIdentifier;
3300 return TokenNameIdentifier;
3305 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3306 && (data[++index] == 'l')) {
3307 return TokenNameglobal;
3310 return TokenNameIdentifier;
3312 //if int isset include include_once instanceof interface implements
3315 if (data[++index] == 'f')
3318 return TokenNameIdentifier;
3320 // if ((data[++index] == 'n') && (data[++index] == 't'))
3321 // return TokenNameint;
3323 // return TokenNameIdentifier;
3325 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3326 return TokenNameisset;
3328 return TokenNameIdentifier;
3330 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3331 && (data[++index] == 'd') && (data[++index] == 'e'))
3332 return TokenNameinclude;
3334 return TokenNameIdentifier;
3337 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3338 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3339 return TokenNameinterface;
3341 return TokenNameIdentifier;
// NOTE(review): same index-reuse hazard as in the e-keyword chains for the instanceof /
// implements pair
3344 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3345 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3346 && (data[++index] == 'f'))
3347 return TokenNameinstanceof;
3348 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3349 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3350 && (data[++index] == 's'))
3351 return TokenNameimplements;
3353 return TokenNameIdentifier;
3355 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3356 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3357 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3358 return TokenNameinclude_once;
3360 return TokenNameIdentifier;
3362 return TokenNameIdentifier;
3367 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3368 return TokenNamelist;
3371 return TokenNameIdentifier;
3376 if ((data[++index] == 'e') && (data[++index] == 'w'))
3377 return TokenNamenew;
3379 return TokenNameIdentifier;
3381 // if ((data[++index] == 'u') && (data[++index] == 'l')
3382 // && (data[++index] == 'l'))
3383 // return TokenNamenull;
3385 // return TokenNameIdentifier;
3387 return TokenNameIdentifier;
3392 if (data[++index] == 'r') {
3396 // if (length == 12) {
3397 // if ((data[++index] == 'l')
3398 // && (data[++index] == 'd')
3399 // && (data[++index] == '_')
3400 // && (data[++index] == 'f')
3401 // && (data[++index] == 'u')
3402 // && (data[++index] == 'n')
3403 // && (data[++index] == 'c')
3404 // && (data[++index] == 't')
3405 // && (data[++index] == 'i')
3406 // && (data[++index] == 'o')
3407 // && (data[++index] == 'n')) {
3408 // return TokenNameold_function;
3411 return TokenNameIdentifier;
3413 // print public private protected
3416 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3417 return TokenNameprint;
3419 return TokenNameIdentifier;
3421 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3422 && (data[++index] == 'c')) {
3423 return TokenNamepublic;
3425 return TokenNameIdentifier;
3427 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3428 && (data[++index] == 't') && (data[++index] == 'e')) {
3429 return TokenNameprivate;
3431 return TokenNameIdentifier;
3433 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3434 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3435 return TokenNameprotected;
3437 return TokenNameIdentifier;
3439 return TokenNameIdentifier;
3441 //return require require_once
3443 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3444 && (data[++index] == 'n')) {
3445 return TokenNamereturn;
3447 } else if (length == 7) {
3448 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3449 && (data[++index] == 'r') && (data[++index] == 'e')) {
3450 return TokenNamerequire;
3452 } else if (length == 12) {
3453 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3454 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3455 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3456 return TokenNamerequire_once;
3459 return TokenNameIdentifier;
// here the fallback is safe: the outer test advances index exactly once before data[index] is reused
3464 if (data[++index] == 't')
3465 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3466 return TokenNamestatic;
3468 return TokenNameIdentifier;
3469 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3470 && (data[++index] == 'h'))
3471 return TokenNameswitch;
3473 return TokenNameIdentifier;
3475 return TokenNameIdentifier;
3481 if ((data[++index] == 'r') && (data[++index] == 'y'))
3482 return TokenNametry;
3484 return TokenNameIdentifier;
3486 // if ((data[++index] == 'r') && (data[++index] == 'u')
3487 // && (data[++index] == 'e'))
3488 // return TokenNametrue;
3490 // return TokenNameIdentifier;
3492 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3493 return TokenNamethrow;
3495 return TokenNameIdentifier;
3497 return TokenNameIdentifier;
3503 if ((data[++index] == 's') && (data[++index] == 'e'))
3504 return TokenNameuse;
3506 return TokenNameIdentifier;
3508 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3509 return TokenNameunset;
3511 return TokenNameIdentifier;
3513 return TokenNameIdentifier;
3519 if ((data[++index] == 'a') && (data[++index] == 'r'))
3520 return TokenNamevar;
3522 return TokenNameIdentifier;
3524 return TokenNameIdentifier;
3530 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3531 return TokenNamewhile;
3533 return TokenNameIdentifier;
3534 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3535 // (data[++index]=='e') && (data[++index]=='f')&&
3536 // (data[++index]=='p'))
3537 //return TokenNamewidefp ;
3539 //return TokenNameIdentifier;
3541 return TokenNameIdentifier;
3547 if ((data[++index] == 'o') && (data[++index] == 'r'))
3548 return TokenNamexor;
3550 return TokenNameIdentifier;
3552 return TokenNameIdentifier;
3555 return TokenNameIdentifier;
/**
 * Scans a numeric literal that starts at the current character and returns its token kind.
 * <p>
 * Outcomes visible in this method: a literal introduced by 0x / 0X yields
 * TokenNameIntegerLiteral; a literal followed by a d / D suffix yields
 * TokenNameDoubleLiteral; a literal with a leading zero, a '.' and an optional exponent
 * yields TokenNameDoubleLiteral; otherwise the last return picks TokenNameDoubleLiteral
 * when the local flag "floating" is set and TokenNameIntegerLiteral when it is not.
 *
 * @param dotPrefix true when the digits were preceded by a '.'; it seeds the "floating"
 *          flag and disables both the leading-zero handling and the search for a '.'
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException with INVALID_HEXA when the hex prefix is not followed by a
 *           hex digit, or with INVALID_FLOAT when an exponent marker (and optional sign)
 *           is not followed by a digit
 */
3559 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3560 //when entering this method the currentCharacter is the first
3561 //digit of the number , i.e. it may be preceded by a . when
3563 boolean floating = dotPrefix;
3564 if ((!dotPrefix) && (currentCharacter == '0')) {
3565 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3566 //force the first char of the hexa number do exist...
3567 // consume next character
3568 unicodeAsBackSlash = false;
// NOTE(review): source is indexed here (and at the other "consume next character" sites
// below) without a bounds check in the visible lines - confirm the caller guarantees a
// following character.
3569 currentCharacter = source[currentPosition++];
3570 // if (((currentCharacter = source[currentPosition++]) == '\\')
3571 // && (source[currentPosition] == 'u')) {
3572 // getNextUnicodeChar();
3574 // if (withoutUnicodePtr != 0) {
3575 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3578 if (Character.digit(currentCharacter, 16) == -1)
3579 throw new InvalidInputException(INVALID_HEXA);
// consume the remaining hex digits
3581 while (getNextCharAsDigit(16)) {
3584 // if (getNextChar('l', 'L') >= 0)
3585 // return TokenNameLongLiteral;
3587 return TokenNameIntegerLiteral;
3589 //there is x or X in the number
3590 //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3591 // 00078.0 is true !!!!! crazy language
3592 if (getNextCharAsDigit()) {
3593 //-------------potential octal-----------------
3594 while (getNextCharAsDigit()) {
3597 // if (getNextChar('l', 'L') >= 0) {
3598 // return TokenNameLongLiteral;
3601 // if (getNextChar('f', 'F') >= 0) {
3602 // return TokenNameFloatingPointLiteral;
3604 if (getNextChar('d', 'D') >= 0) {
3605 return TokenNameDoubleLiteral;
3606 } else { //make the distinction between octal and float ....
3607 if (getNextChar('.')) { //bingo ! ....
3608 while (getNextCharAsDigit()) {
3611 if (getNextChar('e', 'E') >= 0) {
3612 // consume next character
3613 unicodeAsBackSlash = false;
3614 currentCharacter = source[currentPosition++];
3615 // if (((currentCharacter = source[currentPosition++]) == '\\')
3616 // && (source[currentPosition] == 'u')) {
3617 // getNextUnicodeChar();
3619 // if (withoutUnicodePtr != 0) {
3620 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3623 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3624 // consume next character
3625 unicodeAsBackSlash = false;
3626 currentCharacter = source[currentPosition++];
3627 // if (((currentCharacter = source[currentPosition++]) == '\\')
3628 // && (source[currentPosition] == 'u')) {
3629 // getNextUnicodeChar();
3631 // if (withoutUnicodePtr != 0) {
3632 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3633 // currentCharacter;
// an exponent must contain at least one digit
3637 if (!Character.isDigit(currentCharacter))
3638 throw new InvalidInputException(INVALID_FLOAT);
3639 while (getNextCharAsDigit()) {
3643 // if (getNextChar('f', 'F') >= 0)
3644 // return TokenNameFloatingPointLiteral;
3645 getNextChar('d', 'D'); //jump over potential d or D
3646 return TokenNameDoubleLiteral;
3648 return TokenNameIntegerLiteral;
// general case: consume the digits of the integer part
3655 while (getNextCharAsDigit()) {
3658 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3659 // return TokenNameLongLiteral;
3660 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3661 while (getNextCharAsDigit()) {
3666 //if floating is true both exponent and suffix may be optional
3667 if (getNextChar('e', 'E') >= 0) {
3669 // consume next character
3670 unicodeAsBackSlash = false;
3671 currentCharacter = source[currentPosition++];
3672 // if (((currentCharacter = source[currentPosition++]) == '\\')
3673 // && (source[currentPosition] == 'u')) {
3674 // getNextUnicodeChar();
3676 // if (withoutUnicodePtr != 0) {
3677 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3680 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3683 unicodeAsBackSlash = false;
3684 currentCharacter = source[currentPosition++];
3685 // if (((currentCharacter = source[currentPosition++]) == '\\')
3686 // && (source[currentPosition] == 'u')) {
3687 // getNextUnicodeChar();
3689 // if (withoutUnicodePtr != 0) {
3690 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3694 if (!Character.isDigit(currentCharacter))
3695 throw new InvalidInputException(INVALID_FLOAT);
3696 while (getNextCharAsDigit()) {
3700 if (getNextChar('d', 'D') >= 0)
3701 return TokenNameDoubleLiteral;
3702 // if (getNextChar('f', 'F') >= 0)
3703 // return TokenNameFloatingPointLiteral;
3704 //the long flag has been tested before
3705 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3709 * Search the line number corresponding to a specific position
/**
 * Returns the line number for a source position by comparing the position against the
 * recorded line-end offsets in lineEnds (indices 0 .. linePtr are considered).
 * NOTE(review): the loop header, the midpoint computation and every return statement are
 * missing from this listing. The g/d bounds and the three-way comparison look like a
 * binary search - confirm against the complete file before relying on that.
 *
 * @param position the source offset to look up
 * @return the line number for position (the returned expressions are not visible here)
 */
3712 public final int getLineNumber(int position) {
// no line ends recorded: handled as a special case (its result is not visible here)
3713 if (lineEnds == null)
3715 int length = linePtr + 1;
// g and d are the lower and upper search bounds into lineEnds
3718 int g = 0, d = length - 1;
3722 if (position < lineEnds[m]) {
3724 } else if (position > lineEnds[m]) {
3730 if (position < lineEnds[m]) {
/**
 * Setter taking the PHP-mode flag.
 * NOTE(review): the method body is not part of this listing; presumably it stores mode in
 * the public phpMode field - confirm against the complete file.
 *
 * @param mode the new mode value
 */
3736 public void setPHPMode(boolean mode) {
/**
 * Installs a new source buffer without an associated compilation unit; delegates to
 * setSource(ICompilationUnit, char[]) with a null compilation unit.
 *
 * @param source the characters to scan
 */
3740 public final void setSource(char[] source) {
3741 setSource(null, source);
/**
 * Installs a new source buffer together with its compilation unit and resets scanning
 * state: initialPosition and currentPosition go back to 0, containsAssertKeyword is
 * cleared, and withoutUnicodeBuffer and encapsedStringStack are re-created.
 *
 * @param compilationUnit stored as-is in this.compilationUnit (may be null)
 * @param source the characters to scan; null is replaced by an empty array
 */
3744 public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3745 //the source-buffer is set to sourceString
3746 this.compilationUnit = compilationUnit;
// never keep a null buffer: the code below reads this.source.length
3747 if (source == null) {
3748 this.source = new char[0];
3750 this.source = source;
3753 initialPosition = currentPosition = 0;
3754 containsAssertKeyword = false;
// scratch buffer sized to the whole source
3755 withoutUnicodeBuffer = new char[this.source.length];
3756 encapsedStringStack = new Stack();
/**
 * Renders the scanner state for debugging: the source text with the current token
 * (startPosition up to currentPosition - 1) bracketed by marker lines.
 * When startPosition equals the source length an "EOF" banner followed by the whole source
 * is returned; when currentPosition is beyond the source length a "behind the EOF" banner
 * followed by the whole source is returned.
 *
 * @return the debug representation
 */
3759 public String toString() {
3760 if (startPosition == source.length)
3761 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3762 if (currentPosition > source.length)
3763 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
// everything before the current token
3764 char front[] = new char[startPosition];
3765 System.arraycopy(source, 0, front, 0, startPosition);
// length of the current token; simplifies to currentPosition - startPosition
3766 int middleLength = (currentPosition - 1) - startPosition + 1;
3768 if (middleLength > -1) {
3769 middle = new char[middleLength];
3770 System.arraycopy(source, startPosition, middle, 0, middleLength);
3772 middle = new char[0];
// NOTE(review): end is allocated with one more element than arraycopy fills below, so its
// last char keeps the default '\0' value - confirm whether that trailing char is intended.
3774 char end[] = new char[source.length - (currentPosition - 1)];
3775 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3776 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3777 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable description of the token kind act, for debugging output.
 * Keywords and operators map to fixed strings. Token kinds that carry text (inline HTML,
 * identifiers, variables, numeric and string literals, heredocs, whitespace and comments)
 * are rendered as Kind(text), with the text taken from getCurrentTokenSource(); for those
 * kinds the result therefore describes the token the scanner is currently positioned on.
 * The last return in the method produces "not-a-token(act) " followed by the current
 * token source.
 *
 * @param act a TokenName* constant
 * @return the description string
 */
3781 public final String toStringAction(int act) {
3783 case TokenNameERROR:
3784 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3786 case TokenNameINLINE_HTML:
3787 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3788 case TokenNameIdentifier:
3789 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3790 case TokenNameVariable:
3791 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3792 case TokenNameabstract:
3793 return "abstract"; //$NON-NLS-1$
3795 return "AND"; //$NON-NLS-1$
3796 case TokenNamearray:
3797 return "array"; //$NON-NLS-1$
3799 return "as"; //$NON-NLS-1$
3800 case TokenNamebreak:
3801 return "break"; //$NON-NLS-1$
3803 return "case"; //$NON-NLS-1$
3804 case TokenNameclass:
3805 return "class"; //$NON-NLS-1$
3806 case TokenNamecatch:
3807 return "catch"; //$NON-NLS-1$
3808 case TokenNameclone:
3811 case TokenNameconst:
3814 case TokenNamecontinue:
3815 return "continue"; //$NON-NLS-1$
3816 case TokenNamedefault:
3817 return "default"; //$NON-NLS-1$
3818 // case TokenNamedefine :
3819 // return "define"; //$NON-NLS-1$
3821 return "do"; //$NON-NLS-1$
3823 return "echo"; //$NON-NLS-1$
3825 return "else"; //$NON-NLS-1$
3826 case TokenNameelseif:
3827 return "elseif"; //$NON-NLS-1$
3828 case TokenNameendfor:
3829 return "endfor"; //$NON-NLS-1$
3830 case TokenNameendforeach:
3831 return "endforeach"; //$NON-NLS-1$
3832 case TokenNameendif:
3833 return "endif"; //$NON-NLS-1$
3834 case TokenNameendswitch:
3835 return "endswitch"; //$NON-NLS-1$
3836 case TokenNameendwhile:
3837 return "endwhile"; //$NON-NLS-1$
3840 case TokenNameextends:
3841 return "extends"; //$NON-NLS-1$
3842 // case TokenNamefalse :
3843 // return "false"; //$NON-NLS-1$
3844 case TokenNamefinal:
3845 return "final"; //$NON-NLS-1$
3847 return "for"; //$NON-NLS-1$
3848 case TokenNameforeach:
3849 return "foreach"; //$NON-NLS-1$
3850 case TokenNamefunction:
3851 return "function"; //$NON-NLS-1$
3852 case TokenNameglobal:
3853 return "global"; //$NON-NLS-1$
3855 return "if"; //$NON-NLS-1$
3856 case TokenNameimplements:
3857 return "implements"; //$NON-NLS-1$
3858 case TokenNameinclude:
3859 return "include"; //$NON-NLS-1$
3860 case TokenNameinclude_once:
3861 return "include_once"; //$NON-NLS-1$
3862 case TokenNameinstanceof:
3863 return "instanceof"; //$NON-NLS-1$
3864 case TokenNameinterface:
3865 return "interface"; //$NON-NLS-1$
3866 case TokenNameisset:
3867 return "isset"; //$NON-NLS-1$
3869 return "list"; //$NON-NLS-1$
3871 return "new"; //$NON-NLS-1$
3872 // case TokenNamenull :
3873 // return "null"; //$NON-NLS-1$
3875 return "OR"; //$NON-NLS-1$
3876 case TokenNameprint:
3877 return "print"; //$NON-NLS-1$
3878 case TokenNameprivate:
3879 return "private"; //$NON-NLS-1$
3880 case TokenNameprotected:
3881 return "protected"; //$NON-NLS-1$
3882 case TokenNamepublic:
3883 return "public"; //$NON-NLS-1$
3884 case TokenNamerequire:
3885 return "require"; //$NON-NLS-1$
3886 case TokenNamerequire_once:
3887 return "require_once"; //$NON-NLS-1$
3888 case TokenNamereturn:
3889 return "return"; //$NON-NLS-1$
3890 case TokenNamestatic:
3891 return "static"; //$NON-NLS-1$
3892 case TokenNameswitch:
3893 return "switch"; //$NON-NLS-1$
3894 // case TokenNametrue :
3895 // return "true"; //$NON-NLS-1$
3896 case TokenNameunset:
3897 return "unset"; //$NON-NLS-1$
3899 return "var"; //$NON-NLS-1$
3900 case TokenNamewhile:
3901 return "while"; //$NON-NLS-1$
3903 return "XOR"; //$NON-NLS-1$
3904 // case TokenNamethis :
3905 // return "$this"; //$NON-NLS-1$
3906 case TokenNameIntegerLiteral:
3907 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3908 case TokenNameDoubleLiteral:
3909 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3910 case TokenNameStringDoubleQuote:
3911 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3912 case TokenNameStringSingleQuote:
3913 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3914 case TokenNameStringInterpolated:
3915 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3916 case TokenNameEncapsedString0:
3917 return "`"; //$NON-NLS-1$
3918 case TokenNameEncapsedString1:
3919 return "\'"; //$NON-NLS-1$
3920 case TokenNameEncapsedString2:
3921 return "\""; //$NON-NLS-1$
3922 case TokenNameSTRING:
3923 return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3924 case TokenNameHEREDOC:
3925 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3926 case TokenNamePLUS_PLUS:
3927 return "++"; //$NON-NLS-1$
3928 case TokenNameMINUS_MINUS:
3929 return "--"; //$NON-NLS-1$
3930 case TokenNameEQUAL_EQUAL:
3931 return "=="; //$NON-NLS-1$
3932 case TokenNameEQUAL_EQUAL_EQUAL:
3933 return "==="; //$NON-NLS-1$
3934 case TokenNameEQUAL_GREATER:
3935 return "=>"; //$NON-NLS-1$
3936 case TokenNameLESS_EQUAL:
3937 return "<="; //$NON-NLS-1$
3938 case TokenNameGREATER_EQUAL:
3939 return ">="; //$NON-NLS-1$
3940 case TokenNameNOT_EQUAL:
3941 return "!="; //$NON-NLS-1$
3942 case TokenNameNOT_EQUAL_EQUAL:
3943 return "!=="; //$NON-NLS-1$
3944 case TokenNameLEFT_SHIFT:
3945 return "<<"; //$NON-NLS-1$
3946 case TokenNameRIGHT_SHIFT:
3947 return ">>"; //$NON-NLS-1$
3948 case TokenNamePLUS_EQUAL:
3949 return "+="; //$NON-NLS-1$
3950 case TokenNameMINUS_EQUAL:
3951 return "-="; //$NON-NLS-1$
3952 case TokenNameMULTIPLY_EQUAL:
3953 return "*="; //$NON-NLS-1$
3954 case TokenNameDIVIDE_EQUAL:
3955 return "/="; //$NON-NLS-1$
3956 case TokenNameAND_EQUAL:
3957 return "&="; //$NON-NLS-1$
3958 case TokenNameOR_EQUAL:
3959 return "|="; //$NON-NLS-1$
3960 case TokenNameXOR_EQUAL:
3961 return "^="; //$NON-NLS-1$
3962 case TokenNameREMAINDER_EQUAL:
3963 return "%="; //$NON-NLS-1$
3964 case TokenNameDOT_EQUAL:
3965 return ".="; //$NON-NLS-1$
3966 case TokenNameLEFT_SHIFT_EQUAL:
3967 return "<<="; //$NON-NLS-1$
3968 case TokenNameRIGHT_SHIFT_EQUAL:
3969 return ">>="; //$NON-NLS-1$
3970 case TokenNameOR_OR:
3971 return "||"; //$NON-NLS-1$
3972 case TokenNameAND_AND:
3973 return "&&"; //$NON-NLS-1$
3975 return "+"; //$NON-NLS-1$
3976 case TokenNameMINUS:
3977 return "-"; //$NON-NLS-1$
3978 case TokenNameMINUS_GREATER:
3981 return "!"; //$NON-NLS-1$
3982 case TokenNameREMAINDER:
3983 return "%"; //$NON-NLS-1$
3985 return "^"; //$NON-NLS-1$
3987 return "&"; //$NON-NLS-1$
3988 case TokenNameMULTIPLY:
3989 return "*"; //$NON-NLS-1$
3991 return "|"; //$NON-NLS-1$
3992 case TokenNameTWIDDLE:
3993 return "~"; //$NON-NLS-1$
3994 case TokenNameTWIDDLE_EQUAL:
3995 return "~="; //$NON-NLS-1$
3996 case TokenNameDIVIDE:
3997 return "/"; //$NON-NLS-1$
3998 case TokenNameGREATER:
3999 return ">"; //$NON-NLS-1$
4001 return "<"; //$NON-NLS-1$
4002 case TokenNameLPAREN:
4003 return "("; //$NON-NLS-1$
4004 case TokenNameRPAREN:
4005 return ")"; //$NON-NLS-1$
4006 case TokenNameLBRACE:
4007 return "{"; //$NON-NLS-1$
4008 case TokenNameRBRACE:
4009 return "}"; //$NON-NLS-1$
4010 case TokenNameLBRACKET:
4011 return "["; //$NON-NLS-1$
4012 case TokenNameRBRACKET:
4013 return "]"; //$NON-NLS-1$
4014 case TokenNameSEMICOLON:
4015 return ";"; //$NON-NLS-1$
4016 case TokenNameQUESTION:
4017 return "?"; //$NON-NLS-1$
4018 case TokenNameCOLON:
4019 return ":"; //$NON-NLS-1$
4020 case TokenNameCOMMA:
4021 return ","; //$NON-NLS-1$
4023 return "."; //$NON-NLS-1$
4024 case TokenNameEQUAL:
4025 return "="; //$NON-NLS-1$
4028 case TokenNameDOLLAR:
4030 case TokenNameDOLLAR_LBRACE:
4032 case TokenNameLBRACE_DOLLAR:
4035 return "EOF"; //$NON-NLS-1$
4036 case TokenNameWHITESPACE:
4037 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4038 case TokenNameCOMMENT_LINE:
4039 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4040 case TokenNameCOMMENT_BLOCK:
4041 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4042 case TokenNameCOMMENT_PHPDOC:
4043 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4044 // case TokenNameHTML :
4045 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
4048 return "__FILE__"; //$NON-NLS-1$
4050 return "__LINE__"; //$NON-NLS-1$
4051 case TokenNameCLASS_C:
4052 return "__CLASS__"; //$NON-NLS-1$
4053 case TokenNameMETHOD_C:
4054 return "__METHOD__"; //$NON-NLS-1$
4055 case TokenNameFUNC_C:
4056 return "__FUNCTION__"; //$NON-NLS-1$
4057 case TokenNameboolCAST:
4058 return "( bool )"; //$NON-NLS-1$
4059 case TokenNameintCAST:
4060 return "( int )"; //$NON-NLS-1$
4061 case TokenNamedoubleCAST:
4062 return "( double )"; //$NON-NLS-1$
4063 case TokenNameobjectCAST:
4064 return "( object )"; //$NON-NLS-1$
4065 case TokenNamestringCAST:
4066 return "( string )"; //$NON-NLS-1$
// fallback text: numeric token kind followed by the current token source
4068 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner that does not check for non-externalized string literals; delegates
 * to the three-argument constructor with checkNonExternalizedStringLiterals = false.
 *
 * @param tokenizeComments forwarded unchanged
 * @param tokenizeWhiteSpace forwarded unchanged
 */
4076 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
4077 this(tokenizeComments, tokenizeWhiteSpace, false);
/**
 * Creates a scanner with assert mode switched off; delegates to the four-argument
 * constructor with assertMode = false.
 *
 * @param tokenizeComments forwarded unchanged
 * @param tokenizeWhiteSpace forwarded unchanged
 * @param checkNonExternalizedStringLiterals forwarded unchanged
 */
4080 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
4081 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
/**
 * Creates a scanner without string tokenizing and without task tags; delegates to the
 * eight-argument constructor with tokenizeStrings = false, taskTags = null,
 * taskPriorities = null and isTaskCaseSensitive = true.
 *
 * @param tokenizeComments forwarded unchanged
 * @param tokenizeWhiteSpace forwarded unchanged
 * @param checkNonExternalizedStringLiterals forwarded unchanged
 * @param assertMode forwarded unchanged
 */
4084 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
4085 boolean assertMode) {
4086 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null, true);
/**
 * Creates a scanner with every option given explicitly. All other constructors delegate
 * here.
 * <p>
 * eofPosition starts at Integer.MAX_VALUE and encapsedStringStack starts as null.
 *
 * @param tokenizeComments stored in this.tokenizeComments
 * @param tokenizeWhiteSpace stored in this.tokenizeWhiteSpace
 * @param checkNonExternalizedStringLiterals stored in this.checkNonExternalizedStringLiterals
 * @param assertMode stored in this.assertMode
 * @param tokenizeStrings stored in this.tokenizeStrings
 * @param taskTags the task tags searched for by checkTaskTag; may be null
 * @param taskPriorities priorities parallel to taskTags; may be null
 * @param isTaskCaseSensitive whether checkTaskTag matches task tags case-sensitively
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
  this.eofPosition = Integer.MAX_VALUE;
  this.tokenizeComments = tokenizeComments;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeStrings = tokenizeStrings;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.assertMode = assertMode;
  this.encapsedStringStack = null;
  this.taskTags = taskTags;
  this.taskPriorities = taskPriorities;
  // This parameter used to be dropped, so the case-sensitivity requested by the caller
  // never reached checkTaskTag, which reads this.isTaskCaseSensitive.
  this.isTaskCaseSensitive = isTaskCaseSensitive;
}
/**
 * Runs the NLS tag check for the line currently being collected by passing currentLine to
 * parseTags. The statement guarded by the null test is not visible in this listing;
 * presumably the method returns early when there is no current line - confirm.
 *
 * @throws InvalidInputException declared by parseTags
 */
4102 private void checkNonExternalizeString() throws InvalidInputException {
4103 if (currentLine == null)
4105 parseTags(currentLine);
/**
 * Reads the NLS tags contained in the current token source and then collects the string
 * literals of line that are still present (non-null) into this.nonNLSStrings.
 * <p>
 * Each tag is located by TAG_PREFIX / TAG_POSTFIX; the text between them is parsed as a
 * one-based index and converted to a zero-based one, with an unparsable index treated as
 * -1. What happens to a literal whose index exists in line is not visible in this listing.
 * Afterwards nonNLSStrings is set to null when no literal remains; otherwise
 * wasNonExternalizedStringLiteral is set and the array is shrunk to the number of
 * remaining literals.
 *
 * @param line the literals gathered for the current source line
 * @throws InvalidInputException declared by the method; no throw site is visible here
 */
4108 private void parseTags(NLSLine line) throws InvalidInputException {
4109 String s = new String(getCurrentTokenSource());
4110 int pos = s.indexOf(TAG_PREFIX);
4111 int lineLength = line.size();
4113 int start = pos + TAG_PREFIX_LENGTH;
// NOTE(review): when TAG_POSTFIX is absent indexOf returns -1 and substring(start, -1)
// throws StringIndexOutOfBoundsException; no guard is visible in this listing - confirm.
4114 int end = s.indexOf(TAG_POSTFIX, start);
4115 String index = s.substring(start, end);
4118 i = Integer.parseInt(index) - 1;
4119 // Tags are one based not zero based.
4120 } catch (NumberFormatException e) {
4121 i = -1; // we don't want to consider this as a valid NLS tag
4123 if (line.exists(i)) {
// continue with the next tag after the one just handled
4126 pos = s.indexOf(TAG_PREFIX, start);
// collect the literals that are still non-null in line
4128 this.nonNLSStrings = new StringLiteral[lineLength];
4129 int nonNLSCounter = 0;
4130 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4131 StringLiteral literal = (StringLiteral) iterator.next();
4132 if (literal != null) {
4133 this.nonNLSStrings[nonNLSCounter++] = literal;
4136 if (nonNLSCounter == 0) {
4137 this.nonNLSStrings = null;
4141 this.wasNonExternalizedStringLiteral = true;
// shrink the array to the number of literals actually stored
4142 if (nonNLSCounter != lineLength) {
4143 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
/**
 * Decodes the escape sequence at the current position and leaves the decoded character in
 * currentCharacter.
 * <p>
 * The character read from source is switched on. The visible branches assign the control
 * and quote characters '\b', '\t', '\n', '\f', '\r', '\"', '\'' and '\\'; the case labels
 * selecting them are not part of this listing. The remaining branch handles octal escapes
 * of up to three digits: a third digit is only folded into the value when the first digit
 * is in the range 0-3, and the accumulated value is cast to char.
 *
 * @throws InvalidInputException with INVALID_ESCAPE; two throw sites are visible, their
 *           guarding conditions are partly outside this listing
 */
4148 public final void scanEscapeCharacter() throws InvalidInputException {
4149 // the string with "\\u" is a legal string of two chars \ and u
4150 //thus we use a direct access to the source (for regular cases).
4151 if (unicodeAsBackSlash) {
4152 // consume next character
4153 unicodeAsBackSlash = false;
4154 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4155 // (source[currentPosition] == 'u')) {
4156 // getNextUnicodeChar();
4158 if (withoutUnicodePtr != 0) {
4159 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
4163 currentCharacter = source[currentPosition++];
4164 switch (currentCharacter) {
4166 currentCharacter = '\b';
4169 currentCharacter = '\t';
4172 currentCharacter = '\n';
4175 currentCharacter = '\f';
4178 currentCharacter = '\r';
4181 currentCharacter = '\"';
4184 currentCharacter = '\'';
4187 currentCharacter = '\\';
4190 // -----------octal escape--------------
4192 // OctalDigit OctalDigit
4193 // ZeroToThree OctalDigit OctalDigit
4194 int number = Character.getNumericValue(currentCharacter);
4195 if (number >= 0 && number <= 7) {
// true when the first digit is 4-7, in which case no third digit is folded in
4196 boolean zeroToThreeNot = number > 3;
4197 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4198 int digit = Character.getNumericValue(currentCharacter);
4199 if (digit >= 0 && digit <= 7) {
4200 number = (number * 8) + digit;
4201 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4202 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4203 // Digit --> ignore last character
4206 digit = Character.getNumericValue(currentCharacter);
4207 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4208 // OctalDigit OctalDigit
4209 number = (number * 8) + digit;
4210 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4211 // --> ignore last character
4215 } else { // has read \OctalDigit NonDigit--> ignore last
4219 } else { // has read \OctalDigit NonOctalDigit--> ignore last
4223 } else { // has read \OctalDigit --> ignore last character
4227 throw new InvalidInputException(INVALID_ESCAPE);
4228 currentCharacter = (char) number;
4230 throw new InvalidInputException(INVALID_ESCAPE);
4234 //check presence of task: tags
4235 //TODO (frederic) see if we need to take unicode characters into account...
/**
 * Searches the comment between commentStart and commentEnd for the configured task tags
 * and records every hit in the parallel arrays foundTaskTags, foundTaskPriorities,
 * foundTaskPositions and foundTaskMessages.
 * <p>
 * The scan starts two characters after commentStart and stops at commentEnd or
 * eofPosition. No tag is looked for directly after an '@'. A tag starting with an
 * identifier character is tested against the preceding character, and a tag ending with an
 * identifier character is tested against the following one; the statements taken when
 * those tests succeed are not visible in this listing. Character comparison honours
 * isTaskCaseSensitive. A second pass computes, for each tag found by this call, the
 * message text following it, trims whitespace, and stores the message and the updated
 * end position.
 * NOTE(review): taskTags is dereferenced without a null check in the visible lines;
 * presumably callers only invoke this method when task tags are configured - confirm.
 *
 * @param commentStart offset of the first character of the comment
 * @param commentEnd offset at which scanning stops (exclusive)
 */
4236 public void checkTaskTag(int commentStart, int commentEnd) {
4237 char[] src = this.source;
4239 // only look for newer task: tags
4240 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// remember where this call's hits start, for the message pass below
4243 int foundTaskIndex = this.foundTaskCount;
4244 char previous = src[commentStart + 1]; // should be '*' or '/'
4245 nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4247 char[] priority = null;
4248 // check for tag occurrence only if not ambiguous with javadoc tag
4249 if (previous != '@') {
4250 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4251 tag = this.taskTags[itag];
4252 int tagLength = tag.length;
4256 // ensure tag is not leaded with letter if tag starts with a letter
4257 if (Scanner.isPHPIdentifierStart(tag[0])) {
4258 if (Scanner.isPHPIdentifierPart(previous)) {
4263 for (int t = 0; t < tagLength; t++) {
4266 if (x >= this.eofPosition || x >= commentEnd)
4268 if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4269 if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case insensitive check
4274 // ensure tag is not followed with letter if tag finishes with a letter
4275 if (i + tagLength < commentEnd && Scanner.isPHPIdentifierPart(src[i + tagLength - 1])) {
4276 if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
// first hit ever: allocate the four parallel arrays with room for 5 entries
4279 if (this.foundTaskTags == null) {
4280 this.foundTaskTags = new char[5][];
4281 this.foundTaskMessages = new char[5][];
4282 this.foundTaskPriorities = new char[5][];
4283 this.foundTaskPositions = new int[5][];
4284 } else if (this.foundTaskCount == this.foundTaskTags.length) {
// arrays are full: double their size, keeping the existing entries
4285 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4286 this.foundTaskCount);
4287 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4288 this.foundTaskCount);
4289 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4290 this.foundTaskCount);
4291 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4292 this.foundTaskCount);
// priority is null when no priority is configured for this tag index
4295 priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4297 this.foundTaskTags[this.foundTaskCount] = tag;
4298 this.foundTaskPriorities[this.foundTaskCount] = priority;
4299 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4300 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4301 this.foundTaskCount++;
4302 i += tagLength - 1; // will be incremented when looping
// second pass: extract the message text for each tag found by this call
4308 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4309 // retrieve message start and end positions
4310 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4311 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4312 // at most beginning of next task
4313 if (max_value < msgStart) {
4314 max_value = msgStart; // would only occur if tag is before EOF.
// look for a line break after the tag
4318 for (int j = msgStart; j < max_value; j++) {
4319 if ((c = src[j]) == '\n' || c == '\r') {
// look backwards for a '*'
4325 for (int j = max_value; j > msgStart; j--) {
4326 if ((c = src[j]) == '*') {
4334 if (msgStart == end)
// NOTE(review): both trimming loops index src before testing msgStart <= end, so the
// array access is not protected by the bounds comparison - confirm this cannot overrun.
4337 while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4339 while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4341 // update the end position of the task
4342 this.foundTaskPositions[i][1] = end;
4343 // get the message source
4344 final int messageLength = end - msgStart + 1;
4345 char[] message = new char[messageLength];
4346 System.arraycopy(src, msgStart, message, 0, messageLength);
4347 this.foundTaskMessages[i] = message;
4351 // check presence of task: tags
4352 // public void checkTaskTag(int commentStart, int commentEnd) {
4353 // // only look for newer task: tags
4354 // if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4357 // int foundTaskIndex = this.foundTaskCount;
4358 // nextChar: for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
4359 // char[] tag = null;
4360 // char[] priority = null;
4361 // // check for tag occurrence
4362 // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4363 // tag = this.taskTags[itag];
4364 // priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4365 // int tagLength = tag.length;
4366 // for (int t = 0; t < tagLength; t++) {
4367 // if (this.source[i + t] != tag[t])
4368 // continue nextTag;
4370 // if (this.foundTaskTags == null) {
4371 // this.foundTaskTags = new char[5][];
4372 // this.foundTaskMessages = new char[5][];
4373 // this.foundTaskPriorities = new char[5][];
4374 // this.foundTaskPositions = new int[5][];
4375 // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4376 // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4377 // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4378 // this.foundTaskCount);
4379 // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4380 // this.foundTaskCount);
4381 // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4382 // this.foundTaskCount);
4384 // this.foundTaskTags[this.foundTaskCount] = tag;
4385 // this.foundTaskPriorities[this.foundTaskCount] = priority;
4386 // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4387 // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4388 // this.foundTaskCount++;
4389 // i += tagLength - 1; // will be incremented when looping
4392 // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4393 // // retrieve message start and end positions
4394 // int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4395 // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4396 // // at most beginning of next task
4397 // if (max_value < msgStart)
4398 // max_value = msgStart; // would only occur if tag is before EOF.
4401 // for (int j = msgStart; j < max_value; j++) {
4402 // if ((c = this.source[j]) == '\n' || c == '\r') {
4408 // for (int j = max_value; j > msgStart; j--) {
4409 // if ((c = this.source[j]) == '*') {
4417 // if (msgStart == end)
4418 // continue; // empty
4419 // // trim the message
4420 // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4422 // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4424 // // update the end position of the task
4425 // this.foundTaskPositions[i][1] = end;
4426 // // get the message source
4427 // final int messageLength = end - msgStart + 1;
4428 // char[] message = new char[messageLength];
4429 // System.arraycopy(source, msgStart, message, 0, messageLength);
4430 // this.foundTaskMessages[i] = message;