1 /***********************************************************************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3 * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4 * and is available at http://www.eclipse.org/legal/cpl-v05.html
6 * Contributors: IBM Corporation - initial API and implementation
7 **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
13 import java.util.Stack;
15 import net.sourceforge.phpdt.core.compiler.CharOperation;
16 import net.sourceforge.phpdt.core.compiler.IScanner;
17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
20 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
22 public class Scanner implements IScanner, ITerminalSymbols {
24 * APIs are - getNextToken() which returns the current type of the token (this value is not memorized by the scanner) -
25 * getCurrentTokenSource() which provides the token "REAL" source (aka all unicode escapes have been transformed into a correct char) -
26 * sourceStart gives the position into the stream - currentPosition-1 gives the sourceEnd position into the stream
// ---- Scanner state -------------------------------------------------------------------------------------------------
// NOTE(review): the declaration of the `source` char array that the methods below index is not visible in this view.
// NOTE(review): assertMode is not read anywhere in the visible part of the class.
29 private boolean assertMode;
31 public boolean useAssertAsAnIndentifier = false;
33 //flag indicating if processed source contains occurrences of keyword assert
34 public boolean containsAssertKeyword = false;
36 public boolean recordLineSeparator;
38 public boolean ignorePHPOneLiner = false;
40 public boolean phpMode = false;
42 public Stack encapsedStringStack = null;
// Last character read from source by the getNextChar...() / consumeString...() methods.
44 public char currentCharacter;
// Start index of the current token (returned by getCurrentTokenStartPosition()).
46 public int startPosition;
// Index of the next character to read; the current token ends at currentPosition - 1.
48 public int currentPosition;
// initialPosition is returned by getLineStart() as the start of the first line.
50 public int initialPosition, eofPosition;
52 // after this position eof are generated instead of real token from the source
54 public boolean tokenizeComments;
56 public boolean tokenizeWhiteSpace;
58 public boolean tokenizeStrings;
60 //source should be viewed as a window (aka a part)
61 //of a entire very large stream
// Buffer holding the current token with escapes resolved; index 0 is unused so that
// withoutUnicodePtr == 0 can serve as the "buffer not in use" flag.
65 public char[] withoutUnicodeBuffer;
67 public int withoutUnicodePtr;
69 //when == 0 ==> no unicode in the current token
70 public boolean unicodeAsBackSlash = false;
72 public boolean scanningFloatLiteral = false;
74 //support for /** comments
75 public int[] commentStops = new int[10];
77 public int[] commentStarts = new int[10];
79 public int commentPtr = -1; // no comment test with commentPtr value -1
81 protected int lastCommentLinePosition = -1;
83 //diet parsing support - jump over some method body when requested
84 public boolean diet = false;
86 //support for the poor-line-debuggers ....
87 //remember the position of the cr/lf
// Read by getLineEnd() / getLineStart().
88 public int[] lineEnds = new int[250];
90 public int linePtr = -1;
92 public boolean wasAcr = false;
// Message keys for scanner errors. The visible code passes UNTERMINATED_STRING to InvalidInputException and compares
// InvalidInputException.getMessage() against INVALID_ESCAPE; the other keys are presumably used the same way
// elsewhere in the class (not visible here).
94 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
96 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
98 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
100 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
102 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
104 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
106 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
108 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
110 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
112 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
114 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
116 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
118 //----------------optimized identifier management------------------
// Shared one-character arrays, one per lower-case ASCII letter 'a'..'z'.
119 static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
120 charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
121 charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
122 charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
123 charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
124 charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
125 charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
126 charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
127 charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
// Shared two-character arrays: '$' followed by a lower-case ASCII letter (one-letter PHP variable names).
129 static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] { '$', 'c' },
130 charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' }, charArray_vf = new char[] { '$', 'f' },
131 charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' }, charArray_vi = new char[] { '$', 'i' },
132 charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' }, charArray_vl = new char[] { '$', 'l' },
133 charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' }, charArray_vo = new char[] { '$', 'o' },
134 charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' }, charArray_vr = new char[] { '$', 'r' },
135 charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' }, charArray_vu = new char[] { '$', 'u' },
136 charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' }, charArray_vx = new char[] { '$', 'x' },
137 charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
// Placeholder stored in every slot of charArray_length by the initializer loop below the field declarations.
139 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
141 static final int TableSize = 30, InternalTableSize = 6;
144 public static final int OptimizedLength = 6;
// Table indexed [OptimizedLength][TableSize][InternalTableSize]; presumably a per-scanner cache of short token
// sources consulted by the optimizedCurrentTokenSourceN() helpers (defined outside this view) - TODO confirm.
147 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
149 // support for detecting non-externalized string literals
150 int currentLineNr = -1;
152 int previousLineNr = -1;
// Line record that the consumeString...() methods append StringLiteral entries to; created lazily when null.
154 NLSLine currentLine = null;
// All NLSLine records created so far (raw List; elements are NLSLine instances).
156 List lines = new ArrayList();
158 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
160 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
162 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
164 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
166 public StringLiteral[] nonNLSStrings = null;
// When true, the consumeString...() methods record each scanned string literal in currentLine.
168 public boolean checkNonExternalizedStringLiterals = true;
170 public boolean wasNonExternalizedStringLiteral = false;
// Fills every slot of charArray_length with the shared initCharArray placeholder.
// The literal 6 in the outer bound equals OptimizedLength, the first dimension of the table.
// NOTE(review): the braces that open and close this initializer block are not visible in this view.
172 for (int i = 0; i < 6; i++) {
173 for (int j = 0; j < TableSize; j++) {
174 for (int k = 0; k < InternalTableSize; k++) {
175 charArray_length[i][j][k] = initCharArray;
// Static counters shared by all scanners; presumably the next insertion slots of the optimized token tables for
// lengths 2..6 - TODO confirm against optimizedCurrentTokenSourceN().
181 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
// Bracket kind codes; BracketKinds is the number of kinds.
183 public static final int RoundBracket = 0;
185 public static final int SquareBracket = 1;
187 public static final int CurlyBracket = 2;
189 public static final int BracketKinds = 3;
// Task-tag support: tags/priorities to look for and the tasks found so far.
192 public char[][] foundTaskTags = null;
194 public char[][] foundTaskMessages;
196 public char[][] foundTaskPriorities = null;
198 public int[][] foundTaskPositions;
200 public int foundTaskCount = 0;
202 public char[][] taskTags = null;
204 public char[][] taskPriorities = null;
206 public boolean isTaskCaseSensitive = true;
// Compile-time debugging switches.
208 public static final boolean DEBUG = false;
210 public static final boolean TRACE = false;
212 public ICompilationUnit compilationUnit = null;
215 * Determines if the specified character is permissible as the first character in a PHP identifier.
217 * The '$' character for HP variables isn't regarded as the first character !
219 public static boolean isPHPIdentifierStart(char ch) {
220 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
224 * Determines if the specified character may be part of a PHP identifier as other than the first character
226 public static boolean isPHPIdentifierPart(char ch) {
227 return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
230 public final boolean atEnd() {
231 // This code is not relevant if source is
232 // Only a part of the real stream input
233 return source.length == currentPosition;
// Returns the characters of the current identifier token, i.e. the source range startPosition .. currentPosition - 1.
// Short tokens are delegated to the optimizedCurrentTokenSource1..6() helpers (defined outside this view); any other
// length is copied into a fresh array.
// NOTE(review): the `case` labels (presumably 1..6, matching OptimizedLength), the declaration of `result` and the
// final return statement are not visible in this view.
236 public char[] getCurrentIdentifierSource() {
237 //return the token REAL source (aka unicodes are precomputed)
239 // if (withoutUnicodePtr != 0)
240 // //0 is used as a fast test flag so the real first char is in position 1
242 // withoutUnicodeBuffer,
244 // result = new char[withoutUnicodePtr],
246 // withoutUnicodePtr);
248 int length = currentPosition - startPosition;
249 switch (length) { // see OptimizedLength
251 return optimizedCurrentTokenSource1();
253 return optimizedCurrentTokenSource2();
255 return optimizedCurrentTokenSource3();
257 return optimizedCurrentTokenSource4();
259 return optimizedCurrentTokenSource5();
261 return optimizedCurrentTokenSource6();
// no optimization available for this length: plain copy out of source
264 System.arraycopy(source, startPosition, result = new char[length], 0, length);
269 public int getCurrentTokenEndPosition() {
270 return this.currentPosition - 1;
// Copies the current token (source range startPosition .. currentPosition - 1) into a fresh char array.
// The branch that used withoutUnicodeBuffer is commented out, so the token is always taken verbatim from source.
// NOTE(review): the declarations of `result` / `length` and the return statement are not visible in this view.
273 public final char[] getCurrentTokenSource() {
274 // Return the token REAL source (aka unicodes are precomputed)
276 // if (withoutUnicodePtr != 0)
277 // // 0 is used as a fast test flag so the real first char is in position 1
279 // withoutUnicodeBuffer,
281 // result = new char[withoutUnicodePtr],
283 // withoutUnicodePtr);
286 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
// Same as getCurrentTokenSource(), but the copied range starts at the caller-supplied startPos instead of
// startPosition: source range startPos .. currentPosition - 1.
// NOTE(review): the declarations of `result` / `length` and the return statement are not visible in this view.
291 public final char[] getCurrentTokenSource(int startPos) {
292 // Return the token REAL source (aka unicodes are precomputed)
294 // if (withoutUnicodePtr != 0)
295 // // 0 is used as a fast test flag so the real first char is in position 1
297 // withoutUnicodeBuffer,
299 // result = new char[withoutUnicodePtr],
301 // withoutUnicodePtr);
304 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
// Returns the content of the current string token without its first and last character (the delimiters).
// If withoutUnicodeBuffer is in use (withoutUnicodePtr != 0) the content is copied from that buffer starting at
// index 2 with length withoutUnicodePtr - 2; otherwise it is copied from source starting at startPosition + 1 with
// length currentPosition - startPosition - 2.
// NOTE(review): the `else`, the local declarations and the return statement are not visible in this view.
309 public final char[] getCurrentTokenSourceString() {
310 //return the token REAL source (aka unicodes are precomputed).
311 //REMOVE the two " that are at the beginning and the end.
313 if (withoutUnicodePtr != 0)
314 //0 is used as a fast test flag so the real first char is in position 1
315 System.arraycopy(withoutUnicodeBuffer, 2,
316 //2 is 1 (real start) + 1 (to jump over the ")
317 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
320 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
325 public int getCurrentTokenStartPosition() {
326 return this.startPosition;
// Returns the content of the current string literal without its delimiters, copied verbatim from source
// (start startPosition + 1, length currentPosition - startPosition - 2).
// The guard catches tokens of at most one character, for which that length would be negative.
// NOTE(review): what the guard returns, the local declarations and the final return are not visible in this view.
329 public final char[] getCurrentStringLiteralSource() {
330 // Return the token REAL source (aka unicodes are precomputed)
331 if (startPosition + 1 >= currentPosition) {
336 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
// Variant of getCurrentStringLiteralSource() whose range starts at the caller-supplied startPos: copies
// currentPosition - startPos - 2 characters from source, beginning at startPos + 1.
// NOTE(review): no guard against startPos + 1 >= currentPosition is visible here, unlike the no-arg variant; the
// computed length would then be negative - confirm callers never pass such a startPos.
341 public final char[] getCurrentStringLiteralSource(int startPos) {
342 // Return the token REAL source (aka unicodes are precomputed)
345 System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
351 * Search the source position corresponding to the end of a given line number
353 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
355 * In case the given line number is inconsistent, answers -1.
357 public final int getLineEnd(int lineNumber) {
// NOTE(review): the statements guarded by the three checks below are not visible in this view; per the contract
// above the first two presumably answer -1.
358 if (lineEnds == null)
360 if (lineNumber >= lineEnds.length)
364 if (lineNumber == lineEnds.length - 1)
// regular case: lineEnds is 0-based while line numbers are 1-based
366 return lineEnds[lineNumber - 1];
367 // next line start one character behind the lineEnd of the previous line
371 * Search the source position corresponding to the beginning of a given line number
373 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
375 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
377 * In case the given line number is inconsistent, answers -1.
379 public final int getLineStart(int lineNumber) {
// NOTE(review): the statements guarded by the checks below, and the condition guarding the initialPosition return
// (presumably lineNumber == 1), are not visible in this view.
380 if (lineEnds == null)
382 if (lineNumber >= lineEnds.length)
387 return initialPosition;
// a line starts one character after the end of the previous line (lineEnds is 0-based, hence the - 2)
388 return lineEnds[lineNumber - 2] + 1;
389 // next line start one character behind the lineEnd of the previous line
// Reads the next character from source and checks it against testedChar.
// On a mismatch, or when the read runs past the end of source (IndexOutOfBoundsException), currentPosition is reset
// to its value on entry. The \\uXXXX unicode-escape handling is commented out, so characters are read verbatim.
// NOTE(review): the `try {` line and the return statements (presumably false on mismatch/exception, true on match)
// are not visible in this view. currentCharacter is assigned before the comparison and is not restored by the
// visible code on a mismatch.
392 public final boolean getNextChar(char testedChar) {
394 //handle the case of unicode.
395 //when a unicode appears then we must use a buffer that holds char
397 //At the end of this method currentCharacter holds the new visited char
398 //and currentPosition points right next after it
399 //Both previous lines are true if the currentCharacter is == to the
401 //On false, no side effect has occured.
402 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the entry position so it can be restored on failure
403 int temp = currentPosition;
405 currentCharacter = source[currentPosition++];
406 // if (((currentCharacter = source[currentPosition++]) == '\\')
407 // && (source[currentPosition] == 'u')) {
408 // //-------------unicode traitement ------------
409 // int c1, c2, c3, c4;
410 // int unicodeSize = 6;
411 // currentPosition++;
412 // while (source[currentPosition] == 'u') {
413 // currentPosition++;
417 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
419 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
421 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
423 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
425 // currentPosition = temp;
429 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
430 // if (currentCharacter != testedChar) {
431 // currentPosition = temp;
434 // unicodeAsBackSlash = currentCharacter == '\\';
436 // //need the unicode buffer
437 // if (withoutUnicodePtr == 0) {
438 // //buffer all the entries that have been left aside....
439 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
443 // withoutUnicodeBuffer,
445 // withoutUnicodePtr);
447 // //fill the buffer with the char
448 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
451 // } //-------------end unicode traitement--------------
453 if (currentCharacter != testedChar) {
// mismatch: un-read the character
454 currentPosition = temp;
457 unicodeAsBackSlash = false;
458 // if (withoutUnicodePtr != 0)
459 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
462 } catch (IndexOutOfBoundsException e) {
// read past the end of source: same clean-up as for a mismatch
463 unicodeAsBackSlash = false;
464 currentPosition = temp;
// Reads the next character from source and checks it against two candidates.
// Per the comment below, the result is 0 for testedChar1, 1 for testedChar2 and -1 otherwise. When neither
// matches, or the read runs past the end of source (IndexOutOfBoundsException), currentPosition is reset to its
// value on entry. The \\uXXXX unicode-escape handling is commented out.
// NOTE(review): the `try {` line, the result assignments and the return statements are not visible in this view.
469 public final int getNextChar(char testedChar1, char testedChar2) {
470 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
471 //test can be done with (x==0) for the first and (x>0) for the second
472 //handle the case of unicode.
473 //when a unicode appears then we must use a buffer that holds char
475 //At the end of this method currentCharacter holds the new visited char
476 //and currentPosition points right next after it
477 //Both previous lines are true if the currentCharacter is == to the
479 //On false, no side effect has occured.
480 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the entry position so it can be restored on failure
481 int temp = currentPosition;
484 currentCharacter = source[currentPosition++];
485 // if (((currentCharacter = source[currentPosition++]) == '\\')
486 // && (source[currentPosition] == 'u')) {
487 // //-------------unicode traitement ------------
488 // int c1, c2, c3, c4;
489 // int unicodeSize = 6;
490 // currentPosition++;
491 // while (source[currentPosition] == 'u') {
492 // currentPosition++;
496 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
498 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
500 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
502 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
504 // currentPosition = temp;
508 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
509 // if (currentCharacter == testedChar1)
511 // else if (currentCharacter == testedChar2)
514 // currentPosition = temp;
518 // //need the unicode buffer
519 // if (withoutUnicodePtr == 0) {
520 // //buffer all the entries that have been left aside....
521 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
525 // withoutUnicodeBuffer,
527 // withoutUnicodePtr);
529 // //fill the buffer with the char
530 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
532 // } //-------------end unicode traitement--------------
534 if (currentCharacter == testedChar1)
536 else if (currentCharacter == testedChar2)
// neither candidate matched: un-read the character
539 currentPosition = temp;
542 // if (withoutUnicodePtr != 0)
543 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
546 } catch (IndexOutOfBoundsException e) {
// read past the end of source
547 currentPosition = temp;
// Reads the next character from source and checks that it is a digit according to Character.isDigit().
// When it is not, or the read runs past the end of source (IndexOutOfBoundsException), currentPosition is reset to
// its value on entry. The \\uXXXX unicode-escape handling is commented out.
// NOTE(review): the `try {` line and the return statements (presumably false on failure, true on success) are not
// visible in this view.
552 public final boolean getNextCharAsDigit() {
554 //handle the case of unicode.
555 //when a unicode appears then we must use a buffer that holds char
557 //At the end of this method currentCharacter holds the new visited char
558 //and currentPosition points right next after it
559 //Both previous lines are true if the currentCharacter is a digit
560 //On false, no side effect has occured.
561 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the entry position so it can be restored on failure
562 int temp = currentPosition;
564 currentCharacter = source[currentPosition++];
565 // if (((currentCharacter = source[currentPosition++]) == '\\')
566 // && (source[currentPosition] == 'u')) {
567 // //-------------unicode traitement ------------
568 // int c1, c2, c3, c4;
569 // int unicodeSize = 6;
570 // currentPosition++;
571 // while (source[currentPosition] == 'u') {
572 // currentPosition++;
576 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
578 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
580 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
582 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
584 // currentPosition = temp;
588 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
589 // if (!Character.isDigit(currentCharacter)) {
590 // currentPosition = temp;
594 // //need the unicode buffer
595 // if (withoutUnicodePtr == 0) {
596 // //buffer all the entries that have been left aside....
597 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
601 // withoutUnicodeBuffer,
603 // withoutUnicodePtr);
605 // //fill the buffer with the char
606 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
608 // } //-------------end unicode traitement--------------
610 if (!Character.isDigit(currentCharacter)) {
// not a digit: un-read the character
611 currentPosition = temp;
614 // if (withoutUnicodePtr != 0)
615 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
618 } catch (IndexOutOfBoundsException e) {
// read past the end of source
619 currentPosition = temp;
// Reads the next character from source and checks that it is a digit in the given radix, i.e. that
// Character.digit(currentCharacter, radix) is not -1.
// When it is not, or the read runs past the end of source (IndexOutOfBoundsException), currentPosition is reset to
// its value on entry. The \\uXXXX unicode-escape handling is commented out.
// NOTE(review): the `try {` line and the return statements (presumably false on failure, true on success) are not
// visible in this view.
624 public final boolean getNextCharAsDigit(int radix) {
626 //handle the case of unicode.
627 //when a unicode appears then we must use a buffer that holds char
629 //At the end of this method currentCharacter holds the new visited char
630 //and currentPosition points right next after it
631 //Both previous lines are true if the currentCharacter is a digit base on
633 //On false, no side effect has occured.
634 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the entry position so it can be restored on failure
635 int temp = currentPosition;
637 currentCharacter = source[currentPosition++];
638 // if (((currentCharacter = source[currentPosition++]) == '\\')
639 // && (source[currentPosition] == 'u')) {
640 // //-------------unicode traitement ------------
641 // int c1, c2, c3, c4;
642 // int unicodeSize = 6;
643 // currentPosition++;
644 // while (source[currentPosition] == 'u') {
645 // currentPosition++;
649 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
651 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
653 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
655 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
657 // currentPosition = temp;
661 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
662 // if (Character.digit(currentCharacter, radix) == -1) {
663 // currentPosition = temp;
667 // //need the unicode buffer
668 // if (withoutUnicodePtr == 0) {
669 // //buffer all the entries that have been left aside....
670 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
674 // withoutUnicodeBuffer,
676 // withoutUnicodePtr);
678 // //fill the buffer with the char
679 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
681 // } //-------------end unicode traitement--------------
683 if (Character.digit(currentCharacter, radix) == -1) {
// not a digit in this radix: un-read the character
684 currentPosition = temp;
687 // if (withoutUnicodePtr != 0)
688 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
691 } catch (IndexOutOfBoundsException e) {
// read past the end of source
692 currentPosition = temp;
// Reads the next character from source and checks that it may continue an identifier.
// Despite the method name, the test applied is isPHPIdentifierPart(), not Character.isJavaIdentifierPart().
// When the test fails, or the read runs past the end of source (IndexOutOfBoundsException), currentPosition is
// reset to its value on entry. The \\uXXXX unicode-escape handling is commented out.
// NOTE(review): the `try {` line and the return statements (presumably false on failure, true on success) are not
// visible in this view.
697 public boolean getNextCharAsJavaIdentifierPart() {
699 //handle the case of unicode.
700 //when a unicode appears then we must use a buffer that holds char
702 //At the end of this method currentCharacter holds the new visited char
703 //and currentPosition points right next after it
704 //Both previous lines are true if the currentCharacter is a
705 // JavaIdentifierPart
706 //On false, no side effect has occured.
707 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember the entry position so it can be restored on failure
708 int temp = currentPosition;
710 currentCharacter = source[currentPosition++];
711 // if (((currentCharacter = source[currentPosition++]) == '\\')
712 // && (source[currentPosition] == 'u')) {
713 // //-------------unicode traitement ------------
714 // int c1, c2, c3, c4;
715 // int unicodeSize = 6;
716 // currentPosition++;
717 // while (source[currentPosition] == 'u') {
718 // currentPosition++;
722 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
724 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
726 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
728 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
730 // currentPosition = temp;
734 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
735 // if (!isPHPIdentifierPart(currentCharacter)) {
736 // currentPosition = temp;
740 // //need the unicode buffer
741 // if (withoutUnicodePtr == 0) {
742 // //buffer all the entries that have been left aside....
743 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
747 // withoutUnicodeBuffer,
749 // withoutUnicodePtr);
751 // //fill the buffer with the char
752 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
754 // } //-------------end unicode traitement--------------
756 if (!isPHPIdentifierPart(currentCharacter)) {
// not an identifier character: un-read it
757 currentPosition = temp;
760 // if (withoutUnicodePtr != 0)
761 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
764 } catch (IndexOutOfBoundsException e) {
// read past the end of source
765 currentPosition = temp;
// Looks ahead from the current position to decide whether what follows is a PHP type cast or a plain parenthesis
// (presumably called right after a '(' has been read - confirm against the caller).
// Steps visible here: skip blanks/tabs, collect a run of ASCII letters, compare it with the cast type names
// (int, bool, real, array, unset, float, object, string, double, boolean, integer), skip blanks/tabs again and
// check for ')'. "real", "float" and "double" all map to TokenNamedoubleCAST; "bool"/"boolean" map to
// TokenNameboolCAST and "int"/"integer" to TokenNameintCAST.
// If no cast is recognised, or the scan runs past the end of source, currentCharacter and currentPosition are
// restored and TokenNameLPAREN is returned.
// NOTE(review): the `try`/`do` lines, the `case` labels, the declaration of `index`, the statements that set
// `found` and the statements executed when ')' is found are not visible in this view.
// NOTE(review): the visible comparisons are against lower-case letters only; unless a non-visible line lower-cases
// `data`, upper-case spellings such as (INT) are not recognised - confirm.
770 public int getCastOrParen() {
// saved so that the scanner state can be restored when this is not a cast
771 int tempPosition = currentPosition;
772 char tempCharacter = currentCharacter;
773 int tempToken = TokenNameLPAREN;
774 boolean found = false;
775 StringBuffer buf = new StringBuffer();
778 currentCharacter = source[currentPosition++];
779 } while (currentCharacter == ' ' || currentCharacter == '\t');
// collect the candidate type name (ASCII letters only)
780 while ((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
781 buf.append(currentCharacter);
782 currentCharacter = source[currentPosition++];
// every recognised cast name is 3 to 7 characters long
784 if (buf.length() >= 3 && buf.length() <= 7) {
785 char[] data = buf.toString().toCharArray();
787 switch (data.length) {
790 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
792 tempToken = TokenNameintCAST;
797 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
799 tempToken = TokenNameboolCAST;
802 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
804 tempToken = TokenNamedoubleCAST;
810 if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
811 && (data[++index] == 'y')) {
813 tempToken = TokenNamearrayCAST;
816 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
817 && (data[++index] == 't')) {
819 tempToken = TokenNameunsetCAST;
822 if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
823 && (data[++index] == 't')) {
825 tempToken = TokenNamedoubleCAST;
831 // object string double
832 if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
833 && (data[++index] == 'c') && (data[++index] == 't')) {
835 tempToken = TokenNameobjectCAST;
838 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
839 && (data[++index] == 'n') && (data[++index] == 'g')) {
841 tempToken = TokenNamestringCAST;
844 if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
845 && (data[++index] == 'l') && (data[++index] == 'e')) {
847 tempToken = TokenNamedoubleCAST;
854 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
855 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
857 tempToken = TokenNameboolCAST;
860 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
861 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
863 tempToken = TokenNameintCAST;
// blanks/tabs are allowed between the type name and the closing parenthesis
869 while (currentCharacter == ' ' || currentCharacter == '\t') {
870 currentCharacter = source[currentPosition++];
872 if (currentCharacter == ')') {
877 } catch (IndexOutOfBoundsException e) {
// not a cast: rewind the scanner to where it was on entry
879 currentCharacter = tempCharacter;
880 currentPosition = tempPosition;
881 return TokenNameLPAREN;
// Consumes a backtick-delimited string: reads characters from source until a '`' is found.
// Backslash escapes are handled by scanDoubleQuotedEscapeCharacter(); once an escape has been seen, the token is
// mirrored into withoutUnicodeBuffer with the escape replaced by the scanned character.
// Error handling visible here: running past the end of source moves currentPosition back by 2 and throws
// InvalidInputException(UNTERMINATED_STRING); on an INVALID_ESCAPE error the scanner looks up to 50 characters
// ahead for a closing '`' (stopping at end of source or at a newline) and skips past it.
// If checkNonExternalizedStringLiterals is set, the literal is finally recorded in currentLine (created on demand).
// NOTE(review): the `try {` line, the `break` statements of the look-ahead loop and the rethrow of the caught
// InvalidInputException are not visible in this view.
884 public void consumeStringInterpolated() throws InvalidInputException {
886 // consume next character
887 unicodeAsBackSlash = false;
888 currentCharacter = source[currentPosition++];
889 // if (((currentCharacter = source[currentPosition++]) == '\\')
890 // && (source[currentPosition] == 'u')) {
891 // getNextUnicodeChar();
893 // if (withoutUnicodePtr != 0) {
894 // withoutUnicodeBuffer[++withoutUnicodePtr] =
898 while (currentCharacter != '`') {
899 /** ** in PHP \r and \n are valid in string literals *** */
900 // if ((currentCharacter == '\n')
901 // || (currentCharacter == '\r')) {
902 // // relocate if finding another quote fairly close: thus unicode
903 // '/u000D' will be fully consumed
904 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
905 // if (currentPosition + lookAhead == source.length)
907 // if (source[currentPosition + lookAhead] == '\n')
909 // if (source[currentPosition + lookAhead] == '\"') {
910 // currentPosition += lookAhead + 1;
914 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
916 if (currentCharacter == '\\') {
// escapeSize first holds the position after the backslash, then the number of characters the escape consumed
917 int escapeSize = currentPosition;
918 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
919 //scanEscapeCharacter make a side effect on this value and we need
920 // the previous value few lines down this one
921 scanDoubleQuotedEscapeCharacter();
922 escapeSize = currentPosition - escapeSize;
923 if (withoutUnicodePtr == 0) {
924 //buffer all the entries that have been left aside....
925 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
926 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
927 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
928 } else { //overwrite the / in the buffer
929 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
930 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
931 // where only one is correct
936 // consume next character
937 unicodeAsBackSlash = false;
938 currentCharacter = source[currentPosition++];
939 // if (((currentCharacter = source[currentPosition++]) == '\\')
940 // && (source[currentPosition] == 'u')) {
941 // getNextUnicodeChar();
// keep the mirror buffer in step once it is in use
943 if (withoutUnicodePtr != 0) {
944 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
948 } catch (IndexOutOfBoundsException e) {
949 // reset end position for error reporting
950 currentPosition -= 2;
951 throw new InvalidInputException(UNTERMINATED_STRING);
952 } catch (InvalidInputException e) {
953 if (e.getMessage().equals(INVALID_ESCAPE)) {
954 // relocate if finding another quote fairly close: thus unicode
955 // '/u000D' will be fully consumed
956 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
957 if (currentPosition + lookAhead == source.length)
959 if (source[currentPosition + lookAhead] == '\n')
961 if (source[currentPosition + lookAhead] == '`') {
962 currentPosition += lookAhead + 1;
969 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
970 // //$NON-NLS-?$ where ? is an
972 if (currentLine == null) {
973 currentLine = new NLSLine();
974 lines.add(currentLine);
976 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
// Consumes a single-quoted string: reads characters from source until a '\'' is found.
// Backslash escapes are handled by scanSingleQuotedEscapeCharacter(); once an escape has been seen, the token is
// mirrored into withoutUnicodeBuffer with the escape replaced by the scanned character.
// Error handling visible here: running past the end of source moves currentPosition back by 2 and throws
// InvalidInputException(UNTERMINATED_STRING); on an INVALID_ESCAPE error the scanner looks up to 50 characters
// ahead for a closing '\'' (stopping at end of source or at a newline) and skips past it.
// If checkNonExternalizedStringLiterals is set, the literal is finally recorded in currentLine (created on demand).
// NOTE(review): the `try {` line, the `break` statements of the look-ahead loop and the rethrow of the caught
// InvalidInputException are not visible in this view.
980 public void consumeStringConstant() throws InvalidInputException {
982 // consume next character
983 unicodeAsBackSlash = false;
984 currentCharacter = source[currentPosition++];
985 // if (((currentCharacter = source[currentPosition++]) == '\\')
986 // && (source[currentPosition] == 'u')) {
987 // getNextUnicodeChar();
989 // if (withoutUnicodePtr != 0) {
990 // withoutUnicodeBuffer[++withoutUnicodePtr] =
994 while (currentCharacter != '\'') {
995 /** ** in PHP \r and \n are valid in string literals *** */
996 // if ((currentCharacter == '\n')
997 // || (currentCharacter == '\r')) {
998 // // relocate if finding another quote fairly close: thus unicode
999 // '/u000D' will be fully consumed
1000 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1001 // if (currentPosition + lookAhead == source.length)
1003 // if (source[currentPosition + lookAhead] == '\n')
1005 // if (source[currentPosition + lookAhead] == '\"') {
1006 // currentPosition += lookAhead + 1;
1010 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1012 if (currentCharacter == '\\') {
// escapeSize first holds the position after the backslash, then the number of characters the escape consumed
1013 int escapeSize = currentPosition;
1014 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1015 //scanEscapeCharacter make a side effect on this value and we need
1016 // the previous value few lines down this one
1017 scanSingleQuotedEscapeCharacter();
1018 escapeSize = currentPosition - escapeSize;
1019 if (withoutUnicodePtr == 0) {
1020 //buffer all the entries that have been left aside....
1021 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1022 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1023 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1024 } else { //overwrite the / in the buffer
1025 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1026 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1027 // where only one is correct
1028 withoutUnicodePtr--;
1032 // consume next character
1033 unicodeAsBackSlash = false;
1034 currentCharacter = source[currentPosition++];
1035 // if (((currentCharacter = source[currentPosition++]) == '\\')
1036 // && (source[currentPosition] == 'u')) {
1037 // getNextUnicodeChar();
// keep the mirror buffer in step once it is in use
1039 if (withoutUnicodePtr != 0) {
1040 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1044 } catch (IndexOutOfBoundsException e) {
1045 // reset end position for error reporting
1046 currentPosition -= 2;
1047 throw new InvalidInputException(UNTERMINATED_STRING);
1048 } catch (InvalidInputException e) {
1049 if (e.getMessage().equals(INVALID_ESCAPE)) {
1050 // relocate if finding another quote fairly close: thus unicode
1051 // '/u000D' will be fully consumed
1052 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1053 if (currentPosition + lookAhead == source.length)
1055 if (source[currentPosition + lookAhead] == '\n')
1057 if (source[currentPosition + lookAhead] == '\'') {
1058 currentPosition += lookAhead + 1;
1065 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1066 // //$NON-NLS-?$ where ? is an
1068 if (currentLine == null) {
1069 currentLine = new NLSLine();
1070 lines.add(currentLine);
1072 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1076 public void consumeStringLiteral() throws InvalidInputException {
// Scans a double-quoted string literal: characters are read from `source` until a
// closing '"' is found; backslash escapes are delegated to scanDoubleQuotedEscapeCharacter().
// Throws InvalidInputException(UNTERMINATED_STRING) when the end of `source` is reached first.
// When checkNonExternalizedStringLiterals is set, the literal is added to the current NLSLine.
// NOTE(review): several structural lines (try, else, closing braces) are not visible in this
// excerpt; the comments below describe only the statements that are visible.
1078 // consume next character
1079 unicodeAsBackSlash = false;
1080 currentCharacter = source[currentPosition++];
1081 // if (((currentCharacter = source[currentPosition++]) == '\\')
1082 // && (source[currentPosition] == 'u')) {
1083 // getNextUnicodeChar();
1085 // if (withoutUnicodePtr != 0) {
1086 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1087 // currentCharacter;
// main loop: runs until the closing double quote is the current character
1090 while (currentCharacter != '"') {
1091 /** ** in PHP \r and \n are valid in string literals *** */
1092 // if ((currentCharacter == '\n')
1093 // || (currentCharacter == '\r')) {
1094 // // relocate if finding another quote fairly close: thus unicode
1095 '/u000D' will be fully consumed
1096 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1097 // if (currentPosition + lookAhead == source.length)
1099 // if (source[currentPosition + lookAhead] == '\n')
1101 // if (source[currentPosition + lookAhead] == '\"') {
1102 // currentPosition += lookAhead + 1;
1106 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1108 if (currentCharacter == '\\') {
// remember where the escape starts so that its length can be computed after scanning it
1109 int escapeSize = currentPosition;
1110 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1111 //scanEscapeCharacter make a side effect on this value and we need
1112 // the previous value few lines down this one
1113 scanDoubleQuotedEscapeCharacter();
1114 escapeSize = currentPosition - escapeSize;
// withoutUnicodePtr == 0: nothing has been copied to withoutUnicodeBuffer for this token yet.
// Copy everything from startPosition up to (not including) the backslash, then append
// currentCharacter. NOTE(review): presumably scanDoubleQuotedEscapeCharacter() leaves the
// decoded character in currentCharacter — confirm.
1115 if (withoutUnicodePtr == 0) {
1116 //buffer all the entries that have been left aside....
1117 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1118 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1119 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1120 } else { //overwrite the / in the buffer
1121 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1122 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1123 // where only one is correct
1124 withoutUnicodePtr--;
1128 // consume next character
1129 unicodeAsBackSlash = false;
1130 currentCharacter = source[currentPosition++];
1131 // if (((currentCharacter = source[currentPosition++]) == '\\')
1132 // && (source[currentPosition] == 'u')) {
1133 // getNextUnicodeChar();
// once buffering has started, every consumed character is appended to the buffer as well
1135 if (withoutUnicodePtr != 0) {
1136 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of `source` means the literal was never closed
1140 } catch (IndexOutOfBoundsException e) {
1141 // reset end position for error reporting
1142 currentPosition -= 2;
1143 throw new InvalidInputException(UNTERMINATED_STRING);
1144 } catch (InvalidInputException e) {
// Recovery after an invalid escape: look at most 50 characters ahead for a double quote and,
// if one is found, move currentPosition just past it.
// NOTE(review): the statements executed when the end of source or a '\n' is met first, and
// whether the exception is rethrown afterwards, are not visible in this excerpt.
1145 if (e.getMessage().equals(INVALID_ESCAPE)) {
1146 // relocate if finding another quote fairly close: thus unicode
1147 // '/u000D' will be fully consumed
1148 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1149 if (currentPosition + lookAhead == source.length)
1151 if (source[currentPosition + lookAhead] == '\n')
1153 if (source[currentPosition + lookAhead] == '\"') {
1154 currentPosition += lookAhead + 1;
1161 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1162 // //$NON-NLS-?$ where ? is an
// lazily create the NLSLine for the current source line and register it in `lines`
1164 if (currentLine == null) {
1165 currentLine = new NLSLine();
1166 lines.add(currentLine);
// record the literal's text together with its start and end positions
1168 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1172 public int getNextToken() throws InvalidInputException {
// Returns the id of the next token found at currentPosition (one of the TokenName* constants).
// NOTE(review): many lines of this method (case labels, guards, try/else, closing braces) are
// not visible in this excerpt; the comments below describe only the statements that are visible.
//
// Early return: hand the scan over to getInlinedHTML(...), starting at the current position.
// NOTE(review): the guarding condition is not visible — presumably taken while the scanner is
// not in PHP mode (see the phpMode field); confirm.
1174 return getInlinedHTML(currentPosition);
1177 this.wasAcr = false;
// Skip over a method body, then report EOF if the position moved past the end of source and a
// closing brace otherwise. NOTE(review): the guard for this path is not visible here.
1179 jumpOverMethodBody();
1181 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1185 withoutUnicodePtr = 0;
1186 //start with a new token
// encapsedChar stays ' ' while encapsedStringStack is empty; otherwise it is the character on
// top of that stack.
1187 char encapsedChar = ' ';
1188 if (!encapsedStringStack.isEmpty()) {
1189 encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
// Top of stack is neither '$' nor ' ': scan text up to the next occurrence of encapsedChar.
1191 if (encapsedChar != '$' && encapsedChar != ' ') {
1192 currentCharacter = source[currentPosition++];
// The delimiter comes first: return one of the EncapsedString tokens, selected by the switch
// on the delimiter character (case labels not visible in this excerpt).
1193 if (currentCharacter == encapsedChar) {
1194 switch (currentCharacter) {
1196 return TokenNameEncapsedString0;
1198 return TokenNameEncapsedString1;
1200 return TokenNameEncapsedString2;
1203 while (currentCharacter != encapsedChar) {
1204 /** ** in PHP \r and \n are valid in string literals *** */
1205 switch (currentCharacter) {
// Escape sequence: same buffering statements as in consumeStringLiteral().
1207 int escapeSize = currentPosition;
1208 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1209 //scanEscapeCharacter make a side effect on this value and
1210 // we need the previous value few lines down this one
1211 scanDoubleQuotedEscapeCharacter();
1212 escapeSize = currentPosition - escapeSize;
1213 if (withoutUnicodePtr == 0) {
1214 //buffer all the entries that have been left aside....
1215 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1216 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1217 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1218 } else { //overwrite the / in the buffer
1219 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1220 if (backSlashAsUnicodeInString) { //there are TWO \ in
1221 withoutUnicodePtr--;
// An identifier start or '{' follows: push a '$' marker on encapsedStringStack and return
// TokenNameSTRING. NOTE(review): presumably reached when the current character is '$'
// (a variable inside the string) — the case label is not visible.
1226 if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
1228 encapsedStringStack.push(new Character('$'));
1229 return TokenNameSTRING;
// Same stack push and token when a '$' follows. NOTE(review): presumably reached when the
// current character is '{' — the case label is not visible.
1233 if (source[currentPosition] == '$') { // CURLY_OPEN
1235 encapsedStringStack.push(new Character('$'));
1236 return TokenNameSTRING;
1239 // consume next character
1240 unicodeAsBackSlash = false;
1241 currentCharacter = source[currentPosition++];
1242 if (withoutUnicodePtr != 0) {
1243 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// NOTE(review): presumably reached once the loop above has stopped at the delimiter; the
// statements between the loop and this return are not visible.
1248 return TokenNameSTRING;
1250 // ---------Consume white space and handles startPosition---------
1251 int whiteStart = currentPosition;
1252 startPosition = currentPosition;
1253 currentCharacter = source[currentPosition++];
// A '$' marker is on top of the stack: only the reduced token set below is recognised.
// NOTE(review): the case labels of this switch are not visible in this excerpt.
1254 if (encapsedChar == '$') {
1255 switch (currentCharacter) {
1257 currentCharacter = source[currentPosition++];
1258 return TokenNameSTRING;
1260 if (encapsedChar == '$') {
1261 if (getNextChar('$'))
1262 return TokenNameLBRACE_DOLLAR;
1264 return TokenNameLBRACE;
1266 return TokenNameRBRACE;
1268 return TokenNameLBRACKET;
1270 return TokenNameRBRACKET;
// with tokenizeStrings set the whole string is consumed at once and returned as one token
1272 if (tokenizeStrings) {
1273 consumeStringConstant();
1274 return TokenNameStringSingleQuote;
1276 return TokenNameEncapsedString1;
1278 return TokenNameEncapsedString2;
1280 if (tokenizeStrings) {
1281 consumeStringInterpolated();
1282 return TokenNameStringInterpolated;
1284 return TokenNameEncapsedString0;
1286 if (getNextChar('>'))
1287 return TokenNameMINUS_GREATER;
1288 return TokenNameSTRING;
// '$' then '{' gives TokenNameDOLLAR_LBRACE; '$' then an identifier start is passed to
// scanIdentifierOrKeyword(true); anything else, or reaching the end of source, rewinds to
// oldPosition and returns TokenNameSTRING.
1290 if (currentCharacter == '$') {
1291 int oldPosition = currentPosition;
1293 currentCharacter = source[currentPosition++];
1294 if (currentCharacter == '{') {
1295 return TokenNameDOLLAR_LBRACE;
1297 if (isPHPIdentifierStart(currentCharacter)) {
1298 return scanIdentifierOrKeyword(true);
1300 currentPosition = oldPosition;
1301 return TokenNameSTRING;
1303 } catch (IndexOutOfBoundsException e) {
1304 currentPosition = oldPosition;
1305 return TokenNameSTRING;
1308 if (isPHPIdentifierStart(currentCharacter))
1309 return scanIdentifierOrKeyword(false);
1310 if (Character.isDigit(currentCharacter))
1311 return scanNumber(false);
1312 return TokenNameERROR;
1315 // boolean isWhiteSpace;
// Skip whitespace, moving startPosition along with the scan. On '\r' or '\n',
// checkNonExternalizeString() is called and, when recordLineSeparator is set, pushLineSeparator().
1317 while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1318 startPosition = currentPosition;
1319 currentCharacter = source[currentPosition++];
1320 // if (((currentCharacter = source[currentPosition++]) == '\\')
1321 // && (source[currentPosition] == 'u')) {
1322 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1324 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1325 checkNonExternalizeString();
1326 if (recordLineSeparator) {
1327 pushLineSeparator();
1332 // isWhiteSpace = (currentCharacter == ' ')
1333 // || Character.isWhitespace(currentCharacter);
// Whitespace was skipped and the client asked for whitespace tokens: return it as a token
// that starts at whiteStart.
1336 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1337 // reposition scanner in case we are interested by spaces as tokens
1339 startPosition = whiteStart;
1340 return TokenNameWHITESPACE;
1342 //little trick to get out in the middle of a source compuation
1343 if (currentPosition > eofPosition)
1344 return TokenNameEOF;
1345 // ---------Identify the next token-------------
// Dispatch on the first character of the token. NOTE(review): the case labels are not visible
// in this excerpt; the returned token names indicate which character each branch handles.
1346 switch (currentCharacter) {
1348 return getCastOrParen();
1350 return TokenNameRPAREN;
1352 return TokenNameLBRACE;
1354 return TokenNameRBRACE;
1356 return TokenNameLBRACKET;
1358 return TokenNameRBRACKET;
1360 return TokenNameSEMICOLON;
1362 return TokenNameCOMMA;
// '=' follows: DOT_EQUAL; a digit follows: a number is scanned via scanNumber(true); else DOT
1364 if (getNextChar('='))
1365 return TokenNameDOT_EQUAL;
1366 if (getNextCharAsDigit())
1367 return scanNumber(true);
1368 return TokenNameDOT;
// The result of getNextChar(a, b) is kept in `test`; 0 selects the first token of each group.
// NOTE(review): presumably 0 means the first character matched, a positive value the second
// and a negative value neither — confirm against getNextChar(char, char).
1371 if ((test = getNextChar('+', '=')) == 0)
1372 return TokenNamePLUS_PLUS;
1374 return TokenNamePLUS_EQUAL;
1375 return TokenNamePLUS;
1379 if ((test = getNextChar('-', '=')) == 0)
1380 return TokenNameMINUS_MINUS;
1382 return TokenNameMINUS_EQUAL;
1383 if (getNextChar('>'))
1384 return TokenNameMINUS_GREATER;
1385 return TokenNameMINUS;
1388 if (getNextChar('='))
1389 return TokenNameTWIDDLE_EQUAL;
1390 return TokenNameTWIDDLE;
// up to two '=' may follow: NOT_EQUAL_EQUAL, NOT_EQUAL, or plain NOT
1392 if (getNextChar('=')) {
1393 if (getNextChar('=')) {
1394 return TokenNameNOT_EQUAL_EQUAL;
1396 return TokenNameNOT_EQUAL;
1398 return TokenNameNOT;
1400 if (getNextChar('='))
1401 return TokenNameMULTIPLY_EQUAL;
1402 return TokenNameMULTIPLY;
1404 if (getNextChar('='))
1405 return TokenNameREMAINDER_EQUAL;
1406 return TokenNameREMAINDER;
// Branch returning the LESS family of tokens (LESS, LESS_EQUAL, NOT_EQUAL, LEFT_SHIFT,
// LEFT_SHIFT_EQUAL, HEREDOC). If no further character can be read, the position is restored
// and TokenNameLESS is returned.
1408 int oldPosition = currentPosition;
1410 currentCharacter = source[currentPosition++];
1411 } catch (IndexOutOfBoundsException e) {
1412 currentPosition = oldPosition;
1413 return TokenNameLESS;
1415 switch (currentCharacter) {
1417 return TokenNameLESS_EQUAL;
1419 return TokenNameNOT_EQUAL;
1421 if (getNextChar('='))
1422 return TokenNameLEFT_SHIFT_EQUAL;
// A further '<' introduces a heredoc: skip whitespace, then require an identifier (the heredoc
// tag); TokenNameERROR is returned when the tag does not start with an identifier character.
1423 if (getNextChar('<')) {
1424 currentCharacter = source[currentPosition++];
1425 while (Character.isWhitespace(currentCharacter)) {
1426 currentCharacter = source[currentPosition++];
// heredocStart is the index of currentCharacter, i.e. of the first character of the tag
1428 int heredocStart = currentPosition - 1;
1429 int heredocLength = 0;
1430 if (isPHPIdentifierStart(currentCharacter)) {
1431 currentCharacter = source[currentPosition++];
1433 return TokenNameERROR;
1435 while (isPHPIdentifierPart(currentCharacter)) {
1436 currentCharacter = source[currentPosition++];
// number of characters in the tag identifier
1438 heredocLength = currentPosition - heredocStart - 1;
1439 // heredoc end-tag determination
1440 boolean endTag = true;
// Scan forward; line breaks are recorded when recordLineSeparator is set.
1443 ch = source[currentPosition++];
1444 if (ch == '\r' || ch == '\n') {
1445 if (recordLineSeparator) {
1446 pushLineSeparator();
// compare the text at currentPosition with the tag identifier, character by character
1450 for (int i = 0; i < heredocLength; i++) {
1451 if (source[currentPosition + i] != source[heredocStart + i]) {
// NOTE(review): presumably executed when the tag matched — move to the end of the tag and
// leave the scanning loop; the guarding condition is not visible.
1457 currentPosition += heredocLength - 1;
1458 currentCharacter = source[currentPosition++];
1459 break; // do...while loop
1465 return TokenNameHEREDOC;
1467 return TokenNameLEFT_SHIFT;
1469 currentPosition = oldPosition;
1470 return TokenNameLESS;
1474 if ((test = getNextChar('=', '>')) == 0)
1475 return TokenNameGREATER_EQUAL;
1477 if ((test = getNextChar('=', '>')) == 0)
1478 return TokenNameRIGHT_SHIFT_EQUAL;
1479 return TokenNameRIGHT_SHIFT;
1481 return TokenNameGREATER;
1484 if (getNextChar('=')) {
1485 if (getNextChar('=')) {
1486 return TokenNameEQUAL_EQUAL_EQUAL;
1488 return TokenNameEQUAL_EQUAL;
1490 if (getNextChar('>'))
1491 return TokenNameEQUAL_GREATER;
1492 return TokenNameEQUAL;
1495 if ((test = getNextChar('&', '=')) == 0)
1496 return TokenNameAND_AND;
1498 return TokenNameAND_EQUAL;
1499 return TokenNameAND;
1503 if ((test = getNextChar('|', '=')) == 0)
1504 return TokenNameOR_OR;
1506 return TokenNameOR_EQUAL;
1510 if (getNextChar('='))
1511 return TokenNameXOR_EQUAL;
1512 return TokenNameXOR;
// '>' follows: at the very end of source TokenNameINLINE_HTML is returned directly, otherwise
// scanning continues in getInlinedHTML(currentPosition - 2); without '>' the token is QUESTION.
// NOTE(review): presumably this is the "?>" closing tag — the case label is not visible.
1514 if (getNextChar('>')) {
1516 if (currentPosition == source.length) {
1518 return TokenNameINLINE_HTML;
1520 return getInlinedHTML(currentPosition - 2);
1522 return TokenNameQUESTION;
1524 if (getNextChar(':'))
1525 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1526 return TokenNameCOLON;
1530 consumeStringConstant();
1531 return TokenNameStringSingleQuote;
1533 if (tokenizeStrings) {
1534 consumeStringLiteral();
1535 return TokenNameStringDoubleQuote;
1537 return TokenNameEncapsedString2;
1539 if (tokenizeStrings) {
1540 consumeStringInterpolated();
1541 return TokenNameStringInterpolated;
1543 return TokenNameEncapsedString0;
// Keep the first character of this branch; it is compared against '/' and '#' below.
// NOTE(review): getNextChar('=') is evaluated before the startChar test, so it also runs when
// startChar is '#' — check whether that consumes a '=' directly after '#'.
1546 char startChar = currentCharacter;
1547 if (getNextChar('=') && startChar == '/') {
1548 return TokenNameDIVIDE_EQUAL;
// '#', or test == 0 from getNextChar('/', '*'), starts a line comment.
1551 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1553 this.lastCommentLinePosition = this.currentPosition;
1554 int endPositionForLineComment = 0;
1555 try { //get the next char
1556 currentCharacter = source[currentPosition++];
1557 // if (((currentCharacter = source[currentPosition++])
1559 // && (source[currentPosition] == 'u')) {
1560 // //-------------unicode traitement ------------
1561 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1562 // currentPosition++;
1563 // while (source[currentPosition] == 'u') {
1564 // currentPosition++;
1567 // Character.getNumericValue(source[currentPosition++]))
1571 // Character.getNumericValue(source[currentPosition++]))
1575 // Character.getNumericValue(source[currentPosition++]))
1579 // Character.getNumericValue(source[currentPosition++]))
1583 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1585 // currentCharacter =
1586 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1589 //handle the \\u case manually into comment
1590 // if (currentCharacter == '\\') {
1591 // if (source[currentPosition] == '\\')
1592 // currentPosition++;
1593 // } //jump over the \\
1594 boolean isUnicode = false;
// Read to the end of the line. A '?' followed by '>' inside the comment sets startPosition two
// characters back and returns TokenNameINLINE_HTML.
1595 while (currentCharacter != '\r' && currentCharacter != '\n') {
1596 this.lastCommentLinePosition = this.currentPosition;
1597 if (currentCharacter == '?') {
1598 if (getNextChar('>')) {
1599 startPosition = currentPosition - 2;
1601 return TokenNameINLINE_HTML;
1606 currentCharacter = source[currentPosition++];
1607 // if (((currentCharacter = source[currentPosition++])
1609 // && (source[currentPosition] == 'u')) {
1610 // isUnicode = true;
1611 // //-------------unicode traitement ------------
1612 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1613 // currentPosition++;
1614 // while (source[currentPosition] == 'u') {
1615 // currentPosition++;
1618 // Character.getNumericValue(source[currentPosition++]))
1622 // Character.getNumericValue(
1623 // source[currentPosition++]))
1627 // Character.getNumericValue(
1628 // source[currentPosition++]))
1632 // Character.getNumericValue(
1633 // source[currentPosition++]))
1637 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1639 // currentCharacter =
1640 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1643 //handle the \\u case manually into comment
1644 // if (currentCharacter == '\\') {
1645 // if (source[currentPosition] == '\\')
1646 // currentPosition++;
1647 // } //jump over the \\
// NOTE(review): in the visible lines isUnicode is only set to true inside commented-out code,
// so presumably only the "currentPosition - 1" assignment is ever taken — confirm.
1650 endPositionForLineComment = currentPosition - 6;
1652 endPositionForLineComment = currentPosition - 1;
1654 // recordComment(false);
1655 recordComment(TokenNameCOMMENT_LINE);
1656 if (this.taskTags != null)
1657 checkTaskTag(this.startPosition, this.currentPosition);
1658 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1659 checkNonExternalizeString();
1660 if (recordLineSeparator) {
1662 pushUnicodeLineSeparator();
1664 pushLineSeparator();
// the comment itself is only returned as a token when the client asked for comment tokens
1670 if (tokenizeComments) {
1672 currentPosition = endPositionForLineComment;
1673 // reset one character behind
1675 return TokenNameCOMMENT_LINE;
1677 } catch (IndexOutOfBoundsException e) { //an eof will them
1679 if (tokenizeComments) {
1681 // reset one character behind
1682 return TokenNameCOMMENT_LINE;
1688 //traditional and annotation comment
1689 boolean isJavadoc = false, star = false;
1690 // consume next character
1691 unicodeAsBackSlash = false;
1692 currentCharacter = source[currentPosition++];
1693 // if (((currentCharacter = source[currentPosition++]) ==
1695 // && (source[currentPosition] == 'u')) {
1696 // getNextUnicodeChar();
1698 // if (withoutUnicodePtr != 0) {
1699 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1700 // currentCharacter;
// NOTE(review): the body of this test is not visible — presumably a '*' right after the
// comment opener marks a PHPDoc comment (isJavadoc) and sets star; confirm.
1703 if (currentCharacter == '*') {
1707 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1708 checkNonExternalizeString();
1709 if (recordLineSeparator) {
1710 pushLineSeparator();
1715 try { //get the next char
1716 currentCharacter = source[currentPosition++];
1717 // if (((currentCharacter = source[currentPosition++])
1719 // && (source[currentPosition] == 'u')) {
1720 // //-------------unicode traitement ------------
1721 // getNextUnicodeChar();
1723 //handle the \\u case manually into comment
1724 // if (currentCharacter == '\\') {
1725 // if (source[currentPosition] == '\\')
1726 // currentPosition++;
1727 // //jump over the \\
1729 // empty comment is not a javadoc /**/
1730 if (currentCharacter == '/') {
1733 //loop until end of comment */
// the loop ends on a '/' whose preceding character was '*' (tracked in `star`)
1734 while ((currentCharacter != '/') || (!star)) {
1735 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1736 checkNonExternalizeString();
1737 if (recordLineSeparator) {
1738 pushLineSeparator();
1743 star = currentCharacter == '*';
1745 currentCharacter = source[currentPosition++];
1746 // if (((currentCharacter = source[currentPosition++])
1748 // && (source[currentPosition] == 'u')) {
1749 // //-------------unicode traitement ------------
1750 // getNextUnicodeChar();
1752 //handle the \\u case manually into comment
1753 // if (currentCharacter == '\\') {
1754 // if (source[currentPosition] == '\\')
1755 // currentPosition++;
1756 // } //jump over the \\
1758 //recordComment(isJavadoc);
// The comment is recorded either as PHPDoc or as a block comment.
// NOTE(review): the selecting condition is not visible — presumably isJavadoc.
1760 recordComment(TokenNameCOMMENT_PHPDOC);
1762 recordComment(TokenNameCOMMENT_BLOCK);
1765 if (tokenizeComments) {
1767 return TokenNameCOMMENT_PHPDOC;
1768 return TokenNameCOMMENT_BLOCK;
1771 if (this.taskTags != null) {
1772 checkTaskTag(this.startPosition, this.currentPosition);
// the end of source was reached before the block comment was closed
1774 } catch (IndexOutOfBoundsException e) {
1775 // reset end position for error reporting
1776 currentPosition -= 2;
1777 throw new InvalidInputException(UNTERMINATED_COMMENT);
1781 return TokenNameDIVIDE;
1785 return TokenNameEOF;
1786 //the atEnd may not be <currentPosition == source.length> if
1787 // source is only some part of a real (external) stream
1788 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// Fallback branch (label not visible). '$' followed by an identifier start is passed to
// scanIdentifierOrKeyword(true); a '$' on its own, or at the end of source, rewinds to
// oldPosition and returns TokenNameDOLLAR.
1790 if (currentCharacter == '$') {
1791 int oldPosition = currentPosition;
1793 currentCharacter = source[currentPosition++];
1794 if (isPHPIdentifierStart(currentCharacter)) {
1795 return scanIdentifierOrKeyword(true);
1797 currentPosition = oldPosition;
1798 return TokenNameDOLLAR;
1800 } catch (IndexOutOfBoundsException e) {
1801 currentPosition = oldPosition;
1802 return TokenNameDOLLAR;
// identifiers/keywords first, then numbers; any other character yields TokenNameERROR
1805 if (isPHPIdentifierStart(currentCharacter))
1806 return scanIdentifierOrKeyword(false);
1807 if (Character.isDigit(currentCharacter))
1808 return scanNumber(false);
1809 return TokenNameERROR;
1812 } //-----------------end switch while try--------------------
// reading past the end of `source` anywhere inside the try ends up here
1813 catch (IndexOutOfBoundsException e) {
1816 return TokenNameEOF;
1819 private int getInlinedHTML(int start) throws InvalidInputException {
// Obtains the next token from getInlinedHTMLToken(start).
// For a TokenNameINLINE_HTML token, the only code visible in this excerpt is a commented-out
// draft that would walk the HTML tags of the scanned region using a stack of start tags.
// NOTE(review): the end of the method is not visible here — presumably it returns `token`.
1820 int token = getInlinedHTMLToken(start);
1821 if (token == TokenNameINLINE_HTML) {
1822 // Stack stack = new Stack();
1823 // // scan html for errors
1824 // Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
1825 // int lastPHPEndPos=0;
1826 // for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
1827 // Tag tag=(Tag)i.next();
1829 // if (tag instanceof StartTag) {
1830 // StartTag startTag=(StartTag)tag;
1831 // // System.out.println("startTag: "+tag);
1832 // if (startTag.isServerTag()) {
1833 // // TODO : what to do with a server tag ?
1835 // // do whatever with HTML start tag
1836 // // use startTag.getElement() to find the element corresponding
1837 // // to this start tag which may be useful if you implement code
1839 // stack.push(startTag);
1842 // EndTag endTag=(EndTag)tag;
1843 // StartTag stag = (StartTag) stack.peek();
1844 //// System.out.println("endTag: "+tag);
1845 // // do whatever with HTML end tag.
1854 * @throws InvalidInputException
1856 private int getInlinedHTMLToken(int start) throws InvalidInputException {
// Scans inline HTML, with the token starting at `start`, looking for a PHP open tag:
// "<?" followed by whitespace, or "<?" followed by the letters p-h-p in either case.
// Returns TokenNameINLINE_HTML, or TokenNameEOF when the scanner is already past the end.
// NOTE(review): the loop header, the try and several guards/assignments are not visible in
// this excerpt; the comments below describe only the statements that are visible.
1857 // int htmlPosition = start;
// already past the end of source: clamp the position and report EOF
1858 if (currentPosition > source.length) {
1859 currentPosition = source.length;
1860 return TokenNameEOF;
1862 startPosition = start;
1865 currentCharacter = source[currentPosition++];
1866 if (currentCharacter == '<') {
1867 if (getNextChar('?')) {
1868 currentCharacter = source[currentPosition++];
// "<?" followed by whitespace
1869 if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
// With ignorePHPOneLiner set, lookAheadLinePHPTag() is consulted first; the INLINE_HTML token
// is returned when it also answers TokenNameINLINE_HTML.
// NOTE(review): what happens when it answers otherwise is not visible in this excerpt.
1871 if (ignorePHPOneLiner) {
1872 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1874 return TokenNameINLINE_HTML;
1878 return TokenNameINLINE_HTML;
// "<?" followed by 'p'/'P': check for the remaining letters 'h'/'H' and 'p'/'P'
1881 boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
1883 int test = getNextChar('H', 'h');
1885 test = getNextChar('P', 'p');
1888 if (ignorePHPOneLiner) {
1889 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1891 return TokenNameINLINE_HTML;
1895 return TokenNameINLINE_HTML;
// line breaks inside the HTML are recorded when recordLineSeparator is set
1903 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1904 if (recordLineSeparator) {
1905 pushLineSeparator();
1910 } //-----------------while--------------------
1912 return TokenNameINLINE_HTML;
1913 } //-----------------try--------------------
// the end of source was reached while scanning: the token still starts at `start`
1914 catch (IndexOutOfBoundsException e) {
1915 startPosition = start;
1919 return TokenNameINLINE_HTML;
1925 private int lookAheadLinePHPTag() {
// Looks ahead from currentPosition, using a local cursor, for a '?' immediately followed by
// the character handled in the first switch branch (presumably '>', i.e. the "?>" close tag).
// When found, currentPosition is moved just past it and TokenNameEOF is returned as a dummy
// value; the other visible exits return TokenNameINLINE_HTML.
// NOTE(review): the case labels and the loop header are not visible in this excerpt.
1926 // check if the PHP is only in this line (for CodeFormatter)
1927 int currentPositionInLine = currentPosition;
1928 char previousCharInLine = ' ';
1929 char currentCharInLine = ' ';
// string state: used below so that comment starts inside string literals are ignored
1930 boolean singleQuotedStringActive = false;
1931 boolean doubleQuotedStringActive = false;
1934 // look ahead in this line
1936 previousCharInLine = currentCharInLine;
1937 currentCharInLine = source[currentPositionInLine++];
1938 switch (currentCharInLine) {
1940 if (previousCharInLine == '?') {
1941 // update the scanner's current Position in the source
1942 currentPosition = currentPositionInLine;
1943 // use as "dummy" token
1944 return TokenNameEOF;
// Double-quote handling: an active double-quoted string ends unless the quote is preceded by a
// backslash; a new one starts only when no single-quoted string is active.
1948 if (doubleQuotedStringActive) {
1949 if (previousCharInLine != '\\') {
1950 doubleQuotedStringActive = false;
1953 if (!singleQuotedStringActive) {
1954 doubleQuotedStringActive = true;
// Single-quote handling, symmetrical to the double-quote branch above.
1959 if (singleQuotedStringActive) {
1960 if (previousCharInLine != '\\') {
1961 singleQuotedStringActive = false;
1964 if (!doubleQuotedStringActive) {
1965 singleQuotedStringActive = true;
// NOTE(review): label not visible — presumably the end of the line ('\n').
1971 return TokenNameINLINE_HTML;
// NOTE(review): label not visible — presumably a '#' comment start outside of any string.
1973 if (!singleQuotedStringActive && !doubleQuotedStringActive) {
1975 return TokenNameINLINE_HTML;
// NOTE(review): labels not visible — presumably the second character of "//" and "/*",
// given the test on a preceding '/' outside of any string.
1979 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1981 return TokenNameINLINE_HTML;
1985 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1987 return TokenNameINLINE_HTML;
// the end of source was reached: move the scanner to where the look-ahead stopped
1992 } catch (IndexOutOfBoundsException e) {
1994 currentPosition = currentPositionInLine;
1995 return TokenNameINLINE_HTML;
1999 // public final void getNextUnicodeChar()
2000 // throws IndexOutOfBoundsException, InvalidInputException {
2002 // //handle the case of unicode.
2003 // //when a unicode appears then we must use a buffer that holds char
2005 // //At the end of this method currentCharacter holds the new visited char
2006 // //and currentPosition points right next after it
2008 // //ALL getNextChar.... ARE OPTIMIZED COPIES
2010 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2011 // currentPosition++;
2012 // while (source[currentPosition] == 'u') {
2013 // currentPosition++;
2017 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2019 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2021 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2023 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2025 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2027 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2028 // //need the unicode buffer
2029 // if (withoutUnicodePtr == 0) {
2030 // //buffer all the entries that have been left aside....
2031 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2032 // System.arraycopy(
2035 // withoutUnicodeBuffer,
2037 // withoutUnicodePtr);
2039 // //fill the buffer with the char
2040 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2042 // unicodeAsBackSlash = currentCharacter == '\\';
2045 * Tokenize a method body, assuming that curly brackets are properly balanced.
2047 public final void jumpOverMethodBody() {
// Consumes source text from currentPosition without returning tokens: whitespace, string
// contents, comments, identifiers and numbers are stepped over one after another.
// NOTE(review): the brace counting, the case labels and most try/else/closing-brace lines are
// not visible in this excerpt; the comments below describe only the statements that are visible.
2048 this.wasAcr = false;
2051 while (true) { //loop for jumping over comments
2052 // ---------Consume white space and handles startPosition---------
2053 boolean isWhiteSpace;
// skip whitespace; line breaks are recorded when recordLineSeparator is set
2055 startPosition = currentPosition;
2056 currentCharacter = source[currentPosition++];
2057 // if (((currentCharacter = source[currentPosition++]) == '\\')
2058 // && (source[currentPosition] == 'u')) {
2059 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2061 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2062 pushLineSeparator();
2063 isWhiteSpace = Character.isWhitespace(currentCharacter);
2065 } while (isWhiteSpace);
2066 // -------consume token until } is found---------
2067 switch (currentCharacter) {
// NOTE(review): presumably the single-quote branch — a backslash is looked for and, if
// present, the escape is scanned with scanDoubleQuotedEscapeCharacter(); confirm.
2078 test = getNextChar('\\');
2081 scanDoubleQuotedEscapeCharacter();
2082 } catch (InvalidInputException ex) {
2086 // try { // consume next character
2087 unicodeAsBackSlash = false;
2088 currentCharacter = source[currentPosition++];
2089 // if (((currentCharacter = source[currentPosition++]) == '\\')
2090 // && (source[currentPosition] == 'u')) {
2091 // getNextUnicodeChar();
2093 if (withoutUnicodePtr != 0) {
2094 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2097 // } catch (InvalidInputException ex) {
2105 // try { // consume next character
2106 unicodeAsBackSlash = false;
2107 currentCharacter = source[currentPosition++];
2108 // if (((currentCharacter = source[currentPosition++]) == '\\')
2109 // && (source[currentPosition] == 'u')) {
2110 // getNextUnicodeChar();
2112 if (withoutUnicodePtr != 0) {
2113 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2116 // } catch (InvalidInputException ex) {
// double-quoted string: consume characters up to the closing '"'
2118 while (currentCharacter != '"') {
// line-break handling inside the string (the statements taken are not visible in this excerpt)
2119 if (currentCharacter == '\r') {
2120 if (source[currentPosition] == '\n')
2123 // the string cannot go further that the line
2125 if (currentCharacter == '\n') {
2127 // the string cannot go further that the line
2129 if (currentCharacter == '\\') {
2131 scanDoubleQuotedEscapeCharacter();
2132 } catch (InvalidInputException ex) {
2136 // try { // consume next character
2137 unicodeAsBackSlash = false;
2138 currentCharacter = source[currentPosition++];
2139 // if (((currentCharacter = source[currentPosition++]) == '\\')
2140 // && (source[currentPosition] == 'u')) {
2141 // getNextUnicodeChar();
2143 if (withoutUnicodePtr != 0) {
2144 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2147 // } catch (InvalidInputException ex) {
2150 } catch (IndexOutOfBoundsException e) {
// Comment handling: test == 0 from getNextChar('/', '*') selects the line-comment path below.
// NOTE(review): presumably 0 means the first candidate ('/') matched — confirm against
// getNextChar(char, char).
2156 if ((test = getNextChar('/', '*')) == 0) {
2160 currentCharacter = source[currentPosition++];
2161 // if (((currentCharacter = source[currentPosition++]) ==
2163 // && (source[currentPosition] == 'u')) {
2164 // //-------------unicode traitement ------------
2165 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2166 // currentPosition++;
2167 // while (source[currentPosition] == 'u') {
2168 // currentPosition++;
2171 // Character.getNumericValue(source[currentPosition++]))
2175 // Character.getNumericValue(source[currentPosition++]))
2179 // Character.getNumericValue(source[currentPosition++]))
2183 // Character.getNumericValue(source[currentPosition++]))
2186 // //error don't care of the value
2187 // currentCharacter = 'A';
2188 // } //something different from \n and \r
2190 // currentCharacter =
2191 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// line comment: read up to the end of the line
2194 while (currentCharacter != '\r' && currentCharacter != '\n') {
2196 currentCharacter = source[currentPosition++];
2197 // if (((currentCharacter = source[currentPosition++])
2199 // && (source[currentPosition] == 'u')) {
2200 // //-------------unicode traitement ------------
2201 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2202 // currentPosition++;
2203 // while (source[currentPosition] == 'u') {
2204 // currentPosition++;
2207 // Character.getNumericValue(source[currentPosition++]))
2211 // Character.getNumericValue(source[currentPosition++]))
2215 // Character.getNumericValue(source[currentPosition++]))
2219 // Character.getNumericValue(source[currentPosition++]))
2222 // //error don't care of the value
2223 // currentCharacter = 'A';
2224 // } //something different from \n and \r
2226 // currentCharacter =
2227 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2231 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2232 pushLineSeparator();
2233 } catch (IndexOutOfBoundsException e) {
2234 } //an eof will them be generated
2238 //traditional and annotation comment
2239 boolean star = false;
2240 // try { // consume next character
2241 unicodeAsBackSlash = false;
2242 currentCharacter = source[currentPosition++];
2243 // if (((currentCharacter = source[currentPosition++]) == '\\')
2244 // && (source[currentPosition] == 'u')) {
2245 // getNextUnicodeChar();
2247 if (withoutUnicodePtr != 0) {
2248 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2251 // } catch (InvalidInputException ex) {
2253 if (currentCharacter == '*') {
2256 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2257 pushLineSeparator();
2258 try { //get the next char
2259 currentCharacter = source[currentPosition++];
2260 // if (((currentCharacter = source[currentPosition++]) ==
2262 // && (source[currentPosition] == 'u')) {
2263 // //-------------unicode traitement ------------
2264 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2265 // currentPosition++;
2266 // while (source[currentPosition] == 'u') {
2267 // currentPosition++;
2270 // Character.getNumericValue(source[currentPosition++]))
2274 // Character.getNumericValue(source[currentPosition++]))
2278 // Character.getNumericValue(source[currentPosition++]))
2282 // Character.getNumericValue(source[currentPosition++]))
2285 // //error don't care of the value
2286 // currentCharacter = 'A';
2287 // } //something different from * and /
2289 // currentCharacter =
2290 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2293 //loop until end of comment */
// the loop ends on a '/' whose preceding character was '*' (tracked in `star`)
2294 while ((currentCharacter != '/') || (!star)) {
2295 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2296 pushLineSeparator();
2297 star = currentCharacter == '*';
2299 currentCharacter = source[currentPosition++];
2300 // if (((currentCharacter = source[currentPosition++])
2302 // && (source[currentPosition] == 'u')) {
2303 // //-------------unicode traitement ------------
2304 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2305 // currentPosition++;
2306 // while (source[currentPosition] == 'u') {
2307 // currentPosition++;
2310 // Character.getNumericValue(source[currentPosition++]))
2314 // Character.getNumericValue(source[currentPosition++]))
2318 // Character.getNumericValue(source[currentPosition++]))
2322 // Character.getNumericValue(source[currentPosition++]))
2325 // //error don't care of the value
2326 // currentCharacter = 'A';
2327 // } //something different from * and /
2329 // currentCharacter =
2330 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2334 } catch (IndexOutOfBoundsException e) {
// identifier, keyword or '$' variable: the boolean argument is true exactly when the first
// character is '$'
2342 if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
2344 scanIdentifierOrKeyword((currentCharacter == '$'));
2345 } catch (InvalidInputException ex) {
// number. NOTE(review): the call made here is not visible — presumably scanNumber(false).
2350 if (Character.isDigit(currentCharacter)) {
2353 } catch (InvalidInputException ex) {
2360 //-----------------end switch while try--------------------
// NOTE(review): the bodies of both handlers are not visible in this excerpt; if they are
// empty, the end of source and scanning errors end the skip silently.
2361 } catch (IndexOutOfBoundsException e) {
2362 } catch (InvalidInputException e) {
2367 // public final boolean jumpOverUnicodeWhiteSpace()
2368 // throws InvalidInputException {
2370 // //handle the case of unicode. Jump over the next whiteSpace
2371 // //making startPosition pointing on the next available char
2372 // //On false, the currentCharacter is filled up with a potential
2376 // this.wasAcr = false;
2377 // int c1, c2, c3, c4;
2378 // int unicodeSize = 6;
2379 // currentPosition++;
2380 // while (source[currentPosition] == 'u') {
2381 // currentPosition++;
2385 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2387 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2389 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2391 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2393 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2396 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2397 // if (recordLineSeparator
2398 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2399 // pushLineSeparator();
2400 // if (Character.isWhitespace(currentCharacter))
2403 // //buffer the new char which is not a white space
2404 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2405 // //withoutUnicodePtr == 1 is true here
2407 // } catch (IndexOutOfBoundsException e) {
2408 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2411 public final int[] getLineEnds() {
2412 //return a bounded copy of this.lineEnds
2414 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2418 public char[] getSource() {
2422 public static boolean isIdentifierOrKeyword(int token) {
2423 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
/**
 * Returns the source of a one-character token.
 * Reads the character at source[startPosition]; the fallback at the end of the method wraps it
 * in a newly allocated one-element array.
 * NOTE(review): per the comments below, frequent single-character identifiers are meant to be
 * answered with arrays that are built only once - confirm which characters take that path.
 *
 * @return a char array containing the single character of the current token
 */
2426 final char[] optimizedCurrentTokenSource1() {
2427 //return always the same char[] build only once
2428 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2429 char charOne = source[startPosition];
// fallback: allocate a fresh one-element array for this character
2484 return new char[] { charOne };
/**
 * Returns the source of a two-character token, reusing previously built arrays where possible.
 * The two characters are read from source[startPosition] and source[startPosition + 1].
 * A set of prebuilt arrays (charArray_va .. charArray_vz) can be returned directly; otherwise the
 * pair is hashed into a bucket of charArray_length[0] and the bucket is searched for an equal entry.
 * On a miss a new array is created and stored in the bucket.
 * NOTE(review): the charArray_v* constants presumably hold "$a" .. "$z" - confirm against their declarations.
 *
 * @return a char array with the two characters of the current token (possibly shared with earlier calls)
 */
2488 final char[] optimizedCurrentTokenSource2() {
2490 c0 = source[startPosition];
2491 c1 = source[startPosition + 1];
2493 //return always the same char[] build only once
2494 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2497 return charArray_va;
2499 return charArray_vb;
2501 return charArray_vc;
2503 return charArray_vd;
2505 return charArray_ve;
2507 return charArray_vf;
2509 return charArray_vg;
2511 return charArray_vh;
2513 return charArray_vi;
2515 return charArray_vj;
2517 return charArray_vk;
2519 return charArray_vl;
2521 return charArray_vm;
2523 return charArray_vn;
2525 return charArray_vo;
2527 return charArray_vp;
2529 return charArray_vq;
2531 return charArray_vr;
2533 return charArray_vs;
2535 return charArray_vt;
2537 return charArray_vu;
2539 return charArray_vv;
2541 return charArray_vw;
2543 return charArray_vx;
2545 return charArray_vy;
2547 return charArray_vz;
2550 //try to return the same char[] build only once
// bucket index: both characters combined, reduced modulo TableSize
2551 int hash = ((c0 << 6) + c1) % TableSize;
2552 char[][] table = charArray_length[0][hash];
// first pass: walk the bucket towards its end (index InternalTableSize - 1)
2554 while (++i < InternalTableSize) {
2555 char[] charArray = table[i];
2556 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2559 //---------other side---------
// second pass: walk the bucket up to and including slot newEntry2
2561 int max = newEntry2;
2562 while (++i <= max) {
2563 char[] charArray = table[i];
2564 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2567 //--------add the entry-------
// advance the insertion slot; the check below detects running past the end of the bucket
2568 if (++max >= InternalTableSize)
2571 table[max] = (r = new char[] { c0, c1 });
/**
 * Returns the source of a three-character token, reusing a previously built array when an equal
 * one is found in the bucket of charArray_length[1] selected by a hash of the three characters.
 * On a miss a new array is created and stored in the bucket.
 *
 * @return a char array with the three characters starting at source[startPosition]
 */
2576 final char[] optimizedCurrentTokenSource3() {
2577 //try to return the same char[] build only once
// the three characters are captured in c0..c2 while the hash is being computed
2579 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2581 char[][] table = charArray_length[1][hash];
// first pass: walk the bucket towards its end (index InternalTableSize - 1)
2583 while (++i < InternalTableSize) {
2584 char[] charArray = table[i];
2585 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2588 //---------other side---------
// second pass: walk the bucket up to and including slot newEntry3
2590 int max = newEntry3;
2591 while (++i <= max) {
2592 char[] charArray = table[i];
2593 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2596 //--------add the entry-------
// advance the insertion slot; the check below detects running past the end of the bucket
2597 if (++max >= InternalTableSize)
2600 table[max] = (r = new char[] { c0, c1, c2 });
/**
 * Returns the source of a four-character token, reusing a previously built array when an equal
 * one is found in the bucket of charArray_length[2] selected by a hash of the four characters.
 * On a miss a new array is created and stored in the bucket.
 *
 * @return a char array with the four characters starting at source[startPosition]
 */
2605 final char[] optimizedCurrentTokenSource4() {
2606 //try to return the same char[] build only once
2607 char c0, c1, c2, c3;
// the first character is widened to long before shifting, so the sum is computed as a long
2608 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2609 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
// the hash is narrowed back to int to index the bucket table
2611 char[][] table = charArray_length[2][(int) hash];
// first pass: walk the bucket towards its end (index InternalTableSize - 1)
2613 while (++i < InternalTableSize) {
2614 char[] charArray = table[i];
2615 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2618 //---------other side---------
// second pass: walk the bucket up to and including slot newEntry4
2620 int max = newEntry4;
2621 while (++i <= max) {
2622 char[] charArray = table[i];
2623 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2626 //--------add the entry-------
// advance the insertion slot; the check below detects running past the end of the bucket
2627 if (++max >= InternalTableSize)
2630 table[max] = (r = new char[] { c0, c1, c2, c3 });
/**
 * Returns the source of a five-character token, reusing a previously built array when an equal
 * one is found in the bucket of charArray_length[3] selected by a hash of the five characters.
 * On a miss a new array is created and stored in the bucket.
 *
 * @return a char array with the five characters starting at source[startPosition]
 */
2635 final char[] optimizedCurrentTokenSource5() {
2636 //try to return the same char[] build only once
2637 char c0, c1, c2, c3, c4;
// the two leading characters are widened to long before shifting, so the sum is computed as a long
2638 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2639 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
// the hash is narrowed back to int to index the bucket table
2641 char[][] table = charArray_length[3][(int) hash];
// first pass: walk the bucket towards its end (index InternalTableSize - 1)
2643 while (++i < InternalTableSize) {
2644 char[] charArray = table[i];
2645 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2648 //---------other side---------
// second pass: walk the bucket up to and including slot newEntry5
2650 int max = newEntry5;
2651 while (++i <= max) {
2652 char[] charArray = table[i];
2653 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2656 //--------add the entry-------
// advance the insertion slot; the check below detects running past the end of the bucket
2657 if (++max >= InternalTableSize)
2660 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
/**
 * Returns the source of a six-character token, reusing a previously built array when an equal
 * one is found in the bucket of charArray_length[4] selected by a hash of the six characters.
 * On a miss a new array is created and stored in the bucket.
 *
 * @return a char array with the six characters starting at source[startPosition]
 */
2665 final char[] optimizedCurrentTokenSource6() {
2666 //try to return the same char[] build only once
2667 char c0, c1, c2, c3, c4, c5;
// the three leading characters are widened to long before shifting, so the sum is computed as a long
2668 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2669 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2670 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
// the hash is narrowed back to int to index the bucket table
2672 char[][] table = charArray_length[4][(int) hash];
// first pass: walk the bucket towards its end (index InternalTableSize - 1)
2674 while (++i < InternalTableSize) {
2675 char[] charArray = table[i];
2676 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2677 && (c5 == charArray[5]))
2680 //---------other side---------
// second pass: walk the bucket up to and including slot newEntry6
2682 int max = newEntry6;
2683 while (++i <= max) {
2684 char[] charArray = table[i];
2685 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2686 && (c5 == charArray[5]))
2689 //--------add the entry-------
// advance the insertion slot; the check below detects running past the end of the bucket
2690 if (++max >= InternalTableSize)
2693 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
/**
 * Records the position of the line separator held in currentCharacter into lineEnds.
 * A '\r' is stored at currentPosition - 1 and, when the next source character is '\n', the
 * stored position is moved onto that '\n' so that CR+LF counts as one separator.
 * A '\n' either replaces the position of an immediately preceding '\r' (tracked via wasAcr)
 * or is stored as a new entry. lineEnds grows by INCREMENT entries when it is full.
 *
 * @throws InvalidInputException declared by the signature; NOTE(review): no throwing statement
 *         is visible in the body - confirm whether the clause is still needed
 */
2698 public final void pushLineSeparator() throws InvalidInputException {
2699 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2700 final int INCREMENT = 250;
2701 if (this.checkNonExternalizedStringLiterals) {
2702 // reinitialize the current line for non externalize strings purpose
2705 //currentCharacter is at position currentPosition-1
2707 if (currentCharacter == '\r') {
2708 int separatorPos = currentPosition - 1;
// guard: this separator lies at or before the last line end already recorded
2709 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2711 //System.out.println("CR-" + separatorPos);
2713 lineEnds[++linePtr] = separatorPos;
2714 } catch (IndexOutOfBoundsException e) {
// lineEnds was full: grow it by INCREMENT entries, keep the old content and store again
2715 //linePtr value is correct
2716 int oldLength = lineEnds.length;
2717 int[] old = lineEnds;
2718 lineEnds = new int[oldLength + INCREMENT];
2719 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2720 lineEnds[linePtr] = separatorPos;
2722 // look-ahead for merged cr+lf
2724 if (source[currentPosition] == '\n') {
2725 //System.out.println("look-ahead LF-" + currentPosition);
// the entry just written for '\r' is moved onto the following '\n'
2726 lineEnds[linePtr] = currentPosition;
// reading source[currentPosition] can run past the end of the source
2732 } catch (IndexOutOfBoundsException e) {
2737 if (currentCharacter == '\n') {
2738 //must merge eventual cr followed by lf
// the previous entry is the '\r' directly before this '\n': reuse that entry
2739 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2740 //System.out.println("merge LF-" + (currentPosition - 1));
2741 lineEnds[linePtr] = currentPosition - 1;
2743 int separatorPos = currentPosition - 1;
// guard: this separator lies at or before the last line end already recorded
2744 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2746 // System.out.println("LF-" + separatorPos);
2748 lineEnds[++linePtr] = separatorPos;
2749 } catch (IndexOutOfBoundsException e) {
// lineEnds was full: grow it by INCREMENT entries, keep the old content and store again
2750 //linePtr value is correct
2751 int oldLength = lineEnds.length;
2752 int[] old = lineEnds;
2753 lineEnds = new int[oldLength + INCREMENT];
2754 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2755 lineEnds[linePtr] = separatorPos;
/**
 * Variant of pushLineSeparator() for a '\r' or '\n' that was read as a unicode escape.
 * The recorded position is currentPosition - 6 instead of currentPosition - 1, and the
 * CR/LF merge test compares against currentPosition - 7.
 * NOTE(review): the offset 6 presumably corresponds to the length of a backslash-u escape with
 * four hex digits - confirm, since escapes with repeated 'u' would be longer.
 * lineEnds grows by INCREMENT entries when it is full.
 */
2763 public final void pushUnicodeLineSeparator() {
2764 // isUnicode means that the \r or \n has been read as a unicode character
2765 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2766 final int INCREMENT = 250;
2767 //currentCharacter is at position currentPosition-1
2768 if (this.checkNonExternalizedStringLiterals) {
2769 // reinitialize the current line for non externalize strings purpose
2773 if (currentCharacter == '\r') {
2774 int separatorPos = currentPosition - 6;
// guard: this separator lies at or before the last line end already recorded
2775 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2777 //System.out.println("CR-" + separatorPos);
2779 lineEnds[++linePtr] = separatorPos;
2780 } catch (IndexOutOfBoundsException e) {
// lineEnds was full: grow it by INCREMENT entries, keep the old content and store again
2781 //linePtr value is correct
2782 int oldLength = lineEnds.length;
2783 int[] old = lineEnds;
2784 lineEnds = new int[oldLength + INCREMENT];
2785 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2786 lineEnds[linePtr] = separatorPos;
2788 // look-ahead for merged cr+lf
2789 if (source[currentPosition] == '\n') {
2790 //System.out.println("look-ahead LF-" + currentPosition);
// the entry just written for '\r' is moved onto the following '\n'
2791 lineEnds[linePtr] = currentPosition;
2799 if (currentCharacter == '\n') {
2800 //must merge eventual cr followed by lf
// the previous entry is the '\r' directly before this escaped '\n': reuse that entry
2801 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2802 //System.out.println("merge LF-" + (currentPosition - 1));
2803 lineEnds[linePtr] = currentPosition - 6;
2805 int separatorPos = currentPosition - 6;
// guard: this separator lies at or before the last line end already recorded
2806 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2808 // System.out.println("LF-" + separatorPos);
2810 lineEnds[++linePtr] = separatorPos;
2811 } catch (IndexOutOfBoundsException e) {
// lineEnds was full: grow it by INCREMENT entries, keep the old content and store again
2812 //linePtr value is correct
2813 int oldLength = lineEnds.length;
2814 int[] old = lineEnds;
2815 lineEnds = new int[oldLength + INCREMENT];
2816 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2817 lineEnds[linePtr] = separatorPos;
2825 public void recordComment(int token) {
2827 int stopPosition = this.currentPosition;
2829 case TokenNameCOMMENT_LINE:
2830 stopPosition = -this.lastCommentLinePosition;
2832 case TokenNameCOMMENT_BLOCK:
2833 stopPosition = -this.currentPosition;
2837 // a new comment is recorded
2838 int length = this.commentStops.length;
2839 if (++this.commentPtr >= length) {
2840 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2841 //grows the positions buffers too
2842 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2844 this.commentStops[this.commentPtr] = stopPosition;
2845 this.commentStarts[this.commentPtr] = this.startPosition;
2848 // public final void recordComment(boolean isJavadoc) {
2849 // // a new annotation comment is recorded
2851 // commentStops[++commentPtr] = isJavadoc
2852 // ? currentPosition
2853 // : -currentPosition;
2854 // } catch (IndexOutOfBoundsException e) {
2855 // int oldStackLength = commentStops.length;
2856 // int[] oldStack = commentStops;
2857 // commentStops = new int[oldStackLength + 30];
2858 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2859 // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2860 // //grows the positions buffers too
2861 // int[] old = commentStarts;
2862 // commentStarts = new int[oldStackLength + 30];
2863 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2865 // //the buffer is of a correct size here
2866 // commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning can restart on a range of the source.
 *
 * @param begin position assigned to initialPosition, startPosition and currentPosition
 * @param end last position to scan; eofPosition becomes end + 1, except that
 *        Integer.MAX_VALUE is kept as is to avoid overflowing
 */
2868 public void resetTo(int begin, int end) {
2869 //reset the scanner to a given position where it may rescan again
2871 initialPosition = startPosition = currentPosition = begin;
2872 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2873 commentPtr = -1; // reset comment stack
/**
 * Reads the character that follows a backslash and stores the decoded result in currentCharacter.
 * The next source character is consumed and dispatched on; the visible branches set
 * currentCharacter to a single quote or to a backslash.
 * NOTE(review): presumably only the escaped quote and escaped backslash are translated and any
 * other character leaves the backslash in place - confirm against the case labels.
 *
 * @throws InvalidInputException declared by the signature; NOTE(review): no throwing statement
 *         is visible in the body - confirm whether the clause is still needed
 */
2876 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2877 // the string with "\\u" is a legal string of two chars \ and u
2878 //thus we use a direct access to the source (for regular cases).
2879 // if (unicodeAsBackSlash) {
2880 // // consume next character
2881 // unicodeAsBackSlash = false;
2882 // if (((currentCharacter = source[currentPosition++]) == '\\')
2883 // && (source[currentPosition] == 'u')) {
2884 // getNextUnicodeChar();
2886 // if (withoutUnicodePtr != 0) {
2887 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// consume the character following the backslash
2891 currentCharacter = source[currentPosition++];
2892 switch (currentCharacter) {
2894 currentCharacter = '\'';
2897 currentCharacter = '\\';
2900 currentCharacter = '\\';
/**
 * Reads the character that follows a backslash and stores the decoded result in currentCharacter.
 * The visible branches produce tab, line feed, carriage return, double quote, single quote,
 * backslash and '$'; the assignments for backspace and form feed are commented out.
 * Any other character is tried as an octal escape of up to three digits, whose value is
 * accumulated in number and finally cast to char.
 *
 * @throws InvalidInputException with INVALID_ESCAPE; NOTE(review): presumably raised when the
 *         character after the backslash is not an octal digit - confirm against the else branch
 */
2905 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2906 // the string with "\\u" is a legal string of two chars \ and u
2907 //thus we use a direct access to the source (for regular cases).
2908 // if (unicodeAsBackSlash) {
2909 // // consume next character
2910 // unicodeAsBackSlash = false;
2911 // if (((currentCharacter = source[currentPosition++]) == '\\')
2912 // && (source[currentPosition] == 'u')) {
2913 // getNextUnicodeChar();
2915 // if (withoutUnicodePtr != 0) {
2916 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// consume the character following the backslash
2920 currentCharacter = source[currentPosition++];
2921 switch (currentCharacter) {
2923 // currentCharacter = '\b';
2926 currentCharacter = '\t';
2929 currentCharacter = '\n';
2932 // currentCharacter = '\f';
2935 currentCharacter = '\r';
2938 currentCharacter = '\"';
2941 currentCharacter = '\'';
2944 currentCharacter = '\\';
2947 currentCharacter = '$';
2950 // -----------octal escape--------------
2952 // OctalDigit OctalDigit
2953 // ZeroToThree OctalDigit OctalDigit
2954 int number = Character.getNumericValue(currentCharacter);
2955 if (number >= 0 && number <= 7) {
// true when the first digit is 4..7; a third digit then does not belong to the escape
2956 boolean zeroToThreeNot = number > 3;
2957 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2958 int digit = Character.getNumericValue(currentCharacter);
2959 if (digit >= 0 && digit <= 7) {
2960 number = (number * 8) + digit;
2961 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2962 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2963 // Digit --> ignore last character
2966 digit = Character.getNumericValue(currentCharacter);
2967 if (digit >= 0 && digit <= 7) {
2968 // has read \ZeroToThree OctalDigit OctalDigit
2969 number = (number * 8) + digit;
2970 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2971 // --> ignore last character
2975 } else { // has read \OctalDigit NonDigit--> ignore last
2979 } else { // has read \OctalDigit NonOctalDigit--> ignore last
2983 } else { // has read \OctalDigit --> ignore last character
2987 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the decoded character
2988 currentCharacter = (char) number;
2991 // throw new InvalidInputException(INVALID_ESCAPE);
2995 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2996 // return scanIdentifierOrKeyword( false );
/**
 * Scans the remainder of an identifier and classifies it.
 * Characters are consumed while getNextCharAsJavaIdentifierPart() answers true. The method can
 * answer TokenNameVariable; a token of length 1 is always TokenNameIdentifier. Otherwise the
 * token text is copied into data in lower case and compared character by character against the
 * keyword spellings, so keywords are recognized regardless of case. Anything that does not
 * match a keyword is answered as TokenNameIdentifier.
 * NOTE(review): TokenNameVariable is presumably answered when isVariable is true - confirm
 * against the guarding condition.
 *
 * @param isVariable NOTE(review): presumably true when the token starts with '$' - verify against callers
 * @return TokenNameVariable, one of the keyword token constants, or TokenNameIdentifier
 * @throws InvalidInputException declared by the signature; NOTE(review): presumably propagated
 *         from getNextCharAsJavaIdentifierPart() - confirm
 */
2998 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
3000 //first dispatch on the first char.
3001 //then the length. If there are several
3002 //keywords with the same length AND the same first char, then do another
3003 //dispatch on the second char :-)...cool....but fast !
3004 useAssertAsAnIndentifier = false;
// consume every character that can be part of the identifier
3005 while (getNextCharAsJavaIdentifierPart()) {
3009 // if (new String(getCurrentTokenSource()).equals("$this")) {
3010 // return TokenNamethis;
3012 return TokenNameVariable;
3017 // if (withoutUnicodePtr == 0)
3018 //quick test on length == 1 but not on length > 12 while most identifier
3019 //have a length which is <= 12...but there are lots of identifier with
3022 if ((length = currentPosition - startPosition) == 1)
3023 return TokenNameIdentifier;
// lower-cased copy of the token text; all keyword comparisons below run against it
3025 data = new char[length];
3026 index = startPosition;
3027 for (int i = 0; i < length; i++) {
3028 data[i] = Character.toLowerCase(source[index + i]);
3032 // if ((length = withoutUnicodePtr) == 1)
3033 // return TokenNameIdentifier;
3034 // // data = withoutUnicodeBuffer;
3035 // data = new char[withoutUnicodeBuffer.length];
3036 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3037 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3041 firstLetter = data[index];
3042 switch (firstLetter) {
3047 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3048 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3049 return TokenNameFILE;
3050 index = 0; //__LINE__
3051 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3052 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3053 return TokenNameLINE;
3057 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3058 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3059 return TokenNameCLASS_C;
3063 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3064 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3065 && (data[++index] == '_'))
3066 return TokenNameMETHOD_C;
3070 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3071 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3072 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3073 return TokenNameFUNC_C;
3076 return TokenNameIdentifier;
3078 // as and array abstract
3082 if ((data[++index] == 's')) {
3085 return TokenNameIdentifier;
3089 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3090 return TokenNameand;
3092 return TokenNameIdentifier;
3096 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3097 return TokenNamearray;
3099 return TokenNameIdentifier;
3101 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3102 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3103 return TokenNameabstract;
3105 return TokenNameIdentifier;
3107 return TokenNameIdentifier;
3113 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3114 return TokenNamebreak;
3116 return TokenNameIdentifier;
3118 return TokenNameIdentifier;
3121 //case catch class clone const continue
3124 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3125 return TokenNamecase;
3127 return TokenNameIdentifier;
3129 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3130 return TokenNamecatch;
3132 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3133 return TokenNameclass;
3135 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3136 return TokenNameclone;
3138 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3139 return TokenNameconst;
3141 return TokenNameIdentifier;
3143 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3144 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3145 return TokenNamecontinue;
3147 return TokenNameIdentifier;
3149 return TokenNameIdentifier;
3152 // declare default do die
3153 // TODO delete define ==> no keyword !
3156 if ((data[++index] == 'o'))
3159 return TokenNameIdentifier;
3161 // if ((data[++index] == 'e')
3162 // && (data[++index] == 'f')
3163 // && (data[++index] == 'i')
3164 // && (data[++index] == 'n')
3165 // && (data[++index] == 'e'))
3166 // return TokenNamedefine;
3168 // return TokenNameIdentifier;
3170 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3171 && (data[++index] == 'r') && (data[++index] == 'e'))
3172 return TokenNamedeclare;
3174 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3175 && (data[++index] == 'l') && (data[++index] == 't'))
3176 return TokenNamedefault;
3178 return TokenNameIdentifier;
3180 return TokenNameIdentifier;
3183 //echo else exit elseif extends eval
// the else-if arms below re-test data[index] because the failed first comparison already advanced index
3186 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3187 return TokenNameecho;
3188 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3189 return TokenNameelse;
3190 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3191 return TokenNameexit;
3192 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3193 return TokenNameeval;
3195 return TokenNameIdentifier;
3198 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3199 return TokenNameendif;
3200 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3201 return TokenNameempty;
3203 return TokenNameIdentifier;
3206 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3207 && (data[++index] == 'r'))
3208 return TokenNameendfor;
3209 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3210 && (data[++index] == 'f'))
3211 return TokenNameelseif;
3213 return TokenNameIdentifier;
3215 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3216 && (data[++index] == 'd') && (data[++index] == 's'))
3217 return TokenNameextends;
3219 return TokenNameIdentifier;
3222 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3223 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3224 return TokenNameendwhile;
3226 return TokenNameIdentifier;
3229 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3230 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3231 return TokenNameendswitch;
3233 return TokenNameIdentifier;
3236 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3237 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3238 && (data[++index] == 'e'))
3239 return TokenNameenddeclare;
3241 if ((data[++index] == 'n') // endforeach
3242 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3243 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3244 return TokenNameendforeach;
3246 return TokenNameIdentifier;
3248 return TokenNameIdentifier;
3251 //for false final function
3254 if ((data[++index] == 'o') && (data[++index] == 'r'))
3255 return TokenNamefor;
3257 return TokenNameIdentifier;
3259 // if ((data[++index] == 'a') && (data[++index] == 'l')
3260 // && (data[++index] == 's') && (data[++index] == 'e'))
3261 // return TokenNamefalse;
3262 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3263 return TokenNamefinal;
3265 return TokenNameIdentifier;
3268 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3269 && (data[++index] == 'c') && (data[++index] == 'h'))
3270 return TokenNameforeach;
3272 return TokenNameIdentifier;
3275 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3276 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3277 return TokenNamefunction;
3279 return TokenNameIdentifier;
3281 return TokenNameIdentifier;
3286 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3287 && (data[++index] == 'l')) {
3288 return TokenNameglobal;
3291 return TokenNameIdentifier;
3293 //if int isset include include_once instanceof interface implements
3296 if (data[++index] == 'f')
3299 return TokenNameIdentifier;
3301 // if ((data[++index] == 'n') && (data[++index] == 't'))
3302 // return TokenNameint;
3304 // return TokenNameIdentifier;
3306 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3307 return TokenNameisset;
3309 return TokenNameIdentifier;
3311 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3312 && (data[++index] == 'd') && (data[++index] == 'e'))
3313 return TokenNameinclude;
3315 return TokenNameIdentifier;
3318 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3319 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3320 return TokenNameinterface;
3322 return TokenNameIdentifier;
3325 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3326 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3327 && (data[++index] == 'f'))
3328 return TokenNameinstanceof;
3329 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3330 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3331 && (data[++index] == 's'))
3332 return TokenNameimplements;
3334 return TokenNameIdentifier;
3336 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3337 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3338 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3339 return TokenNameinclude_once;
3341 return TokenNameIdentifier;
3343 return TokenNameIdentifier;
3348 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3349 return TokenNamelist;
3352 return TokenNameIdentifier;
3357 if ((data[++index] == 'e') && (data[++index] == 'w'))
3358 return TokenNamenew;
3360 return TokenNameIdentifier;
3362 // if ((data[++index] == 'u') && (data[++index] == 'l')
3363 // && (data[++index] == 'l'))
3364 // return TokenNamenull;
3366 // return TokenNameIdentifier;
3368 return TokenNameIdentifier;
3373 if (data[++index] == 'r') {
3377 // if (length == 12) {
3378 // if ((data[++index] == 'l')
3379 // && (data[++index] == 'd')
3380 // && (data[++index] == '_')
3381 // && (data[++index] == 'f')
3382 // && (data[++index] == 'u')
3383 // && (data[++index] == 'n')
3384 // && (data[++index] == 'c')
3385 // && (data[++index] == 't')
3386 // && (data[++index] == 'i')
3387 // && (data[++index] == 'o')
3388 // && (data[++index] == 'n')) {
3389 // return TokenNameold_function;
3392 return TokenNameIdentifier;
3394 // print public private protected
3397 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3398 return TokenNameprint;
3400 return TokenNameIdentifier;
3402 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3403 && (data[++index] == 'c')) {
3404 return TokenNamepublic;
3406 return TokenNameIdentifier;
3408 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3409 && (data[++index] == 't') && (data[++index] == 'e')) {
3410 return TokenNameprivate;
3412 return TokenNameIdentifier;
3414 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3415 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3416 return TokenNameprotected;
3418 return TokenNameIdentifier;
3420 return TokenNameIdentifier;
3422 //return require require_once
3424 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3425 && (data[++index] == 'n')) {
3426 return TokenNamereturn;
3428 } else if (length == 7) {
3429 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3430 && (data[++index] == 'r') && (data[++index] == 'e')) {
3431 return TokenNamerequire;
3433 } else if (length == 12) {
3434 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3435 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3436 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3437 return TokenNamerequire_once;
3440 return TokenNameIdentifier;
3445 if (data[++index] == 't')
3446 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3447 return TokenNamestatic;
3449 return TokenNameIdentifier;
3450 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3451 && (data[++index] == 'h'))
3452 return TokenNameswitch;
3454 return TokenNameIdentifier;
3456 return TokenNameIdentifier;
3462 if ((data[++index] == 'r') && (data[++index] == 'y'))
3463 return TokenNametry;
3465 return TokenNameIdentifier;
3467 // if ((data[++index] == 'r') && (data[++index] == 'u')
3468 // && (data[++index] == 'e'))
3469 // return TokenNametrue;
3471 // return TokenNameIdentifier;
3473 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3474 return TokenNamethrow;
3476 return TokenNameIdentifier;
3478 return TokenNameIdentifier;
3484 if ((data[++index] == 's') && (data[++index] == 'e'))
3485 return TokenNameuse;
3487 return TokenNameIdentifier;
3489 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3490 return TokenNameunset;
3492 return TokenNameIdentifier;
3494 return TokenNameIdentifier;
3500 if ((data[++index] == 'a') && (data[++index] == 'r'))
3501 return TokenNamevar;
3503 return TokenNameIdentifier;
3505 return TokenNameIdentifier;
3511 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3512 return TokenNamewhile;
3514 return TokenNameIdentifier;
3515 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3516 // (data[++index]=='e') && (data[++index]=='f')&&
3517 // (data[++index]=='p'))
3518 //return TokenNamewidefp ;
3520 //return TokenNameIdentifier;
3522 return TokenNameIdentifier;
3528 if ((data[++index] == 'o') && (data[++index] == 'r'))
3529 return TokenNamexor;
3531 return TokenNameIdentifier;
3533 return TokenNameIdentifier;
3536 return TokenNameIdentifier;
3540 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3541 //when entering this method the currentCharacter is the firt
3542 //digit of the number , i.e. it may be preceeded by a . when
3544 boolean floating = dotPrefix;
3545 if ((!dotPrefix) && (currentCharacter == '0')) {
3546 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3547 //force the first char of the hexa number do exist...
3548 // consume next character
3549 unicodeAsBackSlash = false;
3550 currentCharacter = source[currentPosition++];
3551 // if (((currentCharacter = source[currentPosition++]) == '\\')
3552 // && (source[currentPosition] == 'u')) {
3553 // getNextUnicodeChar();
3555 // if (withoutUnicodePtr != 0) {
3556 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3559 if (Character.digit(currentCharacter, 16) == -1)
3560 throw new InvalidInputException(INVALID_HEXA);
3562 while (getNextCharAsDigit(16)) {
3565 // if (getNextChar('l', 'L') >= 0)
3566 // return TokenNameLongLiteral;
3568 return TokenNameIntegerLiteral;
3570 //there is x or X in the number
3571 //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3572 // 00078.0 is true !!!!! crazy language
3573 if (getNextCharAsDigit()) {
3574 //-------------potential octal-----------------
3575 while (getNextCharAsDigit()) {
3578 // if (getNextChar('l', 'L') >= 0) {
3579 // return TokenNameLongLiteral;
3582 // if (getNextChar('f', 'F') >= 0) {
3583 // return TokenNameFloatingPointLiteral;
3585 if (getNextChar('d', 'D') >= 0) {
3586 return TokenNameDoubleLiteral;
3587 } else { //make the distinction between octal and float ....
3588 if (getNextChar('.')) { //bingo ! ....
3589 while (getNextCharAsDigit()) {
3592 if (getNextChar('e', 'E') >= 0) {
3593 // consume next character
3594 unicodeAsBackSlash = false;
3595 currentCharacter = source[currentPosition++];
3596 // if (((currentCharacter = source[currentPosition++]) == '\\')
3597 // && (source[currentPosition] == 'u')) {
3598 // getNextUnicodeChar();
3600 // if (withoutUnicodePtr != 0) {
3601 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3604 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3605 // consume next character
3606 unicodeAsBackSlash = false;
3607 currentCharacter = source[currentPosition++];
3608 // if (((currentCharacter = source[currentPosition++]) == '\\')
3609 // && (source[currentPosition] == 'u')) {
3610 // getNextUnicodeChar();
3612 // if (withoutUnicodePtr != 0) {
3613 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3614 // currentCharacter;
3618 if (!Character.isDigit(currentCharacter))
3619 throw new InvalidInputException(INVALID_FLOAT);
3620 while (getNextCharAsDigit()) {
3624 // if (getNextChar('f', 'F') >= 0)
3625 // return TokenNameFloatingPointLiteral;
3626 getNextChar('d', 'D'); //jump over potential d or D
3627 return TokenNameDoubleLiteral;
3629 return TokenNameIntegerLiteral;
3636 while (getNextCharAsDigit()) {
3639 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3640 // return TokenNameLongLiteral;
3641 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3642 while (getNextCharAsDigit()) {
3647 //if floating is true both exponant and suffix may be optional
3648 if (getNextChar('e', 'E') >= 0) {
3650 // consume next character
3651 unicodeAsBackSlash = false;
3652 currentCharacter = source[currentPosition++];
3653 // if (((currentCharacter = source[currentPosition++]) == '\\')
3654 // && (source[currentPosition] == 'u')) {
3655 // getNextUnicodeChar();
3657 // if (withoutUnicodePtr != 0) {
3658 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3661 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3664 unicodeAsBackSlash = false;
3665 currentCharacter = source[currentPosition++];
3666 // if (((currentCharacter = source[currentPosition++]) == '\\')
3667 // && (source[currentPosition] == 'u')) {
3668 // getNextUnicodeChar();
3670 // if (withoutUnicodePtr != 0) {
3671 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3675 if (!Character.isDigit(currentCharacter))
3676 throw new InvalidInputException(INVALID_FLOAT);
3677 while (getNextCharAsDigit()) {
3681 if (getNextChar('d', 'D') >= 0)
3682 return TokenNameDoubleLiteral;
3683 // if (getNextChar('f', 'F') >= 0)
3684 // return TokenNameFloatingPointLiteral;
3685 //the long flag has been tested before
3686 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3690 * Search the line number corresponding to a specific position
3693 public final int getLineNumber(int position) {
3694 if (lineEnds == null)
3696 int length = linePtr + 1;
3699 int g = 0, d = length - 1;
3703 if (position < lineEnds[m]) {
3705 } else if (position > lineEnds[m]) {
3711 if (position < lineEnds[m]) {
3717 public void setPHPMode(boolean mode) {
3721 public final void setSource(char[] source) {
3722 setSource(null, source);
3725 public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3726 //the source-buffer is set to sourceString
3727 this.compilationUnit = compilationUnit;
3728 if (source == null) {
3729 this.source = new char[0];
3731 this.source = source;
3734 initialPosition = currentPosition = 0;
3735 containsAssertKeyword = false;
3736 withoutUnicodeBuffer = new char[this.source.length];
3737 encapsedStringStack = new Stack();
3740 public String toString() {
3741 if (startPosition == source.length)
3742 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3743 if (currentPosition > source.length)
3744 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3745 char front[] = new char[startPosition];
3746 System.arraycopy(source, 0, front, 0, startPosition);
3747 int middleLength = (currentPosition - 1) - startPosition + 1;
3749 if (middleLength > -1) {
3750 middle = new char[middleLength];
3751 System.arraycopy(source, startPosition, middle, 0, middleLength);
3753 middle = new char[0];
3755 char end[] = new char[source.length - (currentPosition - 1)];
3756 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3757 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3758 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable description of the token code {@code act}, meant for debugging output.
 * <p>
 * Tokens with a fixed spelling (keywords, operators, punctuation, casts) are rendered as
 * constant text. Tokens whose text varies (identifiers, variables, literals, strings,
 * whitespace, comments, inline HTML) are rendered as NAME(source), where source is whatever
 * getCurrentTokenSource() returns at the time of the call.
 *
 * @param act the token code to describe
 * @return the description; the last return below yields "not-a-token(code) source"
 */
3762 public final String toStringAction(int act) {
3764 case TokenNameERROR:
3765 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3767 case TokenNameINLINE_HTML:
3768 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3769 case TokenNameIdentifier:
3770 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3771 case TokenNameVariable:
3772 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// ---- keywords: rendered as their spelling ----
3773 case TokenNameabstract:
3774 return "abstract"; //$NON-NLS-1$
3776 return "AND"; //$NON-NLS-1$
3777 case TokenNamearray:
3778 return "array"; //$NON-NLS-1$
3780 return "as"; //$NON-NLS-1$
3781 case TokenNamebreak:
3782 return "break"; //$NON-NLS-1$
3784 return "case"; //$NON-NLS-1$
3785 case TokenNameclass:
3786 return "class"; //$NON-NLS-1$
3787 case TokenNamecatch:
3788 return "catch"; //$NON-NLS-1$
3789 case TokenNameclone:
3792 case TokenNameconst:
3795 case TokenNamecontinue:
3796 return "continue"; //$NON-NLS-1$
3797 case TokenNamedefault:
3798 return "default"; //$NON-NLS-1$
3799 // case TokenNamedefine :
3800 // return "define"; //$NON-NLS-1$
3802 return "do"; //$NON-NLS-1$
3804 return "echo"; //$NON-NLS-1$
3806 return "else"; //$NON-NLS-1$
3807 case TokenNameelseif:
3808 return "elseif"; //$NON-NLS-1$
3809 case TokenNameendfor:
3810 return "endfor"; //$NON-NLS-1$
3811 case TokenNameendforeach:
3812 return "endforeach"; //$NON-NLS-1$
3813 case TokenNameendif:
3814 return "endif"; //$NON-NLS-1$
3815 case TokenNameendswitch:
3816 return "endswitch"; //$NON-NLS-1$
3817 case TokenNameendwhile:
3818 return "endwhile"; //$NON-NLS-1$
3821 case TokenNameextends:
3822 return "extends"; //$NON-NLS-1$
3823 // case TokenNamefalse :
3824 // return "false"; //$NON-NLS-1$
3825 case TokenNamefinal:
3826 return "final"; //$NON-NLS-1$
3828 return "for"; //$NON-NLS-1$
3829 case TokenNameforeach:
3830 return "foreach"; //$NON-NLS-1$
3831 case TokenNamefunction:
3832 return "function"; //$NON-NLS-1$
3833 case TokenNameglobal:
3834 return "global"; //$NON-NLS-1$
3836 return "if"; //$NON-NLS-1$
3837 case TokenNameimplements:
3838 return "implements"; //$NON-NLS-1$
3839 case TokenNameinclude:
3840 return "include"; //$NON-NLS-1$
3841 case TokenNameinclude_once:
3842 return "include_once"; //$NON-NLS-1$
3843 case TokenNameinstanceof:
3844 return "instanceof"; //$NON-NLS-1$
3845 case TokenNameinterface:
3846 return "interface"; //$NON-NLS-1$
3847 case TokenNameisset:
3848 return "isset"; //$NON-NLS-1$
3850 return "list"; //$NON-NLS-1$
3852 return "new"; //$NON-NLS-1$
3853 // case TokenNamenull :
3854 // return "null"; //$NON-NLS-1$
3856 return "OR"; //$NON-NLS-1$
3857 case TokenNameprint:
3858 return "print"; //$NON-NLS-1$
3859 case TokenNameprivate:
3860 return "private"; //$NON-NLS-1$
3861 case TokenNameprotected:
3862 return "protected"; //$NON-NLS-1$
3863 case TokenNamepublic:
3864 return "public"; //$NON-NLS-1$
3865 case TokenNamerequire:
3866 return "require"; //$NON-NLS-1$
3867 case TokenNamerequire_once:
3868 return "require_once"; //$NON-NLS-1$
3869 case TokenNamereturn:
3870 return "return"; //$NON-NLS-1$
3871 case TokenNamestatic:
3872 return "static"; //$NON-NLS-1$
3873 case TokenNameswitch:
3874 return "switch"; //$NON-NLS-1$
3875 // case TokenNametrue :
3876 // return "true"; //$NON-NLS-1$
3877 case TokenNameunset:
3878 return "unset"; //$NON-NLS-1$
3880 return "var"; //$NON-NLS-1$
3881 case TokenNamewhile:
3882 return "while"; //$NON-NLS-1$
3884 return "XOR"; //$NON-NLS-1$
3885 // case TokenNamethis :
3886 // return "$this"; //$NON-NLS-1$
// ---- literals and string tokens: rendered together with the current token source ----
3887 case TokenNameIntegerLiteral:
3888 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3889 case TokenNameDoubleLiteral:
3890 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3891 case TokenNameStringDoubleQuote:
3892 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3893 case TokenNameStringSingleQuote:
3894 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3895 case TokenNameStringInterpolated:
3896 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3897 case TokenNameEncapsedString0:
3898 return "`"; //$NON-NLS-1$
3899 case TokenNameEncapsedString1:
3900 return "\'"; //$NON-NLS-1$
3901 case TokenNameEncapsedString2:
3902 return "\""; //$NON-NLS-1$
3903 case TokenNameSTRING:
3904 return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3905 case TokenNameHEREDOC:
3906 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// ---- operators and punctuation: rendered as their symbol ----
3907 case TokenNamePLUS_PLUS:
3908 return "++"; //$NON-NLS-1$
3909 case TokenNameMINUS_MINUS:
3910 return "--"; //$NON-NLS-1$
3911 case TokenNameEQUAL_EQUAL:
3912 return "=="; //$NON-NLS-1$
3913 case TokenNameEQUAL_EQUAL_EQUAL:
3914 return "==="; //$NON-NLS-1$
3915 case TokenNameEQUAL_GREATER:
3916 return "=>"; //$NON-NLS-1$
3917 case TokenNameLESS_EQUAL:
3918 return "<="; //$NON-NLS-1$
3919 case TokenNameGREATER_EQUAL:
3920 return ">="; //$NON-NLS-1$
3921 case TokenNameNOT_EQUAL:
3922 return "!="; //$NON-NLS-1$
3923 case TokenNameNOT_EQUAL_EQUAL:
3924 return "!=="; //$NON-NLS-1$
3925 case TokenNameLEFT_SHIFT:
3926 return "<<"; //$NON-NLS-1$
3927 case TokenNameRIGHT_SHIFT:
3928 return ">>"; //$NON-NLS-1$
3929 case TokenNamePLUS_EQUAL:
3930 return "+="; //$NON-NLS-1$
3931 case TokenNameMINUS_EQUAL:
3932 return "-="; //$NON-NLS-1$
3933 case TokenNameMULTIPLY_EQUAL:
3934 return "*="; //$NON-NLS-1$
3935 case TokenNameDIVIDE_EQUAL:
3936 return "/="; //$NON-NLS-1$
3937 case TokenNameAND_EQUAL:
3938 return "&="; //$NON-NLS-1$
3939 case TokenNameOR_EQUAL:
3940 return "|="; //$NON-NLS-1$
3941 case TokenNameXOR_EQUAL:
3942 return "^="; //$NON-NLS-1$
3943 case TokenNameREMAINDER_EQUAL:
3944 return "%="; //$NON-NLS-1$
3945 case TokenNameDOT_EQUAL:
3946 return ".="; //$NON-NLS-1$
3947 case TokenNameLEFT_SHIFT_EQUAL:
3948 return "<<="; //$NON-NLS-1$
3949 case TokenNameRIGHT_SHIFT_EQUAL:
3950 return ">>="; //$NON-NLS-1$
3951 case TokenNameOR_OR:
3952 return "||"; //$NON-NLS-1$
3953 case TokenNameAND_AND:
3954 return "&&"; //$NON-NLS-1$
3956 return "+"; //$NON-NLS-1$
3957 case TokenNameMINUS:
3958 return "-"; //$NON-NLS-1$
3959 case TokenNameMINUS_GREATER:
3962 return "!"; //$NON-NLS-1$
3963 case TokenNameREMAINDER:
3964 return "%"; //$NON-NLS-1$
3966 return "^"; //$NON-NLS-1$
3968 return "&"; //$NON-NLS-1$
3969 case TokenNameMULTIPLY:
3970 return "*"; //$NON-NLS-1$
3972 return "|"; //$NON-NLS-1$
3973 case TokenNameTWIDDLE:
3974 return "~"; //$NON-NLS-1$
3975 case TokenNameTWIDDLE_EQUAL:
3976 return "~="; //$NON-NLS-1$
3977 case TokenNameDIVIDE:
3978 return "/"; //$NON-NLS-1$
3979 case TokenNameGREATER:
3980 return ">"; //$NON-NLS-1$
3982 return "<"; //$NON-NLS-1$
3983 case TokenNameLPAREN:
3984 return "("; //$NON-NLS-1$
3985 case TokenNameRPAREN:
3986 return ")"; //$NON-NLS-1$
3987 case TokenNameLBRACE:
3988 return "{"; //$NON-NLS-1$
3989 case TokenNameRBRACE:
3990 return "}"; //$NON-NLS-1$
3991 case TokenNameLBRACKET:
3992 return "["; //$NON-NLS-1$
3993 case TokenNameRBRACKET:
3994 return "]"; //$NON-NLS-1$
3995 case TokenNameSEMICOLON:
3996 return ";"; //$NON-NLS-1$
3997 case TokenNameQUESTION:
3998 return "?"; //$NON-NLS-1$
3999 case TokenNameCOLON:
4000 return ":"; //$NON-NLS-1$
4001 case TokenNameCOMMA:
4002 return ","; //$NON-NLS-1$
4004 return "."; //$NON-NLS-1$
4005 case TokenNameEQUAL:
4006 return "="; //$NON-NLS-1$
4009 case TokenNameDOLLAR:
4011 case TokenNameDOLLAR_LBRACE:
4013 case TokenNameLBRACE_DOLLAR:
4016 return "EOF"; //$NON-NLS-1$
// ---- whitespace and comment tokens: rendered together with the current token source ----
4017 case TokenNameWHITESPACE:
4018 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4019 case TokenNameCOMMENT_LINE:
4020 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4021 case TokenNameCOMMENT_BLOCK:
4022 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4023 case TokenNameCOMMENT_PHPDOC:
4024 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4025 // case TokenNameHTML :
4026 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
// ---- magic constants ----
4029 return "__FILE__"; //$NON-NLS-1$
4031 return "__LINE__"; //$NON-NLS-1$
4032 case TokenNameCLASS_C:
4033 return "__CLASS__"; //$NON-NLS-1$
4034 case TokenNameMETHOD_C:
4035 return "__METHOD__"; //$NON-NLS-1$
4036 case TokenNameFUNC_C:
4037 return "__FUNCTION__"; //$NON-NLS-1$
// ---- cast operators ----
4038 case TokenNameboolCAST:
4039 return "( bool )"; //$NON-NLS-1$
4040 case TokenNameintCAST:
4041 return "( int )"; //$NON-NLS-1$
4042 case TokenNamedoubleCAST:
4043 return "( double )"; //$NON-NLS-1$
4044 case TokenNameobjectCAST:
4045 return "( object )"; //$NON-NLS-1$
4046 case TokenNamestringCAST:
4047 return "( string )"; //$NON-NLS-1$
// fallback: labels the code as not-a-token and appends the current token source
4049 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner that does not check for non-externalized string literals.
 *
 * @param tokenizeComments stored in the field of the same name
 * @param tokenizeWhiteSpace stored in the field of the same name
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(tokenizeComments, tokenizeWhiteSpace, false);
}

/**
 * Creates a scanner with assert mode switched off.
 *
 * @param tokenizeComments stored in the field of the same name
 * @param tokenizeWhiteSpace stored in the field of the same name
 * @param checkNonExternalizedStringLiterals stored in the field of the same name
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
  this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
}

/**
 * Creates a scanner without string tokenizing and without task tags; task tag matching is
 * declared case sensitive.
 *
 * @param tokenizeComments stored in the field of the same name
 * @param tokenizeWhiteSpace stored in the field of the same name
 * @param checkNonExternalizedStringLiterals stored in the field of the same name
 * @param assertMode stored in the field of the same name
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode) {
  this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null, true);
}

/**
 * Creates a fully configured scanner. The end-of-file position starts at
 * {@code Integer.MAX_VALUE} and the encapsed-string stack starts as {@code null}.
 *
 * @param tokenizeComments stored in the field of the same name
 * @param tokenizeWhiteSpace stored in the field of the same name
 * @param checkNonExternalizedStringLiterals stored in the field of the same name
 * @param assertMode stored in the field of the same name
 * @param tokenizeStrings stored in the field of the same name
 * @param taskTags the task tags to look for in comments; may be {@code null}
 * @param taskPriorities the priorities matching {@code taskTags} by index; may be {@code null}
 * @param isTaskCaseSensitive whether task tags are matched case sensitively
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
  this.eofPosition = Integer.MAX_VALUE;
  this.tokenizeComments = tokenizeComments;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeStrings = tokenizeStrings;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.assertMode = assertMode;
  this.encapsedStringStack = null;
  this.taskTags = taskTags;
  this.taskPriorities = taskPriorities;
  // The flag read by checkTaskTag() must reflect the caller's choice, otherwise this parameter
  // has no effect. NOTE(review): assumes the field is a non-final boolean - confirm.
  this.isTaskCaseSensitive = isTaskCaseSensitive;
}
4083 private void checkNonExternalizeString() throws InvalidInputException {
4084 if (currentLine == null)
4086 parseTags(currentLine);
4089 private void parseTags(NLSLine line) throws InvalidInputException {
4090 String s = new String(getCurrentTokenSource());
4091 int pos = s.indexOf(TAG_PREFIX);
4092 int lineLength = line.size();
4094 int start = pos + TAG_PREFIX_LENGTH;
4095 int end = s.indexOf(TAG_POSTFIX, start);
4096 String index = s.substring(start, end);
4099 i = Integer.parseInt(index) - 1;
4100 // Tags are one based not zero based.
4101 } catch (NumberFormatException e) {
4102 i = -1; // we don't want to consider this as a valid NLS tag
4104 if (line.exists(i)) {
4107 pos = s.indexOf(TAG_PREFIX, start);
4109 this.nonNLSStrings = new StringLiteral[lineLength];
4110 int nonNLSCounter = 0;
4111 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4112 StringLiteral literal = (StringLiteral) iterator.next();
4113 if (literal != null) {
4114 this.nonNLSStrings[nonNLSCounter++] = literal;
4117 if (nonNLSCounter == 0) {
4118 this.nonNLSStrings = null;
4122 this.wasNonExternalizedStringLiteral = true;
4123 if (nonNLSCounter != lineLength) {
4124 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
/**
 * Translates the escape sequence at the current position into the character it denotes and
 * leaves that character in currentCharacter.
 * <p>
 * The character after the backslash is either mapped to one of '\b', '\t', '\n', '\f', '\r',
 * '"', '\'' and '\\', or taken as the first digit of an octal escape of up to three digits.
 *
 * @throws InvalidInputException with INVALID_ESCAPE when the sequence is not a valid escape
 */
4129 public final void scanEscapeCharacter() throws InvalidInputException {
4130 // the string with "\\u" is a legal string of two chars \ and u
4131 //thus we use a direct access to the source (for regular cases).
4132 if (unicodeAsBackSlash) {
4133 // consume next character
4134 unicodeAsBackSlash = false;
// unicode escape handling is disabled (commented out); only the buffer copy below remains
4135 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4136 // (source[currentPosition] == 'u')) {
4137 // getNextUnicodeChar();
4139 if (withoutUnicodePtr != 0) {
4140 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// read the character that selects the escape, straight from the source array
4144 currentCharacter = source[currentPosition++];
4145 switch (currentCharacter) {
// simple escapes: each assignment below stores the character the escape stands for
4147 currentCharacter = '\b';
4150 currentCharacter = '\t';
4153 currentCharacter = '\n';
4156 currentCharacter = '\f';
4159 currentCharacter = '\r';
4162 currentCharacter = '\"';
4165 currentCharacter = '\'';
4168 currentCharacter = '\\';
4171 // -----------octal escape--------------
4173 // OctalDigit OctalDigit
4174 // ZeroToThree OctalDigit OctalDigit
4175 int number = Character.getNumericValue(currentCharacter);
4176 if (number >= 0 && number <= 7) {
// when the first digit is above 3, a third digit is not made part of the escape
4177 boolean zeroToThreeNot = number > 3;
4178 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4179 int digit = Character.getNumericValue(currentCharacter);
4180 if (digit >= 0 && digit <= 7) {
// accumulate the value in base 8
4181 number = (number * 8) + digit;
4182 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4183 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4184 // Digit --> ignore last character
4187 digit = Character.getNumericValue(currentCharacter);
4188 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4189 // OctalDigit OctalDigit
4190 number = (number * 8) + digit;
4191 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4192 // --> ignore last character
4196 } else { // has read \OctalDigit NonDigit--> ignore last
4200 } else { // has read \OctalDigit NonOctalDigit--> ignore last
4204 } else { // has read \OctalDigit --> ignore last character
4208 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the resulting character
4209 currentCharacter = (char) number;
// the character after the backslash was neither a known escape letter nor an octal digit
4211 throw new InvalidInputException(INVALID_ESCAPE);
4215 //chech presence of task: tags
4216 //TODO (frederic) see if we need to take unicode characters into account...
4217 public void checkTaskTag(int commentStart, int commentEnd) {
4218 char[] src = this.source;
4220 // only look for newer task: tags
4221 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4224 int foundTaskIndex = this.foundTaskCount;
4225 char previous = src[commentStart + 1]; // should be '*' or '/'
4226 nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4228 char[] priority = null;
4229 // check for tag occurrence only if not ambiguous with javadoc tag
4230 if (previous != '@') {
4231 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4232 tag = this.taskTags[itag];
4233 int tagLength = tag.length;
4237 // ensure tag is not leaded with letter if tag starts with a letter
4238 if (Character.isJavaIdentifierStart(tag[0])) {
4239 if (Character.isJavaIdentifierPart(previous)) {
4244 for (int t = 0; t < tagLength; t++) {
4247 if (x >= this.eofPosition || x >= commentEnd)
4249 if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4250 if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case insensitive check
4255 // ensure tag is not followed with letter if tag finishes with a letter
4256 if (i + tagLength < commentEnd && Character.isJavaIdentifierPart(src[i + tagLength - 1])) {
4257 if (Character.isJavaIdentifierPart(src[i + tagLength]))
4260 if (this.foundTaskTags == null) {
4261 this.foundTaskTags = new char[5][];
4262 this.foundTaskMessages = new char[5][];
4263 this.foundTaskPriorities = new char[5][];
4264 this.foundTaskPositions = new int[5][];
4265 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4266 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4267 this.foundTaskCount);
4268 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4269 this.foundTaskCount);
4270 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4271 this.foundTaskCount);
4272 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4273 this.foundTaskCount);
4276 priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4278 this.foundTaskTags[this.foundTaskCount] = tag;
4279 this.foundTaskPriorities[this.foundTaskCount] = priority;
4280 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4281 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4282 this.foundTaskCount++;
4283 i += tagLength - 1; // will be incremented when looping
4289 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4290 // retrieve message start and end positions
4291 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4292 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4293 // at most beginning of next task
4294 if (max_value < msgStart) {
4295 max_value = msgStart; // would only occur if tag is before EOF.
4299 for (int j = msgStart; j < max_value; j++) {
4300 if ((c = src[j]) == '\n' || c == '\r') {
4306 for (int j = max_value; j > msgStart; j--) {
4307 if ((c = src[j]) == '*') {
4315 if (msgStart == end)
4318 while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4320 while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4322 // update the end position of the task
4323 this.foundTaskPositions[i][1] = end;
4324 // get the message source
4325 final int messageLength = end - msgStart + 1;
4326 char[] message = new char[messageLength];
4327 System.arraycopy(src, msgStart, message, 0, messageLength);
4328 this.foundTaskMessages[i] = message;
4332 // check presence of task: tags
4333 // public void checkTaskTag(int commentStart, int commentEnd) {
4334 // // only look for newer task: tags
4335 // if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4338 // int foundTaskIndex = this.foundTaskCount;
4339 // nextChar: for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
4340 // char[] tag = null;
4341 // char[] priority = null;
4342 // // check for tag occurrence
4343 // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4344 // tag = this.taskTags[itag];
4345 // priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4346 // int tagLength = tag.length;
4347 // for (int t = 0; t < tagLength; t++) {
4348 // if (this.source[i + t] != tag[t])
4349 // continue nextTag;
4351 // if (this.foundTaskTags == null) {
4352 // this.foundTaskTags = new char[5][];
4353 // this.foundTaskMessages = new char[5][];
4354 // this.foundTaskPriorities = new char[5][];
4355 // this.foundTaskPositions = new int[5][];
4356 // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4357 // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4358 // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4359 // this.foundTaskCount);
4360 // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4361 // this.foundTaskCount);
4362 // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4363 // this.foundTaskCount);
4365 // this.foundTaskTags[this.foundTaskCount] = tag;
4366 // this.foundTaskPriorities[this.foundTaskCount] = priority;
4367 // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4368 // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4369 // this.foundTaskCount++;
4370 // i += tagLength - 1; // will be incremented when looping
4373 // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4374 // // retrieve message start and end positions
4375 // int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4376 // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4377 // // at most beginning of next task
4378 // if (max_value < msgStart)
4379 // max_value = msgStart; // would only occur if tag is before EOF.
4382 // for (int j = msgStart; j < max_value; j++) {
4383 // if ((c = this.source[j]) == '\n' || c == '\r') {
4389 // for (int j = max_value; j > msgStart; j--) {
4390 // if ((c = this.source[j]) == '*') {
4398 // if (msgStart == end)
4399 // continue; // empty
4400 // // trim the message
4401 // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4403 // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4405 // // update the end position of the task
4406 // this.foundTaskPositions[i][1] = end;
4407 // // get the message source
4408 // final int messageLength = end - msgStart + 1;
4409 // char[] message = new char[messageLength];
4410 // System.arraycopy(source, msgStart, message, 0, messageLength);
4411 // this.foundTaskMessages[i] = message;