1 /***********************************************************************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3 * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4 * and is available at http://www.eclipse.org/legal/cpl-v05.html
6 * Contributors: IBM Corporation - initial API and implementation
7 **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
14 import net.sourceforge.phpdt.core.compiler.CharOperation;
15 import net.sourceforge.phpdt.core.compiler.IScanner;
16 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
17 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
18 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
19 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
21 public class Scanner implements IScanner, ITerminalSymbols {
23 * APIs are - getNextToken() which returns the current type of the token (this value is not memorized by the scanner) -
24 * getCurrentTokenSource() which provides the token's "REAL" source (i.e. all unicode escapes have been transformed into the correct char) -
25 * sourceStart gives the position into the stream - currentPosition-1 gives the sourceEnd position into the stream
// ---- Mutable scanner state ----
// Mode flags, the character/position cursor, the buffer used for tokens containing escapes,
// comment bookkeeping and the line-end table. Where no original comment exists the purpose
// is only presumed from the field name - TODO confirm against the callers.
28 private boolean assertMode;
30 public boolean useAssertAsAnIndentifier = false;
32 //flag indicating if processed source contains occurrences of keyword assert
33 public boolean containsAssertKeyword = false;
// When set, the string-consuming methods record line separators found inside literals.
35 public boolean recordLineSeparator;
37 public boolean ignorePHPOneLiner = false;
39 public boolean phpMode = false;
41 public boolean phpExpressionTag = false;
43 // public Stack encapsedStringStack = null;
// Character most recently read from the source buffer (assigned from source[currentPosition++]).
45 public char currentCharacter;
// Offset of the first character of the current token (see getCurrentTokenStartPosition()).
47 public int startPosition;
// Offset of the next character to read; currentPosition - 1 is the end of the current token.
49 public int currentPosition;
51 public int initialPosition, eofPosition;
53 // after this position eof are generated instead of real token from the
55 public boolean tokenizeComments;
57 public boolean tokenizeWhiteSpace;
59 public boolean tokenizeStrings;
// NOTE(review): the declaration of the source buffer that the next two comment lines describe
// is not visible in this listing.
61 //source should be viewed as a window (aka a part)
62 //of a entire very large stream
// Side buffer holding the decoded text of a token; real content starts at index 1 because
// withoutUnicodePtr == 0 doubles as the "buffer not in use" flag.
66 public char[] withoutUnicodeBuffer;
68 public int withoutUnicodePtr;
70 //when == 0 ==> no unicode in the current token
71 public boolean unicodeAsBackSlash = false;
73 public boolean scanningFloatLiteral = false;
75 //support for /** comments
// Both comment tables start with room for 10 entries.
76 public int[] commentStops = new int[10];
78 public int[] commentStarts = new int[10];
80 public int commentPtr = -1; // no comment test with commentPtr value -1
82 protected int lastCommentLinePosition = -1;
84 //diet parsing support - jump over some method body when requested
85 public boolean diet = false;
87 //support for the poor-line-debuggers ....
88 //remember the position of the cr/lf
// Line-end offsets, initially sized for 250 lines; linePtr starts at -1 (presumably "no line end
// recorded yet" - TODO confirm).
89 public int[] lineEnds = new int[250];
91 public int linePtr = -1;
93 public boolean wasAcr = false;
// ---- Error keys ----
// String keys identifying scanner errors. In this file UNTERMINATED_STRING is passed to the
// InvalidInputException constructor and INVALID_ESCAPE is compared against
// InvalidInputException.getMessage(); the other keys are presumably used the same way - TODO confirm.
95 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
97 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
99 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
101 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
103 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
105 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
107 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
109 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
111 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
113 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
115 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
117 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
119 //----------------optimized identifier management------------------
// Shared one-character arrays for the letters 'a'..'z'.
120 static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
121 charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
122 charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
123 charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
124 charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
125 charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
126 charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
127 charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
128 charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
// Shared two-character arrays '$' + letter, i.e. the one-letter PHP variable names $a..$z.
// NOTE(review): the '$' element of charArray_vc is not present in this listing (a line appears
// to be missing between the next two lines) - verify against the full file.
130 static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
132 'c' }, charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' },
133 charArray_vf = new char[] { '$', 'f' }, charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' },
134 charArray_vi = new char[] { '$', 'i' }, charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' },
135 charArray_vl = new char[] { '$', 'l' }, charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' },
136 charArray_vo = new char[] { '$', 'o' }, charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' },
137 charArray_vr = new char[] { '$', 'r' }, charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' },
138 charArray_vu = new char[] { '$', 'u' }, charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' },
139 charArray_vx = new char[] { '$', 'x' }, charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
// Placeholder of six NUL characters; every slot of charArray_length is set to this array by the
// initialisation loop further down.
141 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
143 static final int TableSize = 30, InternalTableSize = 6;
// Tokens of up to this many characters are routed to the optimizedCurrentTokenSourceN() methods
// by getCurrentIdentifierSource().
146 public static final int OptimizedLength = 6;
// Per-instance table of char[] entries indexed [OptimizedLength][TableSize][InternalTableSize];
// presumably a cache of short identifiers - TODO confirm in the optimizedCurrentTokenSourceN() methods.
149 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
151 // support for detecting non-externalized string literals
152 int currentLineNr = -1;
154 int previousLineNr = -1;
// Collector for the string literals of the line being scanned; created on demand and appended
// to `lines` by the consumeString* methods.
156 NLSLine currentLine = null;
158 List lines = new ArrayList();
// Marker text of an NLS tag comment: TAG_PREFIX, then (per the comments in the consumeString*
// methods) a value, then TAG_POSTFIX.
160 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
162 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
164 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
166 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
168 public StringLiteral[] nonNLSStrings = null;
// When true, consumed string literals are recorded on currentLine for the NLS check.
170 public boolean checkNonExternalizedStringLiterals = true;
172 public boolean wasNonExternalizedStringLiteral = false;
// Fills every slot of charArray_length with the shared initCharArray placeholder.
// The outer bound is the literal 6, which equals OptimizedLength (the first dimension of the table).
// NOTE(review): the construct enclosing this loop and the closing braces are not visible in this listing.
174 for (int i = 0; i < 6; i++) {
175 for (int j = 0; j < TableSize; j++) {
176 for (int k = 0; k < InternalTableSize; k++) {
177 charArray_length[i][j][k] = initCharArray;
// Static counters, one per token length 2..6; presumably the next write slot used by the
// optimizedCurrentTokenSourceN() methods - TODO confirm.
183 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
// Bracket kind indices; BracketKinds is the number of kinds.
185 public static final int RoundBracket = 0;
187 public static final int SquareBracket = 1;
189 public static final int CurlyBracket = 2;
191 public static final int BracketKinds = 3;
// Task tag support: the found* fields describe tasks detected so far, taskTags and taskPriorities
// are the configured inputs (purpose presumed from the names - TODO confirm).
194 public char[][] foundTaskTags = null;
196 public char[][] foundTaskMessages;
198 public char[][] foundTaskPriorities = null;
200 public int[][] foundTaskPositions;
202 public int foundTaskCount = 0;
204 public char[][] taskTags = null;
206 public char[][] taskPriorities = null;
208 public boolean isTaskCaseSensitive = true;
// Compile-time switches, both disabled.
210 public static final boolean DEBUG = false;
212 public static final boolean TRACE = false;
214 public ICompilationUnit compilationUnit = null;
217 * Determines if the specified character is permissible as the first character in a PHP identifier or variable
219 * The '$' character for PHP variables is regarded as a correct first character !
222 public static boolean isPHPIdentOrVarStart(char ch) {
223 return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
227 * Determines if the specified character is permissible as the first character in a PHP identifier.
229 * The '$' character for PHP variables isn't regarded as the first character !
231 public static boolean isPHPIdentifierStart(char ch) {
232 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
236 * Determines if the specified character may be part of a PHP identifier as other than the first character
238 public static boolean isPHPIdentifierPart(char ch) {
239 return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
242 public final boolean atEnd() {
243 // This code is not relevant if source is
244 // Only a part of the real stream input
245 return source.length == currentPosition;
// getCurrentIdentifierSource(): answers the characters of the current token.
// The token length is currentPosition - startPosition. The switch hands short tokens to
// optimizedCurrentTokenSource1() .. optimizedCurrentTokenSource6(); any other length falls through
// to a plain copy of `length` characters from source, starting at startPosition, into a new array.
// NOTE(review): the case labels, the declaration of `result` and the final return are not
// visible in this listing; the labels are presumably 1..6 (see OptimizedLength) - confirm
// against the full file.
// The commented-out lines are the disabled copy from withoutUnicodeBuffer.
248 public char[] getCurrentIdentifierSource() {
249 //return the token REAL source (aka unicodes are precomputed)
251 // if (withoutUnicodePtr != 0)
252 // //0 is used as a fast test flag so the real first char is in position 1
254 // withoutUnicodeBuffer,
256 // result = new char[withoutUnicodePtr],
258 // withoutUnicodePtr);
260 int length = currentPosition - startPosition;
261 switch (length) { // see OptimizedLength
263 return optimizedCurrentTokenSource1();
265 return optimizedCurrentTokenSource2();
267 return optimizedCurrentTokenSource3();
269 return optimizedCurrentTokenSource4();
271 return optimizedCurrentTokenSource5();
273 return optimizedCurrentTokenSource6();
276 System.arraycopy(source, startPosition, result = new char[length], 0, length);
281 public int getCurrentTokenEndPosition() {
282 return this.currentPosition - 1;
// getCurrentTokenSource(): copies the current token, i.e. the currentPosition - startPosition
// characters of source starting at startPosition, into a freshly allocated array.
// NOTE(review): the declarations of `result` and `length` and the return statement are not
// visible in this listing.
// The commented-out lines are the disabled copy from withoutUnicodeBuffer.
285 public final char[] getCurrentTokenSource() {
286 // Return the token REAL source (aka unicodes are precomputed)
288 // if (withoutUnicodePtr != 0)
289 // // 0 is used as a fast test flag so the real first char is in position 1
291 // withoutUnicodeBuffer,
293 // result = new char[withoutUnicodePtr],
295 // withoutUnicodePtr);
298 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
// getCurrentTokenSource(startPos): same as the no-argument variant, but the copy starts at the
// caller-supplied offset startPos instead of startPosition; currentPosition - startPos
// characters of source are copied into a freshly allocated array.
// NOTE(review): the declarations of `result` and `length` and the return statement are not
// visible in this listing.
303 public final char[] getCurrentTokenSource(int startPos) {
304 // Return the token REAL source (aka unicodes are precomputed)
306 // if (withoutUnicodePtr != 0)
307 // // 0 is used as a fast test flag so the real first char is in position 1
309 // withoutUnicodeBuffer,
311 // result = new char[withoutUnicodePtr],
313 // withoutUnicodePtr);
316 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
// getCurrentTokenSourceString(): answers the text of the current string token without its two
// delimiter characters.
// - When withoutUnicodePtr != 0 the decoded text is taken from withoutUnicodeBuffer, starting at
//   index 2 (index 1 holds the opening delimiter), for withoutUnicodePtr - 2 characters.
// - The second arraycopy takes currentPosition - startPosition - 2 characters of source starting
//   at startPosition + 1; presumably this is the else branch - the `else` itself, the
//   declarations and the return are not visible in this listing.
321 public final char[] getCurrentTokenSourceString() {
322 //return the token REAL source (aka unicodes are precomputed).
323 //REMOVE the two " that are at the beginning and the end.
325 if (withoutUnicodePtr != 0)
326 //0 is used as a fast test flag so the real first char is in position 1
327 System.arraycopy(withoutUnicodeBuffer, 2,
328 //2 is 1 (real start) + 1 (to jump over the ")
329 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
332 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
337 public final char[] getRawTokenSourceEnd() {
338 int length = this.eofPosition - this.currentPosition - 1;
339 char[] sourceEnd = new char[length];
340 System.arraycopy(this.source, this.currentPosition, sourceEnd, 0, length);
344 public int getCurrentTokenStartPosition() {
345 return this.startPosition;
// getCurrentStringLiteralSource(): answers the text between the delimiters of the current string
// token, i.e. the currentPosition - startPosition - 2 characters of source starting at
// startPosition + 1.
// The guard covers tokens of at most one character (startPosition + 1 >= currentPosition).
// NOTE(review): the body of that guard, the declarations and the return are not visible in this
// listing; presumably an empty array is answered there - TODO confirm.
348 public final char[] getCurrentStringLiteralSource() {
349 // Return the token REAL source (aka unicodes are precomputed)
350 if (startPosition + 1 >= currentPosition) {
355 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
// getCurrentStringLiteralSource(startPos): like the no-argument variant, but measured from the
// caller-supplied offset startPos: currentPosition - startPos - 2 characters of source are copied,
// starting at startPos + 1.
// NOTE(review): unlike the no-argument variant, no guard for very short tokens is visible here;
// the declarations and the return are not visible in this listing either.
360 public final char[] getCurrentStringLiteralSource(int startPos) {
361 // Return the token REAL source (aka unicodes are precomputed)
364 System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
370 * Search the source position corresponding to the end of a given line number
372 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
374 * In case the given line number is inconsistent, answers -1.
// getLineEnd(lineNumber): looks up the end offset of a 1-based line in lineEnds; the normal
// result is lineEnds[lineNumber - 1].
// Guards are visible for: no line table (lineEnds == null), a line number at or beyond the table
// length, and the last table slot (lineNumber == lineEnds.length - 1).
// NOTE(review): the values returned by those guards are not visible in this listing; the
// method description states that -1 is answered for an inconsistent line number.
376 public final int getLineEnd(int lineNumber) {
377 if (lineEnds == null)
379 if (lineNumber >= lineEnds.length)
383 if (lineNumber == lineEnds.length - 1)
385 return lineEnds[lineNumber - 1];
386 // next line start one character behind the lineEnd of the previous line
390 * Search the source position corresponding to the beginning of a given line number
392 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
394 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
396 * In case the given line number is inconsistent, answers -1.
// getLineStart(lineNumber): answers the start offset of a 1-based line. In the general case this
// is one past the recorded end of the previous line: lineEnds[lineNumber - 2] + 1.
// initialPosition is answered on a separate path; presumably for line 1, matching the method
// description "getLineStart(1) --> 0" - the condition is not visible in this listing.
// NOTE(review): the values returned by the two guards (no line table, line number at or beyond
// the table length) are not visible either; the description states -1 for inconsistent input.
398 public final int getLineStart(int lineNumber) {
399 if (lineEnds == null)
401 if (lineNumber >= lineEnds.length)
406 return initialPosition;
407 return lineEnds[lineNumber - 2] + 1;
408 // next line start one character behind the lineEnd of the previous line
// getNextChar(testedChar): tries to consume one character equal to testedChar.
// Visible behaviour: the read position is saved in `temp`, the next character of source is read
// into currentCharacter, and currentPosition is restored from `temp` when the character differs
// from testedChar or when the read runs off the buffer (IndexOutOfBoundsException).
// unicodeAsBackSlash is cleared on the match path and on the exception path.
// NOTE(review): the `try` opener, the return statements and the closing braces are not visible
// in this listing; per the original comments the method answers false when nothing is consumed.
// The long commented-out section is the disabled handling of backslash-u escapes.
411 public final boolean getNextChar(char testedChar) {
413 //handle the case of unicode.
414 //when a unicode appears then we must use a buffer that holds char
416 //At the end of this method currentCharacter holds the new visited char
417 //and currentPosition points right next after it
418 //Both previous lines are true if the currentCharacter is == to the
420 //On false, no side effect has occurred.
421 //ALL getNextChar.... ARE OPTIMIZED COPIES
422 int temp = currentPosition;
424 currentCharacter = source[currentPosition++];
425 // if (((currentCharacter = source[currentPosition++]) == '\\')
426 // && (source[currentPosition] == 'u')) {
427 // //-------------unicode processing ------------
428 // int c1, c2, c3, c4;
429 // int unicodeSize = 6;
430 // currentPosition++;
431 // while (source[currentPosition] == 'u') {
432 // currentPosition++;
436 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
438 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
440 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
442 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
444 // currentPosition = temp;
448 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
449 // if (currentCharacter != testedChar) {
450 // currentPosition = temp;
453 // unicodeAsBackSlash = currentCharacter == '\\';
455 // //need the unicode buffer
456 // if (withoutUnicodePtr == 0) {
457 // //buffer all the entries that have been left aside....
458 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
462 // withoutUnicodeBuffer,
464 // withoutUnicodePtr);
466 // //fill the buffer with the char
467 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
470 // } //-------------end unicode processing--------------
472 if (currentCharacter != testedChar) {
473 currentPosition = temp;
476 unicodeAsBackSlash = false;
477 // if (withoutUnicodePtr != 0)
478 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
481 } catch (IndexOutOfBoundsException e) {
482 unicodeAsBackSlash = false;
483 currentPosition = temp;
// getNextChar(testedChar1, testedChar2): tries to consume one character that equals either
// argument. Per the original comment the answer is 0 for testedChar1, 1 for testedChar2 and -1
// otherwise, so callers can test (x == 0) for the first and (x > 0) for the second.
// Visible behaviour: the read position is saved in `temp`, the next character of source is read
// into currentCharacter, and currentPosition is restored from `temp` when neither character
// matches or when the read runs off the buffer (IndexOutOfBoundsException).
// NOTE(review): the `try` opener, the assignments/returns of the result and the closing braces
// are not visible in this listing.
// The long commented-out section is the disabled handling of backslash-u escapes.
488 public final int getNextChar(char testedChar1, char testedChar2) {
489 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
490 //test can be done with (x==0) for the first and (x>0) for the second
491 //handle the case of unicode.
492 //when a unicode appears then we must use a buffer that holds char
494 //At the end of this method currentCharacter holds the new visited char
495 //and currentPosition points right next after it
496 //Both previous lines are true if the currentCharacter is == to the
498 //On false, no side effect has occurred.
499 //ALL getNextChar.... ARE OPTIMIZED COPIES
500 int temp = currentPosition;
503 currentCharacter = source[currentPosition++];
504 // if (((currentCharacter = source[currentPosition++]) == '\\')
505 // && (source[currentPosition] == 'u')) {
506 // //-------------unicode processing ------------
507 // int c1, c2, c3, c4;
508 // int unicodeSize = 6;
509 // currentPosition++;
510 // while (source[currentPosition] == 'u') {
511 // currentPosition++;
515 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
517 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
519 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
521 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
523 // currentPosition = temp;
527 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
528 // if (currentCharacter == testedChar1)
530 // else if (currentCharacter == testedChar2)
533 // currentPosition = temp;
537 // //need the unicode buffer
538 // if (withoutUnicodePtr == 0) {
539 // //buffer all the entries that have been left aside....
540 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
544 // withoutUnicodeBuffer,
546 // withoutUnicodePtr);
548 // //fill the buffer with the char
549 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
551 // } //-------------end unicode processing--------------
553 if (currentCharacter == testedChar1)
555 else if (currentCharacter == testedChar2)
558 currentPosition = temp;
561 // if (withoutUnicodePtr != 0)
562 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
565 } catch (IndexOutOfBoundsException e) {
566 currentPosition = temp;
// getNextCharAsDigit(): tries to consume one character for which Character.isDigit() is true.
// Visible behaviour: the read position is saved in `temp`, the next character of source is read
// into currentCharacter, and currentPosition is restored from `temp` when the character is not a
// digit or when the read runs off the buffer (IndexOutOfBoundsException).
// NOTE(review): the `try` opener, the return statements and the closing braces are not visible
// in this listing; per the original comments the method answers false when nothing is consumed.
// The long commented-out section is the disabled handling of backslash-u escapes.
571 public final boolean getNextCharAsDigit() {
573 //handle the case of unicode.
574 //when a unicode appears then we must use a buffer that holds char
576 //At the end of this method currentCharacter holds the new visited char
577 //and currentPosition points right next after it
578 //Both previous lines are true if the currentCharacter is a digit
579 //On false, no side effect has occurred.
580 //ALL getNextChar.... ARE OPTIMIZED COPIES
581 int temp = currentPosition;
583 currentCharacter = source[currentPosition++];
584 // if (((currentCharacter = source[currentPosition++]) == '\\')
585 // && (source[currentPosition] == 'u')) {
586 // //-------------unicode processing ------------
587 // int c1, c2, c3, c4;
588 // int unicodeSize = 6;
589 // currentPosition++;
590 // while (source[currentPosition] == 'u') {
591 // currentPosition++;
595 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
597 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
599 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
601 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
603 // currentPosition = temp;
607 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
608 // if (!Character.isDigit(currentCharacter)) {
609 // currentPosition = temp;
613 // //need the unicode buffer
614 // if (withoutUnicodePtr == 0) {
615 // //buffer all the entries that have been left aside....
616 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
620 // withoutUnicodeBuffer,
622 // withoutUnicodePtr);
624 // //fill the buffer with the char
625 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
627 // } //-------------end unicode processing--------------
629 if (!Character.isDigit(currentCharacter)) {
630 currentPosition = temp;
633 // if (withoutUnicodePtr != 0)
634 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
637 } catch (IndexOutOfBoundsException e) {
638 currentPosition = temp;
// getNextCharAsDigit(radix): tries to consume one character that is a digit in the given radix,
// i.e. for which Character.digit(currentCharacter, radix) is not -1.
// Visible behaviour: the read position is saved in `temp`, the next character of source is read
// into currentCharacter, and currentPosition is restored from `temp` when the character is not a
// digit in that radix or when the read runs off the buffer (IndexOutOfBoundsException).
// NOTE(review): the `try` opener, the return statements and the closing braces are not visible
// in this listing; per the original comments the method answers false when nothing is consumed.
// The long commented-out section is the disabled handling of backslash-u escapes.
643 public final boolean getNextCharAsDigit(int radix) {
645 //handle the case of unicode.
646 //when a unicode appears then we must use a buffer that holds char
648 //At the end of this method currentCharacter holds the new visited char
649 //and currentPosition points right next after it
650 //Both previous lines are true if the currentCharacter is a digit base on
652 //On false, no side effect has occurred.
653 //ALL getNextChar.... ARE OPTIMIZED COPIES
654 int temp = currentPosition;
656 currentCharacter = source[currentPosition++];
657 // if (((currentCharacter = source[currentPosition++]) == '\\')
658 // && (source[currentPosition] == 'u')) {
659 // //-------------unicode processing ------------
660 // int c1, c2, c3, c4;
661 // int unicodeSize = 6;
662 // currentPosition++;
663 // while (source[currentPosition] == 'u') {
664 // currentPosition++;
668 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
670 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
672 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
674 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
676 // currentPosition = temp;
680 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
681 // if (Character.digit(currentCharacter, radix) == -1) {
682 // currentPosition = temp;
686 // //need the unicode buffer
687 // if (withoutUnicodePtr == 0) {
688 // //buffer all the entries that have been left aside....
689 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
693 // withoutUnicodeBuffer,
695 // withoutUnicodePtr);
697 // //fill the buffer with the char
698 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
700 // } //-------------end unicode processing--------------
702 if (Character.digit(currentCharacter, radix) == -1) {
703 currentPosition = temp;
706 // if (withoutUnicodePtr != 0)
707 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
710 } catch (IndexOutOfBoundsException e) {
711 currentPosition = temp;
// getNextCharAsJavaIdentifierPart(): tries to consume one identifier-part character.
// Despite the "Java" in the name, the test applied is isPHPIdentifierPart(currentCharacter).
// Visible behaviour: the read position is saved in `temp`, the next character of source is read
// into currentCharacter, and currentPosition is restored from `temp` when the character is not an
// identifier part or when the read runs off the buffer (IndexOutOfBoundsException).
// NOTE(review): the `try` opener, the return statements and the closing braces are not visible
// in this listing; per the original comments the method answers false when nothing is consumed.
// The long commented-out section is the disabled handling of backslash-u escapes.
716 public boolean getNextCharAsJavaIdentifierPart() {
718 //handle the case of unicode.
719 //when a unicode appears then we must use a buffer that holds char
721 //At the end of this method currentCharacter holds the new visited char
722 //and currentPosition points right next after it
723 //Both previous lines are true if the currentCharacter is a
724 // JavaIdentifierPart
725 //On false, no side effect has occurred.
726 //ALL getNextChar.... ARE OPTIMIZED COPIES
727 int temp = currentPosition;
729 currentCharacter = source[currentPosition++];
730 // if (((currentCharacter = source[currentPosition++]) == '\\')
731 // && (source[currentPosition] == 'u')) {
732 // //-------------unicode processing ------------
733 // int c1, c2, c3, c4;
734 // int unicodeSize = 6;
735 // currentPosition++;
736 // while (source[currentPosition] == 'u') {
737 // currentPosition++;
741 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
743 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
745 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
747 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
749 // currentPosition = temp;
753 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
754 // if (!isPHPIdentifierPart(currentCharacter)) {
755 // currentPosition = temp;
759 // //need the unicode buffer
760 // if (withoutUnicodePtr == 0) {
761 // //buffer all the entries that have been left aside....
762 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
766 // withoutUnicodeBuffer,
768 // withoutUnicodePtr);
770 // //fill the buffer with the char
771 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
773 // } //-------------end unicode processing--------------
775 if (!isPHPIdentifierPart(currentCharacter)) {
776 currentPosition = temp;
779 // if (withoutUnicodePtr != 0)
780 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
783 } catch (IndexOutOfBoundsException e) {
784 currentPosition = temp;
// getCastOrParen(): called after a '(' to decide whether it opens a PHP type cast.
// Steps visible below:
//  1. currentPosition and currentCharacter are saved so the scan can be undone.
//  2. Blanks and tabs are skipped, then a run of ASCII letters (a-z, A-Z) is collected in buf.
//  3. If the word is 3 to 7 letters long it is compared with the cast keywords:
//     int / integer -> TokenNameintCAST, bool / boolean -> TokenNameboolCAST,
//     real / float / double -> TokenNamedoubleCAST, array -> TokenNamearrayCAST,
//     unset -> TokenNameunsetCAST, object -> TokenNameobjectCAST, string -> TokenNamestringCAST.
//  4. Trailing blanks and tabs are skipped and the next character is tested against ')'.
//  5. On the fall-through path the saved character and position are restored and
//     TokenNameLPAREN is answered. The body of the IndexOutOfBoundsException handler is not
//     visible in this listing.
// NOTE(review): the `try`/`do` openers, the case labels, the declaration of `index`, the
// statements executed on a keyword match and the return of the cast token are not visible in
// this listing - confirm against the full file.
// NOTE(review): upper-case letters are collected in step 2, but the comparisons visible in
// step 3 are against lower-case letters only and no lower-casing of buf is visible, so a
// cast written like (INT) would not be recognised by this code as shown - verify whether
// that is handled elsewhere.
789 public int getCastOrParen() {
790 int tempPosition = currentPosition;
791 char tempCharacter = currentCharacter;
792 int tempToken = TokenNameLPAREN;
793 boolean found = false;
794 StringBuffer buf = new StringBuffer();
797 currentCharacter = source[currentPosition++];
798 } while (currentCharacter == ' ' || currentCharacter == '\t');
799 while ((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
800 buf.append(currentCharacter);
801 currentCharacter = source[currentPosition++];
803 if (buf.length() >= 3 && buf.length() <= 7) {
804 char[] data = buf.toString().toCharArray();
806 switch (data.length) {
809 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
811 tempToken = TokenNameintCAST;
816 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
818 tempToken = TokenNameboolCAST;
821 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
823 tempToken = TokenNamedoubleCAST;
829 if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
830 && (data[++index] == 'y')) {
832 tempToken = TokenNamearrayCAST;
835 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
836 && (data[++index] == 't')) {
838 tempToken = TokenNameunsetCAST;
841 if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
842 && (data[++index] == 't')) {
844 tempToken = TokenNamedoubleCAST;
850 // object string double
851 if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
852 && (data[++index] == 'c') && (data[++index] == 't')) {
854 tempToken = TokenNameobjectCAST;
857 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
858 && (data[++index] == 'n') && (data[++index] == 'g')) {
860 tempToken = TokenNamestringCAST;
863 if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
864 && (data[++index] == 'l') && (data[++index] == 'e')) {
866 tempToken = TokenNamedoubleCAST;
873 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
874 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
876 tempToken = TokenNameboolCAST;
879 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
880 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
882 tempToken = TokenNameintCAST;
888 while (currentCharacter == ' ' || currentCharacter == '\t') {
889 currentCharacter = source[currentPosition++];
891 if (currentCharacter == ')') {
896 } catch (IndexOutOfBoundsException e) {
898 currentCharacter = tempCharacter;
899 currentPosition = tempPosition;
900 return TokenNameLPAREN;
// consumeStringInterpolated(): scans a back-tick delimited literal; the main loop runs until
// currentCharacter is '`'.
// - A backslash is handed to scanDoubleQuotedEscapeCharacter(). The character left in
//   currentCharacter afterwards is stored in withoutUnicodeBuffer; when the buffer is not yet in
//   use (withoutUnicodePtr == 0) it is first primed with the token text read so far.
// - '\r' and '\n' are accepted inside the literal. The statement executed when
//   recordLineSeparator is set is not visible in this listing.
// - Running off the end of source (IndexOutOfBoundsException) moves currentPosition back by 2 for
//   error reporting and throws InvalidInputException(UNTERMINATED_STRING).
// - When the caught InvalidInputException carries INVALID_ESCAPE, up to 50 characters are
//   inspected for a closing '`' so that currentPosition can be moved just past it. The loop
//   exits and what happens to the exception afterwards are not visible in this listing.
// - When checkNonExternalizedStringLiterals is set, the literal is recorded on currentLine
//   (created and appended to `lines` if null) as a StringLiteral covering
//   startPosition .. currentPosition - 1.
// NOTE(review): the `try` opener, several closing braces and some statements are not visible in
// this listing - confirm against the full file before changing the control flow.
903 public void consumeStringInterpolated() throws InvalidInputException {
905 // consume next character
906 unicodeAsBackSlash = false;
907 currentCharacter = source[currentPosition++];
908 // if (((currentCharacter = source[currentPosition++]) == '\\')
909 // && (source[currentPosition] == 'u')) {
910 // getNextUnicodeChar();
912 // if (withoutUnicodePtr != 0) {
913 // withoutUnicodeBuffer[++withoutUnicodePtr] =
917 while (currentCharacter != '`') {
918 /** ** in PHP \r and \n are valid in string literals *** */
919 // if ((currentCharacter == '\n')
920 // || (currentCharacter == '\r')) {
921 // // relocate if finding another quote fairly close: thus unicode
922 '/u000D' will be fully consumed
923 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
924 // if (currentPosition + lookAhead == source.length)
926 // if (source[currentPosition + lookAhead] == '\n')
928 // if (source[currentPosition + lookAhead] == '\"') {
929 // currentPosition += lookAhead + 1;
933 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
935 if (currentCharacter == '\\') {
936 int escapeSize = currentPosition;
937 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
938 //scanEscapeCharacter make a side effect on this value and we need
939 // the previous value few lines down this one
940 scanDoubleQuotedEscapeCharacter();
941 escapeSize = currentPosition - escapeSize;
942 if (withoutUnicodePtr == 0) {
943 //buffer all the entries that have been left aside....
944 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
945 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
946 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
947 } else { //overwrite the / in the buffer
948 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
949 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
950 // where only one is correct
954 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
955 if (recordLineSeparator) {
959 // consume next character
960 unicodeAsBackSlash = false;
961 currentCharacter = source[currentPosition++];
962 // if (((currentCharacter = source[currentPosition++]) == '\\')
963 // && (source[currentPosition] == 'u')) {
964 // getNextUnicodeChar();
966 if (withoutUnicodePtr != 0) {
967 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
971 } catch (IndexOutOfBoundsException e) {
972 // reset end position for error reporting
973 currentPosition -= 2;
974 throw new InvalidInputException(UNTERMINATED_STRING);
975 } catch (InvalidInputException e) {
976 if (e.getMessage().equals(INVALID_ESCAPE)) {
977 // relocate if finding another quote fairly close: thus unicode
978 // '/u000D' will be fully consumed
979 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
980 if (currentPosition + lookAhead == source.length)
982 if (source[currentPosition + lookAhead] == '\n')
984 if (source[currentPosition + lookAhead] == '`') {
985 currentPosition += lookAhead + 1;
992 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
993 // //$NON-NLS-?$ where ? is an
995 if (currentLine == null) {
996 currentLine = new NLSLine();
997 lines.add(currentLine);
999 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
// consumeStringConstant(): scans a single-quoted literal; the main loop runs until
// currentCharacter is the closing single quote.
// - A backslash is handed to scanSingleQuotedEscapeCharacter(). The character left in
//   currentCharacter afterwards is stored in withoutUnicodeBuffer; when the buffer is not yet in
//   use (withoutUnicodePtr == 0) it is first primed with the token text read so far. If
//   unicodeAsBackSlash was set before the escape was scanned, the buffer pointer is moved back
//   by one slot.
// - '\r' and '\n' are accepted inside the literal; pushLineSeparator() is called for them when
//   recordLineSeparator is set.
// - Running off the end of source (IndexOutOfBoundsException) moves currentPosition back by 2 for
//   error reporting and throws InvalidInputException(UNTERMINATED_STRING).
// - When the caught InvalidInputException carries INVALID_ESCAPE, up to 50 characters are
//   inspected for a closing single quote so that currentPosition can be moved just past it. The
//   loop exits and what happens to the exception afterwards are not visible in this listing.
// - When checkNonExternalizedStringLiterals is set, the literal is recorded on currentLine
//   (created and appended to `lines` if null) as a StringLiteral covering
//   startPosition .. currentPosition - 1.
// NOTE(review): the `try` opener, several closing braces and some statements are not visible in
// this listing - confirm against the full file before changing the control flow.
1003 public void consumeStringConstant() throws InvalidInputException {
1005 // consume next character
1006 unicodeAsBackSlash = false;
1007 currentCharacter = source[currentPosition++];
1008 // if (((currentCharacter = source[currentPosition++]) == '\\')
1009 // && (source[currentPosition] == 'u')) {
1010 // getNextUnicodeChar();
1012 // if (withoutUnicodePtr != 0) {
1013 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1014 // currentCharacter;
1017 while (currentCharacter != '\'') {
1018 /** ** in PHP \r and \n are valid in string literals *** */
1019 // if ((currentCharacter == '\n')
1020 // || (currentCharacter == '\r')) {
1021 // // relocate if finding another quote fairly close: thus unicode
1022 '/u000D' will be fully consumed
1023 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1024 // if (currentPosition + lookAhead == source.length)
1026 // if (source[currentPosition + lookAhead] == '\n')
1028 // if (source[currentPosition + lookAhead] == '\"') {
1029 // currentPosition += lookAhead + 1;
1033 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1035 if (currentCharacter == '\\') {
1036 int escapeSize = currentPosition;
1037 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1038 //scanEscapeCharacter make a side effect on this value and we need
1039 // the previous value few lines down this one
1040 scanSingleQuotedEscapeCharacter();
1041 escapeSize = currentPosition - escapeSize;
1042 if (withoutUnicodePtr == 0) {
1043 //buffer all the entries that have been left aside....
1044 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1045 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1046 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1047 } else { //overwrite the / in the buffer
1048 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1049 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1050 // where only one is correct
1051 withoutUnicodePtr--;
1054 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1055 if (recordLineSeparator) {
1056 pushLineSeparator();
1059 // consume next character
1060 unicodeAsBackSlash = false;
1061 currentCharacter = source[currentPosition++];
1062 // if (((currentCharacter = source[currentPosition++]) == '\\')
1063 // && (source[currentPosition] == 'u')) {
1064 // getNextUnicodeChar();
1066 if (withoutUnicodePtr != 0) {
1067 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1071 } catch (IndexOutOfBoundsException e) {
1072 // reset end position for error reporting
1073 currentPosition -= 2;
1074 throw new InvalidInputException(UNTERMINATED_STRING);
1075 } catch (InvalidInputException e) {
1076 if (e.getMessage().equals(INVALID_ESCAPE)) {
1077 // relocate if finding another quote fairly close: thus unicode
1078 // '/u000D' will be fully consumed
1079 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1080 if (currentPosition + lookAhead == source.length)
1082 if (source[currentPosition + lookAhead] == '\n')
1084 if (source[currentPosition + lookAhead] == '\'') {
1085 currentPosition += lookAhead + 1;
1092 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1093 // //$NON-NLS-?$ where ? is an
1095 if (currentLine == null) {
1096 currentLine = new NLSLine();
1097 lines.add(currentLine);
1099 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
// Consumes a double-quoted PHP string literal: characters are read from source until a
// '"' is seen while openDollarBrace is false. getNextToken() calls this method right
// before it returns TokenNameStringDoubleQuote.
//  - openDollarBrace is switched on by "${" or "{$" and switched off by '}', so a '"'
//    between those markers does not end the literal.
//  - A backslash is handed to scanDoubleQuotedEscapeCharacter(); the resulting
//    currentCharacter is written to withoutUnicodeBuffer (which is first filled with
//    the characters scanned since startPosition if it was still empty).
//  - \r and \n are accepted inside the string; pushLineSeparator() is called for them
//    when recordLineSeparator is set.
//  - With checkNonExternalizedStringLiterals set, the literal is added to the current
//    NLSLine as a StringLiteral spanning startPosition .. currentPosition - 1.
// Throws InvalidInputException(UNTERMINATED_STRING) when the end of source is hit
// (IndexOutOfBoundsException) before the closing quote is found.
1103 public void consumeStringLiteral() throws InvalidInputException {
1105 boolean openDollarBrace = false;
1106 // consume next character
1107 unicodeAsBackSlash = false;
1108 currentCharacter = source[currentPosition++];
1109 while (currentCharacter != '"' || openDollarBrace) {
1110 /** ** in PHP \r and \n are valid in string literals *** */
1111 if (currentCharacter == '\\') {
// escapeSize first remembers the position before the escape is scanned and is then
// turned into the number of characters the escape scanner consumed
1112 int escapeSize = currentPosition;
1113 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1114 //scanEscapeCharacter make a side effect on this value and we need
1115 // the previous value few lines down this one
1116 scanDoubleQuotedEscapeCharacter();
1117 escapeSize = currentPosition - escapeSize;
1118 if (withoutUnicodePtr == 0) {
1119 //buffer all the entries that have been left aside....
1120 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1121 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1122 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1123 } else { //overwrite the / in the buffer
1124 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1125 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1126 // where only one is correct
1127 withoutUnicodePtr--;
// "${" and "{$" both open an embedded expression; the next '}' closes it
1130 } else if (currentCharacter == '$' && source[currentPosition] == '{') {
1131 openDollarBrace = true;
1132 } else if (currentCharacter == '{' && source[currentPosition] == '$') {
1133 openDollarBrace = true;
1134 } else if (currentCharacter == '}') {
1135 openDollarBrace = false;
1136 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1137 if (recordLineSeparator) {
1138 pushLineSeparator();
1141 // consume next character
1142 unicodeAsBackSlash = false;
1143 currentCharacter = source[currentPosition++];
1144 if (withoutUnicodePtr != 0) {
1145 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1148 } catch (IndexOutOfBoundsException e) {
1149 // reset end position for error reporting
1150 currentPosition -= 2;
1151 throw new InvalidInputException(UNTERMINATED_STRING);
1152 } catch (InvalidInputException e) {
1153 if (e.getMessage().equals(INVALID_ESCAPE)) {
1154 // relocate if finding another quote fairly close: thus unicode
1155 // '/u000D' will be fully consumed
// the look-ahead covers at most 50 characters and tests for the end of source,
// a newline and the closing double quote
1156 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1157 if (currentPosition + lookAhead == source.length)
1159 if (source[currentPosition + lookAhead] == '\n')
1161 if (source[currentPosition + lookAhead] == '\"') {
1162 currentPosition += lookAhead + 1;
1169 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1170 // //$NON-NLS-?$ where ? is an
1172 if (currentLine == null) {
1173 currentLine = new NLSLine();
1174 lines.add(currentLine);
1176 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
// Scans and returns the next token as one of the TokenName* constants.
// Outline of the visible logic:
//  - phpExpressionTag is reset; one early path hands over to
//    getInlinedHTMLToken(currentPosition) (presumably while the scanner is not in PHP
//    mode -- confirm against the guarding condition).
//  - another early path calls jumpOverMethodBody() and answers TokenNameEOF or
//    TokenNameRBRACE depending on whether currentPosition went past source.length.
//  - withoutUnicodePtr is reset for the new token, whitespace is skipped (line separators
//    are reported through pushLineSeparator() when recordLineSeparator is set) and
//    TokenNameWHITESPACE is returned when tokenizeWhiteSpace is set and whitespace was
//    actually consumed.
//  - TokenNameEOF is returned once currentPosition has passed eofPosition.
//  - otherwise the token is selected by a switch on currentCharacter. String literals
//    are delegated to consumeStringConstant(), consumeStringLiteral() and
//    consumeStringInterpolated(); comments are reported through recordComment(...) and
//    only returned as tokens when tokenizeComments is set.
// An IndexOutOfBoundsException that reaches the outer handler results in TokenNameEOF.
// Throws InvalidInputException for an unterminated block comment (UNTERMINATED_COMMENT)
// and for errors raised by the string, identifier and number helpers.
1180 public int getNextToken() throws InvalidInputException {
1181 phpExpressionTag = false;
1183 return getInlinedHTMLToken(currentPosition);
1186 this.wasAcr = false;
1188 jumpOverMethodBody();
1190 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1194 withoutUnicodePtr = 0;
1195 //start with a new token
1196 char encapsedChar = ' ';
1197 // if (!encapsedStringStack.isEmpty()) {
1198 // encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
1200 // if (encapsedChar != '$' && encapsedChar != ' ') {
1201 // currentCharacter = source[currentPosition++];
1202 // if (currentCharacter == encapsedChar) {
1203 // switch (currentCharacter) {
1205 // return TokenNameEncapsedString0;
1207 // return TokenNameEncapsedString1;
1209 // return TokenNameEncapsedString2;
1212 // while (currentCharacter != encapsedChar) {
1213 // /** ** in PHP \r and \n are valid in string literals *** */
1214 // switch (currentCharacter) {
1216 // int escapeSize = currentPosition;
1217 // boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1218 // //scanEscapeCharacter make a side effect on this value and
1219 // // we need the previous value few lines down this one
1220 // scanDoubleQuotedEscapeCharacter();
1221 // escapeSize = currentPosition - escapeSize;
1222 // if (withoutUnicodePtr == 0) {
1223 // //buffer all the entries that have been left aside....
1224 // withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1225 // System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1226 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1227 // } else { //overwrite the / in the buffer
1228 // withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1229 // if (backSlashAsUnicodeInString) { //there are TWO \ in
1230 // withoutUnicodePtr--;
1236 // if (recordLineSeparator) {
1237 // pushLineSeparator();
1241 // if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
1242 // currentPosition--;
1243 // encapsedStringStack.push(new Character('$'));
1244 // return TokenNameSTRING;
1248 // if (source[currentPosition] == '$') { // CURLY_OPEN
1249 // currentPosition--;
1250 // encapsedStringStack.push(new Character('$'));
1251 // return TokenNameSTRING;
1254 // // consume next character
1255 // unicodeAsBackSlash = false;
1256 // currentCharacter = source[currentPosition++];
1257 // if (withoutUnicodePtr != 0) {
1258 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1262 // currentPosition--;
1263 // return TokenNameSTRING;
1265 // ---------Consume white space and handles startPosition---------
1266 int whiteStart = currentPosition;
1267 startPosition = currentPosition;
1268 currentCharacter = source[currentPosition++];
1269 // if (encapsedChar == '$') {
1270 // switch (currentCharacter) {
1272 // currentCharacter = source[currentPosition++];
1273 // return TokenNameSTRING;
1275 // if (encapsedChar == '$') {
1276 // if (getNextChar('$'))
1277 // return TokenNameLBRACE_DOLLAR;
1279 // return TokenNameLBRACE;
1281 // return TokenNameRBRACE;
1283 // return TokenNameLBRACKET;
1285 // return TokenNameRBRACKET;
1287 // if (tokenizeStrings) {
1288 // consumeStringConstant();
1289 // return TokenNameStringSingleQuote;
1291 // return TokenNameEncapsedString1;
1293 // return TokenNameEncapsedString2;
1295 // if (tokenizeStrings) {
1296 // consumeStringInterpolated();
1297 // return TokenNameStringInterpolated;
1299 // return TokenNameEncapsedString0;
1301 // if (getNextChar('>'))
1302 // return TokenNameMINUS_GREATER;
1303 // return TokenNameSTRING;
1305 // if (currentCharacter == '$') {
1306 // int oldPosition = currentPosition;
1308 // currentCharacter = source[currentPosition++];
1309 // if (currentCharacter == '{') {
1310 // return TokenNameDOLLAR_LBRACE;
1312 // if (isPHPIdentifierStart(currentCharacter)) {
1313 // return scanIdentifierOrKeyword(true);
1315 // currentPosition = oldPosition;
1316 // return TokenNameSTRING;
1318 // } catch (IndexOutOfBoundsException e) {
1319 // currentPosition = oldPosition;
1320 // return TokenNameSTRING;
1323 // if (isPHPIdentifierStart(currentCharacter))
1324 // return scanIdentifierOrKeyword(false);
1325 // if (Character.isDigit(currentCharacter))
1326 // return scanNumber(false);
1327 // return TokenNameERROR;
1330 // boolean isWhiteSpace;
1332 while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1333 startPosition = currentPosition;
1334 currentCharacter = source[currentPosition++];
1335 // if (((currentCharacter = source[currentPosition++]) == '\\')
1336 // && (source[currentPosition] == 'u')) {
1337 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1339 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1340 checkNonExternalizeString();
1341 if (recordLineSeparator) {
1342 pushLineSeparator();
1347 // isWhiteSpace = (currentCharacter == ' ')
1348 // || Character.isWhitespace(currentCharacter);
// whiteStart != currentPosition - 1 means at least one whitespace character was skipped
1351 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1352 // reposition scanner in case we are interested by spaces as tokens
1354 startPosition = whiteStart;
1355 return TokenNameWHITESPACE;
1357 //little trick to get out in the middle of a source computation
1358 if (currentPosition > eofPosition)
1359 return TokenNameEOF;
1360 // ---------Identify the next token-------------
1361 switch (currentCharacter) {
1363 return getCastOrParen();
1365 return TokenNameRPAREN;
1367 return TokenNameLBRACE;
1369 return TokenNameRBRACE;
1371 return TokenNameLBRACKET;
1373 return TokenNameRBRACKET;
1375 return TokenNameSEMICOLON;
1377 return TokenNameCOMMA;
1379 if (getNextChar('='))
1380 return TokenNameDOT_EQUAL;
1381 if (getNextCharAsDigit())
1382 return scanNumber(true);
1383 return TokenNameDOT;
1386 if ((test = getNextChar('+', '=')) == 0)
1387 return TokenNamePLUS_PLUS;
1389 return TokenNamePLUS_EQUAL;
1390 return TokenNamePLUS;
1394 if ((test = getNextChar('-', '=')) == 0)
1395 return TokenNameMINUS_MINUS;
1397 return TokenNameMINUS_EQUAL;
1398 if (getNextChar('>'))
1399 return TokenNameMINUS_GREATER;
1400 return TokenNameMINUS;
1403 if (getNextChar('='))
1404 return TokenNameTWIDDLE_EQUAL;
1405 return TokenNameTWIDDLE;
1407 if (getNextChar('=')) {
1408 if (getNextChar('=')) {
1409 return TokenNameNOT_EQUAL_EQUAL;
1411 return TokenNameNOT_EQUAL;
1413 return TokenNameNOT;
1415 if (getNextChar('='))
1416 return TokenNameMULTIPLY_EQUAL;
1417 return TokenNameMULTIPLY;
1419 if (getNextChar('='))
1420 return TokenNameREMAINDER_EQUAL;
1421 return TokenNameREMAINDER;
// read one character beyond the current one; when the source ends here the position
// is restored and the token is a plain TokenNameLESS
1423 int oldPosition = currentPosition;
1425 currentCharacter = source[currentPosition++];
1426 } catch (IndexOutOfBoundsException e) {
1427 currentPosition = oldPosition;
1428 return TokenNameLESS;
1430 switch (currentCharacter) {
1432 return TokenNameLESS_EQUAL;
1434 return TokenNameNOT_EQUAL;
1436 if (getNextChar('='))
1437 return TokenNameLEFT_SHIFT_EQUAL;
// a further '<' introduces a heredoc: skip whitespace, read the heredoc identifier
// (heredocStart / heredocLength), then scan forward for that identifier again
1438 if (getNextChar('<')) {
1439 currentCharacter = source[currentPosition++];
1440 while (Character.isWhitespace(currentCharacter)) {
1441 currentCharacter = source[currentPosition++];
1443 int heredocStart = currentPosition - 1;
1444 int heredocLength = 0;
1445 if (isPHPIdentifierStart(currentCharacter)) {
1446 currentCharacter = source[currentPosition++];
1448 return TokenNameERROR;
1450 while (isPHPIdentifierPart(currentCharacter)) {
1451 currentCharacter = source[currentPosition++];
1453 heredocLength = currentPosition - heredocStart - 1;
1454 // heredoc end-tag determination
1455 boolean endTag = true;
1458 ch = source[currentPosition++];
1459 if (ch == '\r' || ch == '\n') {
1460 if (recordLineSeparator) {
1461 pushLineSeparator();
// compare the text at currentPosition with the heredoc identifier, character by character
1465 for (int i = 0; i < heredocLength; i++) {
1466 if (source[currentPosition + i] != source[heredocStart + i]) {
1472 currentPosition += heredocLength - 1;
1473 currentCharacter = source[currentPosition++];
1474 break; // do...while loop
1480 return TokenNameHEREDOC;
1482 return TokenNameLEFT_SHIFT;
1484 currentPosition = oldPosition;
1485 return TokenNameLESS;
1489 if ((test = getNextChar('=', '>')) == 0)
1490 return TokenNameGREATER_EQUAL;
1492 if ((test = getNextChar('=', '>')) == 0)
1493 return TokenNameRIGHT_SHIFT_EQUAL;
1494 return TokenNameRIGHT_SHIFT;
1496 return TokenNameGREATER;
1499 if (getNextChar('=')) {
1500 if (getNextChar('=')) {
1501 return TokenNameEQUAL_EQUAL_EQUAL;
1503 return TokenNameEQUAL_EQUAL;
1505 if (getNextChar('>'))
1506 return TokenNameEQUAL_GREATER;
1507 return TokenNameEQUAL;
1510 if ((test = getNextChar('&', '=')) == 0)
1511 return TokenNameAND_AND;
1513 return TokenNameAND_EQUAL;
1514 return TokenNameAND;
1518 if ((test = getNextChar('|', '=')) == 0)
1519 return TokenNameOR_OR;
1521 return TokenNameOR_EQUAL;
1525 if (getNextChar('='))
1526 return TokenNameXOR_EQUAL;
1527 return TokenNameXOR;
// when '>' follows: TokenNameINLINE_HTML at the very end of source, otherwise inline
// HTML scanning restarts two characters back; without '>' the token is TokenNameQUESTION
1529 if (getNextChar('>')) {
1531 if (currentPosition == source.length) {
1533 return TokenNameINLINE_HTML;
1535 return getInlinedHTMLToken(currentPosition - 2);
1537 return TokenNameQUESTION;
1539 if (getNextChar(':'))
1540 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1541 return TokenNameCOLON;
1545 consumeStringConstant();
1546 return TokenNameStringSingleQuote;
1548 // if (tokenizeStrings) {
1549 consumeStringLiteral();
1550 return TokenNameStringDoubleQuote;
1552 // return TokenNameEncapsedString2;
1554 // if (tokenizeStrings) {
1555 consumeStringInterpolated();
1556 return TokenNameStringInterpolated;
1558 // return TokenNameEncapsedString0;
1561 char startChar = currentCharacter;
1562 if (getNextChar('=') && startChar == '/') {
1563 return TokenNameDIVIDE_EQUAL;
// a '#' start character, or getNextChar('/', '*') answering 0, begins a line comment
1566 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1568 this.lastCommentLinePosition = this.currentPosition;
1569 int endPositionForLineComment = 0;
1570 try { //get the next char
1571 currentCharacter = source[currentPosition++];
1572 // if (((currentCharacter = source[currentPosition++])
1574 // && (source[currentPosition] == 'u')) {
1575 // //-------------unicode traitement ------------
1576 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1577 // currentPosition++;
1578 // while (source[currentPosition] == 'u') {
1579 // currentPosition++;
1582 // Character.getNumericValue(source[currentPosition++]))
1586 // Character.getNumericValue(source[currentPosition++]))
1590 // Character.getNumericValue(source[currentPosition++]))
1594 // Character.getNumericValue(source[currentPosition++]))
1598 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1600 // currentCharacter =
1601 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1604 //handle the \\u case manually into comment
1605 // if (currentCharacter == '\\') {
1606 // if (source[currentPosition] == '\\')
1607 // currentPosition++;
1608 // } //jump over the \\
1609 boolean isUnicode = false;
1610 while (currentCharacter != '\r' && currentCharacter != '\n') {
1611 this.lastCommentLinePosition = this.currentPosition;
1612 if (currentCharacter == '?') {
1613 if (getNextChar('>')) {
1614 // ?> breaks line comments
1615 startPosition = currentPosition - 2;
1617 return TokenNameINLINE_HTML;
1622 currentCharacter = source[currentPosition++];
1623 // if (((currentCharacter = source[currentPosition++])
1625 // && (source[currentPosition] == 'u')) {
1626 // isUnicode = true;
1627 // //-------------unicode traitement ------------
1628 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1629 // currentPosition++;
1630 // while (source[currentPosition] == 'u') {
1631 // currentPosition++;
1634 // Character.getNumericValue(source[currentPosition++]))
1638 // Character.getNumericValue(
1639 // source[currentPosition++]))
1643 // Character.getNumericValue(
1644 // source[currentPosition++]))
1648 // Character.getNumericValue(
1649 // source[currentPosition++]))
1653 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1655 // currentCharacter =
1656 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1659 //handle the \\u case manually into comment
1660 // if (currentCharacter == '\\') {
1661 // if (source[currentPosition] == '\\')
1662 // currentPosition++;
1663 // } //jump over the \\
1666 endPositionForLineComment = currentPosition - 6;
1668 endPositionForLineComment = currentPosition - 1;
1670 // recordComment(false);
1671 recordComment(TokenNameCOMMENT_LINE);
1672 if (this.taskTags != null)
1673 checkTaskTag(this.startPosition, this.currentPosition);
1674 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1675 checkNonExternalizeString();
1676 if (recordLineSeparator) {
1678 pushUnicodeLineSeparator();
1680 pushLineSeparator();
1686 if (tokenizeComments) {
1688 currentPosition = endPositionForLineComment;
1689 // reset one character behind
1691 return TokenNameCOMMENT_LINE;
1693 } catch (IndexOutOfBoundsException e) { //an eof will then
1695 if (tokenizeComments) {
1697 // reset one character behind
1698 return TokenNameCOMMENT_LINE;
1704 //traditional and annotation comment
1705 boolean isJavadoc = false, star = false;
1706 // consume next character
1707 unicodeAsBackSlash = false;
1708 currentCharacter = source[currentPosition++];
1709 // if (((currentCharacter = source[currentPosition++]) ==
1711 // && (source[currentPosition] == 'u')) {
1712 // getNextUnicodeChar();
1714 // if (withoutUnicodePtr != 0) {
1715 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1716 // currentCharacter;
1719 if (currentCharacter == '*') {
1723 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1724 checkNonExternalizeString();
1725 if (recordLineSeparator) {
1726 pushLineSeparator();
1731 try { //get the next char
1732 currentCharacter = source[currentPosition++];
1733 // if (((currentCharacter = source[currentPosition++])
1735 // && (source[currentPosition] == 'u')) {
1736 // //-------------unicode traitement ------------
1737 // getNextUnicodeChar();
1739 //handle the \\u case manually into comment
1740 // if (currentCharacter == '\\') {
1741 // if (source[currentPosition] == '\\')
1742 // currentPosition++;
1743 // //jump over the \\
1745 // empty comment is not a javadoc /**/
1746 if (currentCharacter == '/') {
1749 //loop until end of comment */
// star remembers whether the previously read character was '*', so the loop stops
// only on a '/' that directly follows a '*'
1750 while ((currentCharacter != '/') || (!star)) {
1751 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1752 checkNonExternalizeString();
1753 if (recordLineSeparator) {
1754 pushLineSeparator();
1759 star = currentCharacter == '*';
1761 currentCharacter = source[currentPosition++];
1762 // if (((currentCharacter = source[currentPosition++])
1764 // && (source[currentPosition] == 'u')) {
1765 // //-------------unicode traitement ------------
1766 // getNextUnicodeChar();
1768 //handle the \\u case manually into comment
1769 // if (currentCharacter == '\\') {
1770 // if (source[currentPosition] == '\\')
1771 // currentPosition++;
1772 // } //jump over the \\
1774 //recordComment(isJavadoc);
1776 recordComment(TokenNameCOMMENT_PHPDOC);
1778 recordComment(TokenNameCOMMENT_BLOCK);
1781 if (tokenizeComments) {
1783 return TokenNameCOMMENT_PHPDOC;
1784 return TokenNameCOMMENT_BLOCK;
1787 if (this.taskTags != null) {
1788 checkTaskTag(this.startPosition, this.currentPosition);
1790 } catch (IndexOutOfBoundsException e) {
1791 // reset end position for error reporting
1792 currentPosition -= 2;
1793 throw new InvalidInputException(UNTERMINATED_COMMENT);
1797 return TokenNameDIVIDE;
1801 return TokenNameEOF;
1802 //the atEnd may not be <currentPosition == source.length> if
1803 // source is only some part of a real (external) stream
1804 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// '$' directly followed by an identifier start goes to scanIdentifierOrKeyword(true);
// otherwise (or at end of source) the position is restored and TokenNameDOLLAR is returned
1806 if (currentCharacter == '$') {
1807 int oldPosition = currentPosition;
1809 currentCharacter = source[currentPosition++];
1810 if (isPHPIdentifierStart(currentCharacter)) {
1811 return scanIdentifierOrKeyword(true);
1813 currentPosition = oldPosition;
1814 return TokenNameDOLLAR;
1816 } catch (IndexOutOfBoundsException e) {
1817 currentPosition = oldPosition;
1818 return TokenNameDOLLAR;
1821 if (isPHPIdentifierStart(currentCharacter))
1822 return scanIdentifierOrKeyword(false);
1823 if (Character.isDigit(currentCharacter))
1824 return scanNumber(false);
1825 return TokenNameERROR;
1828 } //-----------------end switch while try--------------------
1829 catch (IndexOutOfBoundsException e) {
1832 return TokenNameEOF;
1837 * @throws InvalidInputException
// Scans inline HTML (text outside of PHP tags) and watches for a PHP open tag.
// start: offset stored in startPosition for the token that is returned.
// Returns TokenNameEOF when currentPosition is already past source.length (after
// clamping it to source.length); every other visible exit returns TokenNameINLINE_HTML,
// including the handler for running off the end of source.
// Tag detection: a '<' followed by '?' is examined further; the next character is
// tested against 'P'/'p' and '=' (the "<?=" form involves phpExpressionTag), and the
// long form continues with getNextChar('H', 'h') and getNextChar('P', 'p').
// When ignorePHPOneLiner is set (used by the CodeFormatter), lookAheadLinePHPTag() is
// consulted before the tag is accepted.
// NOTE(review): the exact branch in which phpExpressionTag is set and where PHP mode is
// switched cannot be fully determined from the visible lines -- confirm.
1839 private int getInlinedHTMLToken(int start) throws InvalidInputException {
1840 if (currentPosition > source.length) {
1841 currentPosition = source.length;
1842 return TokenNameEOF;
1844 startPosition = start;
1847 currentCharacter = source[currentPosition++];
1848 if (currentCharacter == '<') {
1849 if (getNextChar('?')) {
1850 currentCharacter = source[currentPosition++];
1851 if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
1852 if (currentCharacter != '=') { // <?=
1855 phpExpressionTag = true;
1858 if (ignorePHPOneLiner) { // for CodeFormatter
1859 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1861 return TokenNameINLINE_HTML;
1865 return TokenNameINLINE_HTML;
1868 // boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
1870 int test = getNextChar('H', 'h');
1872 test = getNextChar('P', 'p');
1875 if (ignorePHPOneLiner) {
1876 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1878 return TokenNameINLINE_HTML;
1882 return TokenNameINLINE_HTML;
// line breaks inside the HTML text are still reported so line numbers stay correct
1890 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1891 if (recordLineSeparator) {
1892 pushLineSeparator();
1897 } //-----------------while--------------------
1899 return TokenNameINLINE_HTML;
1900 } //-----------------try--------------------
1901 catch (IndexOutOfBoundsException e) {
1902 startPosition = start;
1906 return TokenNameINLINE_HTML;
// Looks ahead from currentPosition, one character at a time, using a private cursor
// (currentPositionInLine) and tracking whether the cursor is inside a single- or a
// double-quoted string.
// Returns TokenNameEOF -- used purely as a dummy value -- after moving currentPosition
// to the cursor when a character directly preceded by '?' is matched (presumably the
// '>' of a closing "?>" -- confirm against the case label).
// Returns TokenNameINLINE_HTML on the other visible exits, e.g. for characters seen
// outside of any string or directly after a '/', and when the look-ahead runs off the
// end of source (currentPosition is then set to the cursor as well).
1912 private int lookAheadLinePHPTag() {
1913 // check if the PHP is only in this line (for CodeFormatter)
1914 int currentPositionInLine = currentPosition;
1915 char previousCharInLine = ' ';
1916 char currentCharInLine = ' ';
1917 boolean singleQuotedStringActive = false;
1918 boolean doubleQuotedStringActive = false;
1921 // look ahead in this line
1923 previousCharInLine = currentCharInLine;
1924 currentCharInLine = source[currentPositionInLine++];
1925 switch (currentCharInLine) {
1927 if (previousCharInLine == '?') {
1928 // update the scanner's current Position in the source
1929 currentPosition = currentPositionInLine;
1930 // use as "dummy" token
1931 return TokenNameEOF;
1935 if (doubleQuotedStringActive) {
1936 // ignore escaped characters in double quoted strings
1937 previousCharInLine = currentCharInLine;
1938 currentCharInLine = source[currentPositionInLine++];
// a double quote toggles doubleQuotedStringActive unless a single-quoted string is open
1941 if (doubleQuotedStringActive) {
1942 doubleQuotedStringActive = false;
1944 if (!singleQuotedStringActive) {
1945 doubleQuotedStringActive = true;
// a single-quoted string is only closed when the preceding character is not a backslash
1950 if (singleQuotedStringActive) {
1951 if (previousCharInLine != '\\') {
1952 singleQuotedStringActive = false;
1955 if (!doubleQuotedStringActive) {
1956 singleQuotedStringActive = true;
1962 return TokenNameINLINE_HTML;
1964 if (!singleQuotedStringActive && !doubleQuotedStringActive) {
1966 return TokenNameINLINE_HTML;
1970 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1972 return TokenNameINLINE_HTML;
1976 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1978 return TokenNameINLINE_HTML;
1983 } catch (IndexOutOfBoundsException e) {
1985 currentPosition = currentPositionInLine;
1986 return TokenNameINLINE_HTML;
1990 // public final void getNextUnicodeChar()
1991 // throws IndexOutOfBoundsException, InvalidInputException {
1993 // //handle the case of unicode.
1994 // //when a unicode appears then we must use a buffer that holds char
1996 // //At the end of this method currentCharacter holds the new visited char
1997 // //and currentPosition points right next after it
1999 // //ALL getNextChar.... ARE OPTIMIZED COPIES
2001 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2002 // currentPosition++;
2003 // while (source[currentPosition] == 'u') {
2004 // currentPosition++;
2008 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2010 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2012 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2014 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2016 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2018 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2019 // //need the unicode buffer
2020 // if (withoutUnicodePtr == 0) {
2021 // //buffer all the entries that have been left aside....
2022 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2023 // System.arraycopy(
2026 // withoutUnicodeBuffer,
2028 // withoutUnicodePtr);
2030 // //fill the buffer with the char
2031 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2033 // unicodeAsBackSlash = currentCharacter == '\\';
2036 * Tokenize a method body, assuming that curly brackets are properly balanced.
// Advances the scanner through source text without returning tokens. getNextToken()
// calls this and afterwards answers TokenNameEOF or TokenNameRBRACE.
// Each round of the outer loop first consumes whitespace (reporting line separators
// through pushLineSeparator() when recordLineSeparator is set) and then dispatches on
// currentCharacter: quoted text is skipped (escapes via
// scanDoubleQuotedEscapeCharacter()), line comments are skipped up to the line break,
// block comments up to the closing "*/", and identifiers (including a leading '$')
// are passed to scanIdentifierOrKeyword(...).
// wasAcr is reset on entry. The method declares no checked exception:
// IndexOutOfBoundsException and InvalidInputException are caught inside it.
2038 public final void jumpOverMethodBody() {
2039 this.wasAcr = false;
2042 while (true) { //loop for jumping over comments
2043 // ---------Consume white space and handles startPosition---------
2044 boolean isWhiteSpace;
2046 startPosition = currentPosition;
2047 currentCharacter = source[currentPosition++];
2048 // if (((currentCharacter = source[currentPosition++]) == '\\')
2049 // && (source[currentPosition] == 'u')) {
2050 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2052 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2053 pushLineSeparator();
2054 isWhiteSpace = Character.isWhitespace(currentCharacter);
2056 } while (isWhiteSpace);
2057 // -------consume token until } is found---------
2058 switch (currentCharacter) {
2069 test = getNextChar('\\');
2072 scanDoubleQuotedEscapeCharacter();
2073 } catch (InvalidInputException ex) {
2077 // try { // consume next character
2078 unicodeAsBackSlash = false;
2079 currentCharacter = source[currentPosition++];
2080 // if (((currentCharacter = source[currentPosition++]) == '\\')
2081 // && (source[currentPosition] == 'u')) {
2082 // getNextUnicodeChar();
2084 if (withoutUnicodePtr != 0) {
2085 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2088 // } catch (InvalidInputException ex) {
2096 // try { // consume next character
2097 unicodeAsBackSlash = false;
2098 currentCharacter = source[currentPosition++];
2099 // if (((currentCharacter = source[currentPosition++]) == '\\')
2100 // && (source[currentPosition] == 'u')) {
2101 // getNextUnicodeChar();
2103 if (withoutUnicodePtr != 0) {
2104 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2107 // } catch (InvalidInputException ex) {
// skip characters up to the closing double quote
2109 while (currentCharacter != '"') {
2110 if (currentCharacter == '\r') {
2111 if (source[currentPosition] == '\n')
2114 // the string cannot go further than the line
2116 if (currentCharacter == '\n') {
2118 // the string cannot go further than the line
2120 if (currentCharacter == '\\') {
2122 scanDoubleQuotedEscapeCharacter();
2123 } catch (InvalidInputException ex) {
2127 // try { // consume next character
2128 unicodeAsBackSlash = false;
2129 currentCharacter = source[currentPosition++];
2130 // if (((currentCharacter = source[currentPosition++]) == '\\')
2131 // && (source[currentPosition] == 'u')) {
2132 // getNextUnicodeChar();
2134 if (withoutUnicodePtr != 0) {
2135 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2138 // } catch (InvalidInputException ex) {
2141 } catch (IndexOutOfBoundsException e) {
// getNextChar('/', '*') answering 0 selects the line-comment branch
2147 if ((test = getNextChar('/', '*')) == 0) {
2151 currentCharacter = source[currentPosition++];
2152 // if (((currentCharacter = source[currentPosition++]) ==
2154 // && (source[currentPosition] == 'u')) {
2155 // //-------------unicode traitement ------------
2156 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2157 // currentPosition++;
2158 // while (source[currentPosition] == 'u') {
2159 // currentPosition++;
2162 // Character.getNumericValue(source[currentPosition++]))
2166 // Character.getNumericValue(source[currentPosition++]))
2170 // Character.getNumericValue(source[currentPosition++]))
2174 // Character.getNumericValue(source[currentPosition++]))
2177 // //error don't care of the value
2178 // currentCharacter = 'A';
2179 // } //something different from \n and \r
2181 // currentCharacter =
2182 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2185 while (currentCharacter != '\r' && currentCharacter != '\n') {
2187 currentCharacter = source[currentPosition++];
2188 // if (((currentCharacter = source[currentPosition++])
2190 // && (source[currentPosition] == 'u')) {
2191 // //-------------unicode traitement ------------
2192 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2193 // currentPosition++;
2194 // while (source[currentPosition] == 'u') {
2195 // currentPosition++;
2198 // Character.getNumericValue(source[currentPosition++]))
2202 // Character.getNumericValue(source[currentPosition++]))
2206 // Character.getNumericValue(source[currentPosition++]))
2210 // Character.getNumericValue(source[currentPosition++]))
2213 // //error don't care of the value
2214 // currentCharacter = 'A';
2215 // } //something different from \n and \r
2217 // currentCharacter =
2218 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2222 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2223 pushLineSeparator();
2224 } catch (IndexOutOfBoundsException e) {
2225 } //an eof will then be generated
2229 //traditional and annotation comment
2230 boolean star = false;
2231 // try { // consume next character
2232 unicodeAsBackSlash = false;
2233 currentCharacter = source[currentPosition++];
2234 // if (((currentCharacter = source[currentPosition++]) == '\\')
2235 // && (source[currentPosition] == 'u')) {
2236 // getNextUnicodeChar();
2238 if (withoutUnicodePtr != 0) {
2239 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2242 // } catch (InvalidInputException ex) {
2244 if (currentCharacter == '*') {
2247 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2248 pushLineSeparator();
2249 try { //get the next char
2250 currentCharacter = source[currentPosition++];
2251 // if (((currentCharacter = source[currentPosition++]) ==
2253 // && (source[currentPosition] == 'u')) {
2254 // //-------------unicode traitement ------------
2255 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2256 // currentPosition++;
2257 // while (source[currentPosition] == 'u') {
2258 // currentPosition++;
2261 // Character.getNumericValue(source[currentPosition++]))
2265 // Character.getNumericValue(source[currentPosition++]))
2269 // Character.getNumericValue(source[currentPosition++]))
2273 // Character.getNumericValue(source[currentPosition++]))
2276 // //error don't care of the value
2277 // currentCharacter = 'A';
2278 // } //something different from * and /
2280 // currentCharacter =
2281 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2284 //loop until end of comment */
// star remembers whether the previously read character was '*', so the loop stops
// only on a '/' that directly follows a '*'
2285 while ((currentCharacter != '/') || (!star)) {
2286 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2287 pushLineSeparator();
2288 star = currentCharacter == '*';
2290 currentCharacter = source[currentPosition++];
2291 // if (((currentCharacter = source[currentPosition++])
2293 // && (source[currentPosition] == 'u')) {
2294 // //-------------unicode traitement ------------
2295 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2296 // currentPosition++;
2297 // while (source[currentPosition] == 'u') {
2298 // currentPosition++;
2301 // Character.getNumericValue(source[currentPosition++]))
2305 // Character.getNumericValue(source[currentPosition++]))
2309 // Character.getNumericValue(source[currentPosition++]))
2313 // Character.getNumericValue(source[currentPosition++]))
2316 // //error don't care of the value
2317 // currentCharacter = 'A';
2318 // } //something different from * and /
2320 // currentCharacter =
2321 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2325 } catch (IndexOutOfBoundsException e) {
// identifiers, and '$'-prefixed names, are scanned only to move past them
2333 if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
2335 scanIdentifierOrKeyword((currentCharacter == '$'));
2336 } catch (InvalidInputException ex) {
2341 if (Character.isDigit(currentCharacter)) {
2344 } catch (InvalidInputException ex) {
2351 //-----------------end switch while try--------------------
2352 } catch (IndexOutOfBoundsException e) {
2353 } catch (InvalidInputException e) {
2358 // public final boolean jumpOverUnicodeWhiteSpace()
2359 // throws InvalidInputException {
2361 // //handle the case of unicode. Jump over the next whiteSpace
2362 // //making startPosition pointing on the next available char
2363 // //On false, the currentCharacter is filled up with a potential
2367 // this.wasAcr = false;
2368 // int c1, c2, c3, c4;
2369 // int unicodeSize = 6;
2370 // currentPosition++;
2371 // while (source[currentPosition] == 'u') {
2372 // currentPosition++;
2376 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2378 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2380 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2382 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2384 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2387 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2388 // if (recordLineSeparator
2389 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2390 // pushLineSeparator();
2391 // if (Character.isWhitespace(currentCharacter))
2394 // //buffer the new char which is not a white space
2395 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2396 // //withoutUnicodePtr == 1 is true here
2398 // } catch (IndexOutOfBoundsException e) {
2399 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2402 public final int[] getLineEnds() {
2403 //return a bounded copy of this.lineEnds
2405 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2409 public char[] getSource() {
2413 public static boolean isIdentifierOrKeyword(int token) {
2414 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
/**
 * Returns the source of the current one-character token.
 * <p>
 * The character is read from {@code source[startPosition]}. The final
 * fallback allocates a fresh single-element array holding that character.
 */
2417 final char[] optimizedCurrentTokenSource1() {
2418 //return always the same char[] build only once
2419 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2420 char charOne = source[startPosition];
2475 return new char[] { charOne };
/**
 * Returns the source of the current two-character token, reusing a
 * previously built array when one with the same content is available.
 * <p>
 * The two characters are read from {@code source[startPosition]} and
 * {@code source[startPosition + 1]}. A first dispatch hands back one of the
 * preallocated {@code charArray_va} .. {@code charArray_vz} constants
 * (presumably the variables {@code $a} .. {@code $z} - confirm against the
 * constant definitions). Otherwise the pair is looked up in, and if absent
 * added to, the hashed table {@code charArray_length[0]}.
 */
2479 final char[] optimizedCurrentTokenSource2() {
2481 c0 = source[startPosition];
2482 c1 = source[startPosition + 1];
2484 //return always the same char[] build only once
2485 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2488 return charArray_va;
2490 return charArray_vb;
2492 return charArray_vc;
2494 return charArray_vd;
2496 return charArray_ve;
2498 return charArray_vf;
2500 return charArray_vg;
2502 return charArray_vh;
2504 return charArray_vi;
2506 return charArray_vj;
2508 return charArray_vk;
2510 return charArray_vl;
2512 return charArray_vm;
2514 return charArray_vn;
2516 return charArray_vo;
2518 return charArray_vp;
2520 return charArray_vq;
2522 return charArray_vr;
2524 return charArray_vs;
2526 return charArray_vt;
2528 return charArray_vu;
2530 return charArray_vv;
2532 return charArray_vw;
2534 return charArray_vx;
2536 return charArray_vy;
2538 return charArray_vz;
2541 //try to return the same char[] build only once
// the bucket is selected by a hash of both characters, reduced modulo TableSize
2542 int hash = ((c0 << 6) + c1) % TableSize;
2543 char[][] table = charArray_length[0][hash];
// first pass: slots up to the end of the bucket (bounded by InternalTableSize)
2545 while (++i < InternalTableSize) {
2546 char[] charArray = table[i];
2547 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2550 //---------other side---------
// second pass: slots up to and including newEntry2
2552 int max = newEntry2;
2553 while (++i <= max) {
2554 char[] charArray = table[i];
2555 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2558 //--------add the entry-------
// not cached yet: store a new array in the slot after newEntry2
2559 if (++max >= InternalTableSize)
2562 table[max] = (r = new char[] { c0, c1 });
2567 final char[] optimizedCurrentTokenSource3() {
2568 //try to return the same char[] build only once
2570 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2572 char[][] table = charArray_length[1][hash];
2574 while (++i < InternalTableSize) {
2575 char[] charArray = table[i];
2576 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2579 //---------other side---------
2581 int max = newEntry3;
2582 while (++i <= max) {
2583 char[] charArray = table[i];
2584 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2587 //--------add the entry-------
2588 if (++max >= InternalTableSize)
2591 table[max] = (r = new char[] { c0, c1, c2 });
2596 final char[] optimizedCurrentTokenSource4() {
2597 //try to return the same char[] build only once
2598 char c0, c1, c2, c3;
2599 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2600 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2602 char[][] table = charArray_length[2][(int) hash];
2604 while (++i < InternalTableSize) {
2605 char[] charArray = table[i];
2606 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2609 //---------other side---------
2611 int max = newEntry4;
2612 while (++i <= max) {
2613 char[] charArray = table[i];
2614 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2617 //--------add the entry-------
2618 if (++max >= InternalTableSize)
2621 table[max] = (r = new char[] { c0, c1, c2, c3 });
2626 final char[] optimizedCurrentTokenSource5() {
2627 //try to return the same char[] build only once
2628 char c0, c1, c2, c3, c4;
2629 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2630 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2632 char[][] table = charArray_length[3][(int) hash];
2634 while (++i < InternalTableSize) {
2635 char[] charArray = table[i];
2636 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2639 //---------other side---------
2641 int max = newEntry5;
2642 while (++i <= max) {
2643 char[] charArray = table[i];
2644 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2647 //--------add the entry-------
2648 if (++max >= InternalTableSize)
2651 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2656 final char[] optimizedCurrentTokenSource6() {
2657 //try to return the same char[] build only once
2658 char c0, c1, c2, c3, c4, c5;
2659 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2660 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2661 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2663 char[][] table = charArray_length[4][(int) hash];
2665 while (++i < InternalTableSize) {
2666 char[] charArray = table[i];
2667 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2668 && (c5 == charArray[5]))
2671 //---------other side---------
2673 int max = newEntry6;
2674 while (++i <= max) {
2675 char[] charArray = table[i];
2676 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2677 && (c5 == charArray[5]))
2680 //--------add the entry-------
2681 if (++max >= InternalTableSize)
2684 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2689 public final void pushLineSeparator() throws InvalidInputException {
2690 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2691 final int INCREMENT = 250;
2692 if (this.checkNonExternalizedStringLiterals) {
2693 // reinitialize the current line for non externalize strings purpose
2696 //currentCharacter is at position currentPosition-1
2698 if (currentCharacter == '\r') {
2699 int separatorPos = currentPosition - 1;
2700 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2702 //System.out.println("CR-" + separatorPos);
2704 lineEnds[++linePtr] = separatorPos;
2705 } catch (IndexOutOfBoundsException e) {
2706 //linePtr value is correct
2707 int oldLength = lineEnds.length;
2708 int[] old = lineEnds;
2709 lineEnds = new int[oldLength + INCREMENT];
2710 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2711 lineEnds[linePtr] = separatorPos;
2713 // look-ahead for merged cr+lf
2715 if (source[currentPosition] == '\n') {
2716 //System.out.println("look-ahead LF-" + currentPosition);
2717 lineEnds[linePtr] = currentPosition;
2723 } catch (IndexOutOfBoundsException e) {
2728 if (currentCharacter == '\n') {
2729 //must merge eventual cr followed by lf
2730 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2731 //System.out.println("merge LF-" + (currentPosition - 1));
2732 lineEnds[linePtr] = currentPosition - 1;
2734 int separatorPos = currentPosition - 1;
2735 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2737 // System.out.println("LF-" + separatorPos);
2739 lineEnds[++linePtr] = separatorPos;
2740 } catch (IndexOutOfBoundsException e) {
2741 //linePtr value is correct
2742 int oldLength = lineEnds.length;
2743 int[] old = lineEnds;
2744 lineEnds = new int[oldLength + INCREMENT];
2745 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2746 lineEnds[linePtr] = separatorPos;
2754 public final void pushUnicodeLineSeparator() {
2755 // isUnicode means that the \r or \n has been read as a unicode character
2756 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2757 final int INCREMENT = 250;
2758 //currentCharacter is at position currentPosition-1
2759 if (this.checkNonExternalizedStringLiterals) {
2760 // reinitialize the current line for non externalize strings purpose
2764 if (currentCharacter == '\r') {
2765 int separatorPos = currentPosition - 6;
2766 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2768 //System.out.println("CR-" + separatorPos);
2770 lineEnds[++linePtr] = separatorPos;
2771 } catch (IndexOutOfBoundsException e) {
2772 //linePtr value is correct
2773 int oldLength = lineEnds.length;
2774 int[] old = lineEnds;
2775 lineEnds = new int[oldLength + INCREMENT];
2776 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2777 lineEnds[linePtr] = separatorPos;
2779 // look-ahead for merged cr+lf
2780 if (source[currentPosition] == '\n') {
2781 //System.out.println("look-ahead LF-" + currentPosition);
2782 lineEnds[linePtr] = currentPosition;
2790 if (currentCharacter == '\n') {
2791 //must merge eventual cr followed by lf
2792 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2793 //System.out.println("merge LF-" + (currentPosition - 1));
2794 lineEnds[linePtr] = currentPosition - 6;
2796 int separatorPos = currentPosition - 6;
2797 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2799 // System.out.println("LF-" + separatorPos);
2801 lineEnds[++linePtr] = separatorPos;
2802 } catch (IndexOutOfBoundsException e) {
2803 //linePtr value is correct
2804 int oldLength = lineEnds.length;
2805 int[] old = lineEnds;
2806 lineEnds = new int[oldLength + INCREMENT];
2807 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2808 lineEnds[linePtr] = separatorPos;
2816 public void recordComment(int token) {
2818 int stopPosition = this.currentPosition;
2820 case TokenNameCOMMENT_LINE:
2821 stopPosition = -this.lastCommentLinePosition;
2823 case TokenNameCOMMENT_BLOCK:
2824 stopPosition = -this.currentPosition;
2828 // a new comment is recorded
2829 int length = this.commentStops.length;
2830 if (++this.commentPtr >= length) {
2831 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2832 //grows the positions buffers too
2833 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2835 this.commentStops[this.commentPtr] = stopPosition;
2836 this.commentStarts[this.commentPtr] = this.startPosition;
2839 // public final void recordComment(boolean isJavadoc) {
2840 // // a new annotation comment is recorded
2842 // commentStops[++commentPtr] = isJavadoc
2843 // ? currentPosition
2844 // : -currentPosition;
2845 // } catch (IndexOutOfBoundsException e) {
2846 // int oldStackLength = commentStops.length;
2847 // int[] oldStack = commentStops;
2848 // commentStops = new int[oldStackLength + 30];
2849 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2850 // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2851 // //grows the positions buffers too
2852 // int[] old = commentStarts;
2853 // commentStarts = new int[oldStackLength + 30];
2854 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2856 // //the buffer is of a correct size here
2857 // commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning can start again at {@code begin}.
 * <p>
 * {@code initialPosition}, {@code startPosition} and {@code currentPosition}
 * are all set to {@code begin}, and the comment stack is emptied.
 *
 * @param begin the position to restart from
 * @param end the last position to scan; {@code eofPosition} becomes
 *        {@code end + 1}, except when {@code end} is
 *        {@code Integer.MAX_VALUE}, where it is used as is so that the
 *        increment cannot overflow
 */
2859 public void resetTo(int begin, int end) {
2860 //reset the scanner to a given position where it may rescan again
2862 initialPosition = startPosition = currentPosition = begin;
2863 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2864 commentPtr = -1; // reset comment stack
/**
 * Decodes the character that follows a backslash in a single-quoted string.
 * <p>
 * The next source character is consumed into {@code currentCharacter} and
 * then replaced according to the switch below; the visible branches produce
 * either a single quote or a backslash.
 *
 * @throws InvalidInputException declared by the signature; no throw statement
 *         is visible in this method
 */
2867 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2868 // the string with "\\u" is a legal string of two chars \ and u
2869 //thus we use a direct access to the source (for regular cases).
2870 // if (unicodeAsBackSlash) {
2871 // // consume next character
2872 // unicodeAsBackSlash = false;
2873 // if (((currentCharacter = source[currentPosition++]) == '\\')
2874 // && (source[currentPosition] == 'u')) {
2875 // getNextUnicodeChar();
2877 // if (withoutUnicodePtr != 0) {
2878 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// consume the escaped character (direct access, no unicode handling)
2882 currentCharacter = source[currentPosition++];
2883 switch (currentCharacter) {
2885 currentCharacter = '\'';
2888 currentCharacter = '\\';
2891 currentCharacter = '\\';
/**
 * Decodes the character that follows a backslash in a double-quoted string.
 * <p>
 * The next source character is consumed into {@code currentCharacter} and
 * replaced by the character it denotes. The visible branches produce tab,
 * line feed, carriage return, double quote, single quote, backslash and
 * {@code $}; the branches for {@code '\b'} and {@code '\f'} are commented
 * out. Remaining input is tried as an octal escape of one to three digits,
 * whose value is stored in {@code currentCharacter}.
 *
 * @throws InvalidInputException with {@code INVALID_ESCAPE}, raised from the
 *         octal branch
 */
2896 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2897 currentCharacter = source[currentPosition++];
2898 switch (currentCharacter) {
2900 // currentCharacter = '\b';
2903 currentCharacter = '\t';
2906 currentCharacter = '\n';
2909 // currentCharacter = '\f';
2912 currentCharacter = '\r';
2915 currentCharacter = '\"';
2918 currentCharacter = '\'';
2921 currentCharacter = '\\';
2924 currentCharacter = '$';
2927 // -----------octal escape--------------
2929 // OctalDigit OctalDigit
2930 // ZeroToThree OctalDigit OctalDigit
2931 int number = Character.getNumericValue(currentCharacter);
2932 if (number >= 0 && number <= 7) {
// a first digit above 3 cannot start a three-digit octal escape
2933 boolean zeroToThreeNot = number > 3;
2934 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2935 int digit = Character.getNumericValue(currentCharacter);
2936 if (digit >= 0 && digit <= 7) {
2937 number = (number * 8) + digit;
2938 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2939 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2940 // Digit --> ignore last character
2943 digit = Character.getNumericValue(currentCharacter);
2944 if (digit >= 0 && digit <= 7) {
2945 // has read \ZeroToThree OctalDigit OctalDigit
2946 number = (number * 8) + digit;
2947 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2948 // --> ignore last character
2952 } else { // has read \OctalDigit NonDigit--> ignore last
2956 } else { // has read \OctalDigit NonOctalDigit--> ignore last
2960 } else { // has read \OctalDigit --> ignore last character
2964 throw new InvalidInputException(INVALID_ESCAPE);
2965 currentCharacter = (char) number;
2968 // throw new InvalidInputException(INVALID_ESCAPE);
2972 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2973 // return scanIdentifierOrKeyword( false );
/**
 * Scans the rest of an identifier and classifies it as a variable, a PHP
 * keyword or a plain identifier.
 * <p>
 * Characters are consumed for as long as
 * {@code getNextCharAsJavaIdentifierPart()} succeeds. A one-character token is
 * always {@code TokenNameIdentifier}. Otherwise the token text is copied into
 * a lower-cased array, so keyword matching is case-insensitive, and compared
 * character by character against the keyword spellings, dispatching on the
 * first letter.
 *
 * @param isVariable flag supplied by the caller; presumably selects the
 *        {@code TokenNameVariable} result - confirm against the guarding
 *        condition
 * @return {@code TokenNameVariable}, the token of the matching keyword, or
 *         {@code TokenNameIdentifier} when no keyword matches
 * @throws InvalidInputException declared by the signature; no throw statement
 *         is visible in this method
 */
2975 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2977 //first dispatch on the first char.
2978 //then the length. If there are several
2979 //keywors with the same length AND the same first char, then do another
2980 //disptach on the second char :-)...cool....but fast !
2981 useAssertAsAnIndentifier = false;
2982 while (getNextCharAsJavaIdentifierPart()) {
2986 // if (new String(getCurrentTokenSource()).equals("$this")) {
2987 // return TokenNamethis;
2989 return TokenNameVariable;
2994 // if (withoutUnicodePtr == 0)
2995 //quick test on length == 1 but not on length > 12 while most identifier
2996 //have a length which is <= 12...but there are lots of identifier with
2999 if ((length = currentPosition - startPosition) == 1)
3000 return TokenNameIdentifier;
// lower-cased copy of the token text: keywords are matched case-insensitively
3002 data = new char[length];
3003 index = startPosition;
3004 for (int i = 0; i < length; i++) {
3005 data[i] = Character.toLowerCase(source[index + i]);
3009 // if ((length = withoutUnicodePtr) == 1)
3010 // return TokenNameIdentifier;
3011 // // data = withoutUnicodeBuffer;
3012 // data = new char[withoutUnicodeBuffer.length];
3013 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3014 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3018 firstLetter = data[index];
3019 switch (firstLetter) {
3024 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3025 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3026 return TokenNameFILE;
3027 index = 0; //__LINE__
3028 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3029 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3030 return TokenNameLINE;
3034 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3035 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3036 return TokenNameCLASS_C;
3040 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3041 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3042 && (data[++index] == '_'))
3043 return TokenNameMETHOD_C;
3047 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3048 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3049 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3050 return TokenNameFUNC_C;
3053 return TokenNameIdentifier;
3055 // as and array abstract
3059 if ((data[++index] == 's')) {
3062 return TokenNameIdentifier;
3066 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3067 return TokenNameand;
3069 return TokenNameIdentifier;
3073 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3074 return TokenNamearray;
3076 return TokenNameIdentifier;
3078 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3079 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3080 return TokenNameabstract;
3082 return TokenNameIdentifier;
3084 return TokenNameIdentifier;
3090 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3091 return TokenNamebreak;
3093 return TokenNameIdentifier;
3095 return TokenNameIdentifier;
3098 //case catch class clone const continue
3101 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3102 return TokenNamecase;
3104 return TokenNameIdentifier;
3106 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3107 return TokenNamecatch;
3109 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3110 return TokenNameclass;
3112 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3113 return TokenNameclone;
3115 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3116 return TokenNameconst;
3118 return TokenNameIdentifier;
3120 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3121 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3122 return TokenNamecontinue;
3124 return TokenNameIdentifier;
3126 return TokenNameIdentifier;
3129 // declare default do die
3130 // TODO delete define ==> no keyword !
3133 if ((data[++index] == 'o'))
3136 return TokenNameIdentifier;
3138 // if ((data[++index] == 'e')
3139 // && (data[++index] == 'f')
3140 // && (data[++index] == 'i')
3141 // && (data[++index] == 'n')
3142 // && (data[++index] == 'e'))
3143 // return TokenNamedefine;
3145 // return TokenNameIdentifier;
3147 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3148 && (data[++index] == 'r') && (data[++index] == 'e'))
3149 return TokenNamedeclare;
3151 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3152 && (data[++index] == 'l') && (data[++index] == 't'))
3153 return TokenNamedefault;
3155 return TokenNameIdentifier;
3157 return TokenNameIdentifier;
3160 //echo else exit elseif extends eval
// NOTE(review): the alternatives below test data[index] at whatever position
// the failed "echo" comparison left index (1, 2 or 3), not at position 1.
// Assuming this chain handles four-character tokens, an input such as "ecls"
// fails on 'h' with index == 2, matches 'l' and 's', and then reads data[4],
// one past the end of data - verify. The same pattern recurs further down for
// empty, elseif and implements. Consider resetting index before each
// alternative.
3163 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3164 return TokenNameecho;
3165 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3166 return TokenNameelse;
3167 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3168 return TokenNameexit;
3169 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3170 return TokenNameeval;
3172 return TokenNameIdentifier;
3175 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3176 return TokenNameendif;
3177 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3178 return TokenNameempty;
3180 return TokenNameIdentifier;
3183 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3184 && (data[++index] == 'r'))
3185 return TokenNameendfor;
3186 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3187 && (data[++index] == 'f'))
3188 return TokenNameelseif;
3190 return TokenNameIdentifier;
3192 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3193 && (data[++index] == 'd') && (data[++index] == 's'))
3194 return TokenNameextends;
3196 return TokenNameIdentifier;
3199 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3200 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3201 return TokenNameendwhile;
3203 return TokenNameIdentifier;
3206 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3207 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3208 return TokenNameendswitch;
3210 return TokenNameIdentifier;
3213 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3214 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3215 && (data[++index] == 'e'))
3216 return TokenNameenddeclare;
3218 if ((data[++index] == 'n') // endforeach
3219 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3220 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3221 return TokenNameendforeach;
3223 return TokenNameIdentifier;
3225 return TokenNameIdentifier;
3228 //for false final function
3231 if ((data[++index] == 'o') && (data[++index] == 'r'))
3232 return TokenNamefor;
3234 return TokenNameIdentifier;
3236 // if ((data[++index] == 'a') && (data[++index] == 'l')
3237 // && (data[++index] == 's') && (data[++index] == 'e'))
3238 // return TokenNamefalse;
3239 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3240 return TokenNamefinal;
3242 return TokenNameIdentifier;
3245 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3246 && (data[++index] == 'c') && (data[++index] == 'h'))
3247 return TokenNameforeach;
3249 return TokenNameIdentifier;
3252 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3253 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3254 return TokenNamefunction;
3256 return TokenNameIdentifier;
3258 return TokenNameIdentifier;
3263 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3264 && (data[++index] == 'l')) {
3265 return TokenNameglobal;
3268 return TokenNameIdentifier;
3270 //if int isset include include_once instanceof interface implements
3273 if (data[++index] == 'f')
3276 return TokenNameIdentifier;
3278 // if ((data[++index] == 'n') && (data[++index] == 't'))
3279 // return TokenNameint;
3281 // return TokenNameIdentifier;
3283 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3284 return TokenNameisset;
3286 return TokenNameIdentifier;
3288 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3289 && (data[++index] == 'd') && (data[++index] == 'e'))
3290 return TokenNameinclude;
3292 return TokenNameIdentifier;
3295 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3296 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3297 return TokenNameinterface;
3299 return TokenNameIdentifier;
3302 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3303 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3304 && (data[++index] == 'f'))
3305 return TokenNameinstanceof;
3306 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3307 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3308 && (data[++index] == 's'))
3309 return TokenNameimplements;
3311 return TokenNameIdentifier;
3313 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3314 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3315 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3316 return TokenNameinclude_once;
3318 return TokenNameIdentifier;
3320 return TokenNameIdentifier;
3325 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3326 return TokenNamelist;
3329 return TokenNameIdentifier;
3334 if ((data[++index] == 'e') && (data[++index] == 'w'))
3335 return TokenNamenew;
3337 return TokenNameIdentifier;
3339 // if ((data[++index] == 'u') && (data[++index] == 'l')
3340 // && (data[++index] == 'l'))
3341 // return TokenNamenull;
3343 // return TokenNameIdentifier;
3345 return TokenNameIdentifier;
3350 if (data[++index] == 'r') {
3354 // if (length == 12) {
3355 // if ((data[++index] == 'l')
3356 // && (data[++index] == 'd')
3357 // && (data[++index] == '_')
3358 // && (data[++index] == 'f')
3359 // && (data[++index] == 'u')
3360 // && (data[++index] == 'n')
3361 // && (data[++index] == 'c')
3362 // && (data[++index] == 't')
3363 // && (data[++index] == 'i')
3364 // && (data[++index] == 'o')
3365 // && (data[++index] == 'n')) {
3366 // return TokenNameold_function;
3369 return TokenNameIdentifier;
3371 // print public private protected
3374 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3375 return TokenNameprint;
3377 return TokenNameIdentifier;
3379 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3380 && (data[++index] == 'c')) {
3381 return TokenNamepublic;
3383 return TokenNameIdentifier;
3385 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3386 && (data[++index] == 't') && (data[++index] == 'e')) {
3387 return TokenNameprivate;
3389 return TokenNameIdentifier;
3391 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3392 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3393 return TokenNameprotected;
3395 return TokenNameIdentifier;
3397 return TokenNameIdentifier;
3399 //return require require_once
3401 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3402 && (data[++index] == 'n')) {
3403 return TokenNamereturn;
3405 } else if (length == 7) {
3406 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3407 && (data[++index] == 'r') && (data[++index] == 'e')) {
3408 return TokenNamerequire;
3410 } else if (length == 12) {
3411 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3412 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3413 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3414 return TokenNamerequire_once;
3417 return TokenNameIdentifier;
3422 if (data[++index] == 't')
3423 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3424 return TokenNamestatic;
3426 return TokenNameIdentifier;
3427 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3428 && (data[++index] == 'h'))
3429 return TokenNameswitch;
3431 return TokenNameIdentifier;
3433 return TokenNameIdentifier;
3439 if ((data[++index] == 'r') && (data[++index] == 'y'))
3440 return TokenNametry;
3442 return TokenNameIdentifier;
3444 // if ((data[++index] == 'r') && (data[++index] == 'u')
3445 // && (data[++index] == 'e'))
3446 // return TokenNametrue;
3448 // return TokenNameIdentifier;
3450 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3451 return TokenNamethrow;
3453 return TokenNameIdentifier;
3455 return TokenNameIdentifier;
3461 if ((data[++index] == 's') && (data[++index] == 'e'))
3462 return TokenNameuse;
3464 return TokenNameIdentifier;
3466 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3467 return TokenNameunset;
3469 return TokenNameIdentifier;
3471 return TokenNameIdentifier;
3477 if ((data[++index] == 'a') && (data[++index] == 'r'))
3478 return TokenNamevar;
3480 return TokenNameIdentifier;
3482 return TokenNameIdentifier;
3488 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3489 return TokenNamewhile;
3491 return TokenNameIdentifier;
3492 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3493 // (data[++index]=='e') && (data[++index]=='f')&&
3494 // (data[++index]=='p'))
3495 //return TokenNamewidefp ;
3497 //return TokenNameIdentifier;
3499 return TokenNameIdentifier;
3505 if ((data[++index] == 'o') && (data[++index] == 'r'))
3506 return TokenNamexor;
3508 return TokenNameIdentifier;
3510 return TokenNameIdentifier;
3513 return TokenNameIdentifier;
/**
 * Scans a numeric literal whose first digit is already held in currentCharacter.
 * The visible paths cover a hexadecimal form introduced by 0x/0X, a leading-zero
 * form that may turn out to be a floating point value, and a decimal form with an
 * optional fraction, exponent and d/D suffix.
 *
 * @param dotPrefix initial value of the floating flag; per the comment below the
 *          number may be preceded by a '.' in that case
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException with INVALID_HEXA when the character after the
 *           x/X is not a hexadecimal digit, or with INVALID_FLOAT when the
 *           character after an exponent marker (and its optional sign) is not a digit
 */
3517 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3518 //when entering this method the currentCharacter is the first
3519 //digit of the number , i.e. it may be preceded by a . when
//dotPrefix is true
3521 boolean floating = dotPrefix;
3522 if ((!dotPrefix) && (currentCharacter == '0')) {
3523 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3524 //force the first char of the hexa number do exist...
3525 // consume next character
3526 unicodeAsBackSlash = false;
3527 currentCharacter = source[currentPosition++];
3528 // if (((currentCharacter = source[currentPosition++]) == '\\')
3529 // && (source[currentPosition] == 'u')) {
3530 // getNextUnicodeChar();
3532 // if (withoutUnicodePtr != 0) {
3533 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Character.digit returns -1 when the character is not a valid base-16 digit
3536 if (Character.digit(currentCharacter, 16) == -1)
3537 throw new InvalidInputException(INVALID_HEXA);
// swallow the remaining hexadecimal digits
3539 while (getNextCharAsDigit(16)) {
3542 // if (getNextChar('l', 'L') >= 0)
3543 // return TokenNameLongLiteral;
3545 return TokenNameIntegerLiteral;
3547 //there is x or X in the number
3548 //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3549 // 00078.0 is true !!!!! crazy language
3550 if (getNextCharAsDigit()) {
3551 //-------------potential octal-----------------
3552 while (getNextCharAsDigit()) {
3555 // if (getNextChar('l', 'L') >= 0) {
3556 // return TokenNameLongLiteral;
3559 // if (getNextChar('f', 'F') >= 0) {
3560 // return TokenNameFloatingPointLiteral;
// a d/D suffix directly after the digits makes the literal a double
3562 if (getNextChar('d', 'D') >= 0) {
3563 return TokenNameDoubleLiteral;
3564 } else { //make the distinction between octal and float ....
3565 if (getNextChar('.')) { //bingo ! ....
3566 while (getNextCharAsDigit()) {
3569 if (getNextChar('e', 'E') >= 0) {
3570 // consume next character
3571 unicodeAsBackSlash = false;
3572 currentCharacter = source[currentPosition++];
3573 // if (((currentCharacter = source[currentPosition++]) == '\\')
3574 // && (source[currentPosition] == 'u')) {
3575 // getNextUnicodeChar();
3577 // if (withoutUnicodePtr != 0) {
3578 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// optional sign of the exponent
3581 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3582 // consume next character
3583 unicodeAsBackSlash = false;
3584 currentCharacter = source[currentPosition++];
3585 // if (((currentCharacter = source[currentPosition++]) == '\\')
3586 // && (source[currentPosition] == 'u')) {
3587 // getNextUnicodeChar();
3589 // if (withoutUnicodePtr != 0) {
3590 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3591 // currentCharacter;
// the exponent needs at least one digit
3595 if (!Character.isDigit(currentCharacter))
3596 throw new InvalidInputException(INVALID_FLOAT);
3597 while (getNextCharAsDigit()) {
3601 // if (getNextChar('f', 'F') >= 0)
3602 // return TokenNameFloatingPointLiteral;
3603 getNextChar('d', 'D'); //jump over potential d or D
3604 return TokenNameDoubleLiteral;
3606 return TokenNameIntegerLiteral;
// plain decimal form: consume the integer digits
3613 while (getNextCharAsDigit()) {
3616 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3617 // return TokenNameLongLiteral;
3618 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3619 while (getNextCharAsDigit()) {
3624 //if floating is true both exponant and suffix may be optional
3625 if (getNextChar('e', 'E') >= 0) {
3627 // consume next character
3628 unicodeAsBackSlash = false;
3629 currentCharacter = source[currentPosition++];
3630 // if (((currentCharacter = source[currentPosition++]) == '\\')
3631 // && (source[currentPosition] == 'u')) {
3632 // getNextUnicodeChar();
3634 // if (withoutUnicodePtr != 0) {
3635 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3638 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3641 unicodeAsBackSlash = false;
3642 currentCharacter = source[currentPosition++];
3643 // if (((currentCharacter = source[currentPosition++]) == '\\')
3644 // && (source[currentPosition] == 'u')) {
3645 // getNextUnicodeChar();
3647 // if (withoutUnicodePtr != 0) {
3648 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// the exponent needs at least one digit
3652 if (!Character.isDigit(currentCharacter))
3653 throw new InvalidInputException(INVALID_FLOAT);
3654 while (getNextCharAsDigit()) {
// an explicit d/D suffix always yields a double
3658 if (getNextChar('d', 'D') >= 0)
3659 return TokenNameDoubleLiteral;
3660 // if (getNextChar('f', 'F') >= 0)
3661 // return TokenNameFloatingPointLiteral;
3662 //the long flag has been tested before
// without a suffix the floating flag decides between double and integer
3663 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3667 * Search the line number corresponding to a specific position
3670 public final int getLineNumber(int position) {
// Looks the position up in lineEnds; only the first linePtr + 1 entries are considered.
// NOTE(review): the comparisons against lineEnds[m] between the bounds g and d look
// like a binary search over the recorded line-end positions - confirm.
3671 if (lineEnds == null)
3673 int length = linePtr + 1;
// g and d start as the lowest and highest usable index of lineEnds
3676 int g = 0, d = length - 1;
3680 if (position < lineEnds[m]) {
3682 } else if (position > lineEnds[m]) {
3688 if (position < lineEnds[m]) {
/**
 * Changes the PHP mode of this scanner.
 *
 * @param mode the requested mode; presumably stored in the phpMode flag - TODO confirm
 */
3694 public void setPHPMode(boolean mode) {
/**
 * Sets the characters to scan without an associated compilation unit.
 * Delegates to setSource(ICompilationUnit, char[]) with a null compilation unit.
 *
 * @param source the characters to scan
 */
3698 public final void setSource(char[] source) {
3699 setSource(null, source);
/**
 * Installs a new source buffer and resets the scanning state that depends on it.
 *
 * @param compilationUnit the unit stored in this.compilationUnit (the one-argument
 *          overload passes null)
 * @param source the characters to scan; a null value is replaced by an empty array
 */
3702 public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3703 //the source-buffer is set to sourceString
3704 this.compilationUnit = compilationUnit;
// never keep a null buffer: fall back to an empty one
3705 if (source == null) {
3706 this.source = new char[0];
3708 this.source = source;
// restart scanning at the beginning of the new buffer
3711 initialPosition = currentPosition = 0;
3712 containsAssertKeyword = false;
// sized like the source buffer
3713 withoutUnicodeBuffer = new char[this.source.length];
3714 // encapsedStringStack = new Stack();
/**
 * Builds a debugging view of the source in which the current token, i.e. the
 * characters from startPosition up to currentPosition - 1, is framed by
 * "Starts here -->" and "<-- Ends here" markers.
 *
 * @return "EOF" or "behind the EOF :-( ...." followed by the whole source when the
 *         scanner stands at or beyond the end, otherwise the framed view
 */
3717 public String toString() {
3718 if (startPosition == source.length)
3719 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3720 if (currentPosition > source.length)
3721 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
// everything before the current token
3722 char front[] = new char[startPosition];
3723 System.arraycopy(source, 0, front, 0, startPosition);
// the current token itself; a negative length is mapped to an empty array below
3724 int middleLength = (currentPosition - 1) - startPosition + 1;
3726 if (middleLength > -1) {
3727 middle = new char[middleLength];
3728 System.arraycopy(source, startPosition, middle, 0, middleLength);
3730 middle = new char[0];
// everything after the current token
// NOTE(review): end is allocated one element longer than the number of characters
// copied into it, so its last element keeps the default '\0' - confirm whether intended.
3732 char end[] = new char[source.length - (currentPosition - 1)];
3733 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3734 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3735 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable form of a token code, mainly useful for debugging output.
 * Keywords, operators and punctuation map to fixed strings; tokens that carry text
 * (identifiers, variables, literals, comments, whitespace, inline HTML) are rendered
 * as a label followed by the current token source in parentheses.
 *
 * @param act the token code to describe
 * @return the description; the last return statement produces
 *         "not-a-token(" + act + ") " followed by the current token source
 */
3739 public final String toStringAction(int act) {
3741 case TokenNameERROR:
3742 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
// tokens rendered together with their source text
3744 case TokenNameINLINE_HTML:
3745 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3746 case TokenNameIdentifier:
3747 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3748 case TokenNameVariable:
3749 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// keywords
3750 case TokenNameabstract:
3751 return "abstract"; //$NON-NLS-1$
3753 return "AND"; //$NON-NLS-1$
3754 case TokenNamearray:
3755 return "array"; //$NON-NLS-1$
3757 return "as"; //$NON-NLS-1$
3758 case TokenNamebreak:
3759 return "break"; //$NON-NLS-1$
3761 return "case"; //$NON-NLS-1$
3762 case TokenNameclass:
3763 return "class"; //$NON-NLS-1$
3764 case TokenNamecatch:
3765 return "catch"; //$NON-NLS-1$
3766 case TokenNameclone:
3769 case TokenNameconst:
3772 case TokenNamecontinue:
3773 return "continue"; //$NON-NLS-1$
3774 case TokenNamedefault:
3775 return "default"; //$NON-NLS-1$
3776 // case TokenNamedefine :
3777 // return "define"; //$NON-NLS-1$
3779 return "do"; //$NON-NLS-1$
3781 return "echo"; //$NON-NLS-1$
3783 return "else"; //$NON-NLS-1$
3784 case TokenNameelseif:
3785 return "elseif"; //$NON-NLS-1$
3786 case TokenNameendfor:
3787 return "endfor"; //$NON-NLS-1$
3788 case TokenNameendforeach:
3789 return "endforeach"; //$NON-NLS-1$
3790 case TokenNameendif:
3791 return "endif"; //$NON-NLS-1$
3792 case TokenNameendswitch:
3793 return "endswitch"; //$NON-NLS-1$
3794 case TokenNameendwhile:
3795 return "endwhile"; //$NON-NLS-1$
3798 case TokenNameextends:
3799 return "extends"; //$NON-NLS-1$
3800 // case TokenNamefalse :
3801 // return "false"; //$NON-NLS-1$
3802 case TokenNamefinal:
3803 return "final"; //$NON-NLS-1$
3805 return "for"; //$NON-NLS-1$
3806 case TokenNameforeach:
3807 return "foreach"; //$NON-NLS-1$
3808 case TokenNamefunction:
3809 return "function"; //$NON-NLS-1$
3810 case TokenNameglobal:
3811 return "global"; //$NON-NLS-1$
3813 return "if"; //$NON-NLS-1$
3814 case TokenNameimplements:
3815 return "implements"; //$NON-NLS-1$
3816 case TokenNameinclude:
3817 return "include"; //$NON-NLS-1$
3818 case TokenNameinclude_once:
3819 return "include_once"; //$NON-NLS-1$
3820 case TokenNameinstanceof:
3821 return "instanceof"; //$NON-NLS-1$
3822 case TokenNameinterface:
3823 return "interface"; //$NON-NLS-1$
3824 case TokenNameisset:
3825 return "isset"; //$NON-NLS-1$
3827 return "list"; //$NON-NLS-1$
3829 return "new"; //$NON-NLS-1$
3830 // case TokenNamenull :
3831 // return "null"; //$NON-NLS-1$
3833 return "OR"; //$NON-NLS-1$
3834 case TokenNameprint:
3835 return "print"; //$NON-NLS-1$
3836 case TokenNameprivate:
3837 return "private"; //$NON-NLS-1$
3838 case TokenNameprotected:
3839 return "protected"; //$NON-NLS-1$
3840 case TokenNamepublic:
3841 return "public"; //$NON-NLS-1$
3842 case TokenNamerequire:
3843 return "require"; //$NON-NLS-1$
3844 case TokenNamerequire_once:
3845 return "require_once"; //$NON-NLS-1$
3846 case TokenNamereturn:
3847 return "return"; //$NON-NLS-1$
3848 case TokenNamestatic:
3849 return "static"; //$NON-NLS-1$
3850 case TokenNameswitch:
3851 return "switch"; //$NON-NLS-1$
3852 // case TokenNametrue :
3853 // return "true"; //$NON-NLS-1$
3854 case TokenNameunset:
3855 return "unset"; //$NON-NLS-1$
3857 return "var"; //$NON-NLS-1$
3858 case TokenNamewhile:
3859 return "while"; //$NON-NLS-1$
3861 return "XOR"; //$NON-NLS-1$
3862 // case TokenNamethis :
3863 // return "$this"; //$NON-NLS-1$
// literals, rendered together with their source text
3864 case TokenNameIntegerLiteral:
3865 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3866 case TokenNameDoubleLiteral:
3867 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3868 case TokenNameStringDoubleQuote:
3869 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3870 case TokenNameStringSingleQuote:
3871 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3872 case TokenNameStringInterpolated:
3873 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3874 case TokenNameEncapsedString0:
3875 return "`"; //$NON-NLS-1$
3876 // case TokenNameEncapsedString1:
3877 // return "\'"; //$NON-NLS-1$
3878 // case TokenNameEncapsedString2:
3879 // return "\""; //$NON-NLS-1$
3880 case TokenNameSTRING:
3881 return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3882 case TokenNameHEREDOC:
3883 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// operators and punctuation
3884 case TokenNamePLUS_PLUS:
3885 return "++"; //$NON-NLS-1$
3886 case TokenNameMINUS_MINUS:
3887 return "--"; //$NON-NLS-1$
3888 case TokenNameEQUAL_EQUAL:
3889 return "=="; //$NON-NLS-1$
3890 case TokenNameEQUAL_EQUAL_EQUAL:
3891 return "==="; //$NON-NLS-1$
3892 case TokenNameEQUAL_GREATER:
3893 return "=>"; //$NON-NLS-1$
3894 case TokenNameLESS_EQUAL:
3895 return "<="; //$NON-NLS-1$
3896 case TokenNameGREATER_EQUAL:
3897 return ">="; //$NON-NLS-1$
3898 case TokenNameNOT_EQUAL:
3899 return "!="; //$NON-NLS-1$
3900 case TokenNameNOT_EQUAL_EQUAL:
3901 return "!=="; //$NON-NLS-1$
3902 case TokenNameLEFT_SHIFT:
3903 return "<<"; //$NON-NLS-1$
3904 case TokenNameRIGHT_SHIFT:
3905 return ">>"; //$NON-NLS-1$
3906 case TokenNamePLUS_EQUAL:
3907 return "+="; //$NON-NLS-1$
3908 case TokenNameMINUS_EQUAL:
3909 return "-="; //$NON-NLS-1$
3910 case TokenNameMULTIPLY_EQUAL:
3911 return "*="; //$NON-NLS-1$
3912 case TokenNameDIVIDE_EQUAL:
3913 return "/="; //$NON-NLS-1$
3914 case TokenNameAND_EQUAL:
3915 return "&="; //$NON-NLS-1$
3916 case TokenNameOR_EQUAL:
3917 return "|="; //$NON-NLS-1$
3918 case TokenNameXOR_EQUAL:
3919 return "^="; //$NON-NLS-1$
3920 case TokenNameREMAINDER_EQUAL:
3921 return "%="; //$NON-NLS-1$
3922 case TokenNameDOT_EQUAL:
3923 return ".="; //$NON-NLS-1$
3924 case TokenNameLEFT_SHIFT_EQUAL:
3925 return "<<="; //$NON-NLS-1$
3926 case TokenNameRIGHT_SHIFT_EQUAL:
3927 return ">>="; //$NON-NLS-1$
3928 case TokenNameOR_OR:
3929 return "||"; //$NON-NLS-1$
3930 case TokenNameAND_AND:
3931 return "&&"; //$NON-NLS-1$
3933 return "+"; //$NON-NLS-1$
3934 case TokenNameMINUS:
3935 return "-"; //$NON-NLS-1$
3936 case TokenNameMINUS_GREATER:
3939 return "!"; //$NON-NLS-1$
3940 case TokenNameREMAINDER:
3941 return "%"; //$NON-NLS-1$
3943 return "^"; //$NON-NLS-1$
3945 return "&"; //$NON-NLS-1$
3946 case TokenNameMULTIPLY:
3947 return "*"; //$NON-NLS-1$
3949 return "|"; //$NON-NLS-1$
3950 case TokenNameTWIDDLE:
3951 return "~"; //$NON-NLS-1$
3952 case TokenNameTWIDDLE_EQUAL:
3953 return "~="; //$NON-NLS-1$
3954 case TokenNameDIVIDE:
3955 return "/"; //$NON-NLS-1$
3956 case TokenNameGREATER:
3957 return ">"; //$NON-NLS-1$
3959 return "<"; //$NON-NLS-1$
3960 case TokenNameLPAREN:
3961 return "("; //$NON-NLS-1$
3962 case TokenNameRPAREN:
3963 return ")"; //$NON-NLS-1$
3964 case TokenNameLBRACE:
3965 return "{"; //$NON-NLS-1$
3966 case TokenNameRBRACE:
3967 return "}"; //$NON-NLS-1$
3968 case TokenNameLBRACKET:
3969 return "["; //$NON-NLS-1$
3970 case TokenNameRBRACKET:
3971 return "]"; //$NON-NLS-1$
3972 case TokenNameSEMICOLON:
3973 return ";"; //$NON-NLS-1$
3974 case TokenNameQUESTION:
3975 return "?"; //$NON-NLS-1$
3976 case TokenNameCOLON:
3977 return ":"; //$NON-NLS-1$
3978 case TokenNameCOMMA:
3979 return ","; //$NON-NLS-1$
3981 return "."; //$NON-NLS-1$
3982 case TokenNameEQUAL:
3983 return "="; //$NON-NLS-1$
3986 case TokenNameDOLLAR:
3988 case TokenNameDOLLAR_LBRACE:
3990 case TokenNameLBRACE_DOLLAR:
3993 return "EOF"; //$NON-NLS-1$
// whitespace and comments, rendered together with their source text
3994 case TokenNameWHITESPACE:
3995 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3996 case TokenNameCOMMENT_LINE:
3997 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3998 case TokenNameCOMMENT_BLOCK:
3999 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4000 case TokenNameCOMMENT_PHPDOC:
4001 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4002 // case TokenNameHTML :
4003 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
// magic constants
4006 return "__FILE__"; //$NON-NLS-1$
4008 return "__LINE__"; //$NON-NLS-1$
4009 case TokenNameCLASS_C:
4010 return "__CLASS__"; //$NON-NLS-1$
4011 case TokenNameMETHOD_C:
4012 return "__METHOD__"; //$NON-NLS-1$
4013 case TokenNameFUNC_C:
4014 return "__FUNCTION__"; //$NON-NLS-1$
// cast tokens
4015 case TokenNameboolCAST:
4016 return "( bool )"; //$NON-NLS-1$
4017 case TokenNameintCAST:
4018 return "( int )"; //$NON-NLS-1$
4019 case TokenNamedoubleCAST:
4020 return "( double )"; //$NON-NLS-1$
4021 case TokenNameobjectCAST:
4022 return "( object )"; //$NON-NLS-1$
4023 case TokenNamestringCAST:
4024 return "( string )"; //$NON-NLS-1$
// fallback text that includes the numeric code and the current token source
4026 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner; delegates to the three-argument constructor with
 * checkNonExternalizedStringLiterals set to false.
 *
 * @param tokenizeComments passed through unchanged
 * @param tokenizeWhiteSpace passed through unchanged
 */
4034 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
4035 this(tokenizeComments, tokenizeWhiteSpace, false);
/**
 * Creates a scanner; delegates to the four-argument constructor with
 * assertMode set to false.
 *
 * @param tokenizeComments passed through unchanged
 * @param tokenizeWhiteSpace passed through unchanged
 * @param checkNonExternalizedStringLiterals passed through unchanged
 */
4038 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
4039 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
/**
 * Creates a scanner; delegates to the eight-argument constructor with
 * tokenizeStrings set to false, no task tags, no task priorities and
 * isTaskCaseSensitive set to true.
 *
 * @param tokenizeComments passed through unchanged
 * @param tokenizeWhiteSpace passed through unchanged
 * @param checkNonExternalizedStringLiterals passed through unchanged
 * @param assertMode passed through unchanged
 */
4042 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
4043 boolean assertMode) {
4044 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null, true);
/**
 * Creates a scanner with every option given explicitly. All other constructors
 * end up here.
 *
 * @param tokenizeComments stored in this.tokenizeComments
 * @param tokenizeWhiteSpace stored in this.tokenizeWhiteSpace
 * @param checkNonExternalizedStringLiterals stored in this.checkNonExternalizedStringLiterals
 * @param assertMode stored in this.assertMode
 * @param tokenizeStrings stored in this.tokenizeStrings
 * @param taskTags the task tags looked for by checkTaskTag; may be null
 * @param taskPriorities priorities matched to taskTags by index; may be null
 * @param isTaskCaseSensitive whether checkTaskTag compares tag characters case-sensitively
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
  this.eofPosition = Integer.MAX_VALUE;
  this.tokenizeComments = tokenizeComments;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeStrings = tokenizeStrings;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.assertMode = assertMode;
  //    this.encapsedStringStack = null;
  this.taskTags = taskTags;
  this.taskPriorities = taskPriorities;
  // checkTaskTag reads this flag when comparing tag characters, so the caller's
  // choice has to be stored instead of silently keeping the field's initial value
  this.isTaskCaseSensitive = isTaskCaseSensitive;
}
/**
 * Hands the current NLS line to parseTags.
 * NOTE(review): the null test on currentLine presumably returns early so that
 * parseTags is only reached with a non-null line - confirm.
 *
 * @throws InvalidInputException declared because parseTags declares it
 */
4060 private void checkNonExternalizeString() throws InvalidInputException {
4061 if (currentLine == null)
4063 parseTags(currentLine);
/**
 * Matches the NLS tags found in the current token source against the string
 * literals collected for a line and stores the remaining non-null literals in
 * this.nonNLSStrings.
 *
 * @param line the literals of the line; its size bounds the result array
 * @throws InvalidInputException declared by the signature; no throw statement is
 *           visible in this excerpt
 */
4066 private void parseTags(NLSLine line) throws InvalidInputException {
4067 String s = new String(getCurrentTokenSource());
4068 int pos = s.indexOf(TAG_PREFIX);
4069 int lineLength = line.size();
// the tag index is the text between TAG_PREFIX and the next TAG_POSTFIX
// NOTE(review): end is used without checking for -1; if TAG_POSTFIX is missing
// after the prefix, substring(start, -1) throws StringIndexOutOfBoundsException - confirm.
4071 int start = pos + TAG_PREFIX_LENGTH;
4072 int end = s.indexOf(TAG_POSTFIX, start);
4073 String index = s.substring(start, end);
4076 i = Integer.parseInt(index) - 1;
4077 // Tags are one based not zero based.
4078 } catch (NumberFormatException e) {
4079 i = -1; // we don't want to consider this as a valid NLS tag
4081 if (line.exists(i)) {
// continue with the next tag after the one just handled
4084 pos = s.indexOf(TAG_PREFIX, start);
// collect the literals that are still non-null, packed at the front of the array
4086 this.nonNLSStrings = new StringLiteral[lineLength];
4087 int nonNLSCounter = 0;
4088 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4089 StringLiteral literal = (StringLiteral) iterator.next();
4090 if (literal != null) {
4091 this.nonNLSStrings[nonNLSCounter++] = literal;
// nothing left: report no non-externalized strings at all
4094 if (nonNLSCounter == 0) {
4095 this.nonNLSStrings = null;
4099 this.wasNonExternalizedStringLiteral = true;
// shrink the array to the number of literals actually stored
4100 if (nonNLSCounter != lineLength) {
4101 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
/**
 * Translates the escape sequence at the current position and leaves the resulting
 * character in currentCharacter. The visible assignments cover backspace, tab,
 * line feed, form feed, carriage return, double quote, single quote and backslash,
 * plus octal escapes of up to three digits.
 *
 * @throws InvalidInputException with INVALID_ESCAPE for an escape that is not accepted
 */
4106 public final void scanEscapeCharacter() throws InvalidInputException {
4107 // the string with "\\u" is a legal string of two chars \ and u
4108 //thus we use a direct access to the source (for regular cases).
4109 if (unicodeAsBackSlash) {
4110 // consume next character
4111 unicodeAsBackSlash = false;
4112 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4113 // (source[currentPosition] == 'u')) {
4114 // getNextUnicodeChar();
4116 if (withoutUnicodePtr != 0) {
4117 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
4121 currentCharacter = source[currentPosition++];
// map the character after the backslash to the character it denotes
4122 switch (currentCharacter) {
4124 currentCharacter = '\b';
4127 currentCharacter = '\t';
4130 currentCharacter = '\n';
4133 currentCharacter = '\f';
4136 currentCharacter = '\r';
4139 currentCharacter = '\"';
4142 currentCharacter = '\'';
4145 currentCharacter = '\\';
4148 // -----------octal escape--------------
4150 // OctalDigit OctalDigit
4151 // ZeroToThree OctalDigit OctalDigit
4152 int number = Character.getNumericValue(currentCharacter);
4153 if (number >= 0 && number <= 7) {
// a first digit above 3 rules out a third octal digit
4154 boolean zeroToThreeNot = number > 3;
4155 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4156 int digit = Character.getNumericValue(currentCharacter);
4157 if (digit >= 0 && digit <= 7) {
// accumulate the value in base 8
4158 number = (number * 8) + digit;
4159 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4160 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4161 // Digit --> ignore last character
4164 digit = Character.getNumericValue(currentCharacter);
4165 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4166 // OctalDigit OctalDigit
4167 number = (number * 8) + digit;
4168 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4169 // --> ignore last character
4173 } else { // has read \OctalDigit NonDigit--> ignore last
4177 } else { // has read \OctalDigit NonOctalDigit--> ignore last
4181 } else { // has read \OctalDigit --> ignore last character
4185 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the character
4186 currentCharacter = (char) number;
4188 throw new InvalidInputException(INVALID_ESCAPE);
4192 //check presence of task: tags
4193 //TODO (frederic) see if we need to take unicode characters into account...
/**
 * Searches a comment for the configured task tags and records every hit in the
 * foundTaskTags, foundTaskPriorities, foundTaskPositions and foundTaskMessages
 * arrays, growing them when they are full. A second pass works out the message
 * text that follows each tag found by this call.
 *
 * @param commentStart index of the first character of the comment in the source;
 *          the search itself starts at commentStart + 2
 * @param commentEnd index at which the search stops (exclusive)
 */
4194 public void checkTaskTag(int commentStart, int commentEnd) {
4195 char[] src = this.source;
4197 // only look for newer task: tags
4198 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// remember where the hits of this call start, for the message pass below
4201 int foundTaskIndex = this.foundTaskCount;
4202 char previous = src[commentStart + 1]; // should be '*' or '/'
4203 nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4205 char[] priority = null;
4206 // check for tag occurrence only if not ambiguous with javadoc tag
4207 if (previous != '@') {
// NOTE(review): this.taskTags is dereferenced without a visible null check, while
// the four-argument constructor passes null - callers presumably guard; confirm.
4208 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4209 tag = this.taskTags[itag];
4210 int tagLength = tag.length;
4214 // ensure tag is not leaded with letter if tag starts with a letter
4215 if (Scanner.isPHPIdentifierStart(tag[0])) {
4216 if (Scanner.isPHPIdentifierPart(previous)) {
// compare the tag character by character against the source
4221 for (int t = 0; t < tagLength; t++) {
4224 if (x >= this.eofPosition || x >= commentEnd)
4226 if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4227 if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case insensitive check
4232 // ensure tag is not followed with letter if tag finishes with a letter
4233 if (i + tagLength < commentEnd && Scanner.isPHPIdentifierPart(src[i + tagLength - 1])) {
4234 if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
// allocate the result arrays on first use, double them when full
4237 if (this.foundTaskTags == null) {
4238 this.foundTaskTags = new char[5][];
4239 this.foundTaskMessages = new char[5][];
4240 this.foundTaskPriorities = new char[5][];
4241 this.foundTaskPositions = new int[5][];
4242 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4243 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4244 this.foundTaskCount);
4245 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4246 this.foundTaskCount);
4247 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4248 this.foundTaskCount);
4249 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4250 this.foundTaskCount);
// priorities are matched to tags by index; missing entries yield null
4253 priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
// record the hit; the message is filled in by the second pass
4255 this.foundTaskTags[this.foundTaskCount] = tag;
4256 this.foundTaskPriorities[this.foundTaskCount] = priority;
4257 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4258 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4259 this.foundTaskCount++;
4260 i += tagLength - 1; // will be incremented when looping
// second pass: determine the message of every tag recorded by this call
4266 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4267 // retrieve message start and end positions
4268 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4269 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4270 // at most beginning of next task
4271 if (max_value < msgStart) {
4272 max_value = msgStart; // would only occur if tag is before EOF.
// look forward for a line break between the tag and max_value
4276 for (int j = msgStart; j < max_value; j++) {
4277 if ((c = src[j]) == '\n' || c == '\r') {
// look backward from max_value for a '*'
4283 for (int j = max_value; j > msgStart; j--) {
4284 if ((c = src[j]) == '*') {
4292 if (msgStart == end)
// trim whitespace on both sides of the message
// NOTE(review): both loops read src[...] before testing msgStart <= end, so the
// array access happens ahead of the bounds condition - confirm this cannot overrun.
4295 while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4297 while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4299 // update the end position of the task
4300 this.foundTaskPositions[i][1] = end;
4301 // get the message source
4302 final int messageLength = end - msgStart + 1;
4303 char[] message = new char[messageLength];
4304 System.arraycopy(src, msgStart, message, 0, messageLength);
4305 this.foundTaskMessages[i] = message;
4309 // check presence of task: tags
4310 // public void checkTaskTag(int commentStart, int commentEnd) {
4311 // // only look for newer task: tags
4312 // if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4315 // int foundTaskIndex = this.foundTaskCount;
4316 // nextChar: for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
4317 // char[] tag = null;
4318 // char[] priority = null;
4319 // // check for tag occurrence
4320 // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4321 // tag = this.taskTags[itag];
4322 // priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4323 // int tagLength = tag.length;
4324 // for (int t = 0; t < tagLength; t++) {
4325 // if (this.source[i + t] != tag[t])
4326 // continue nextTag;
4328 // if (this.foundTaskTags == null) {
4329 // this.foundTaskTags = new char[5][];
4330 // this.foundTaskMessages = new char[5][];
4331 // this.foundTaskPriorities = new char[5][];
4332 // this.foundTaskPositions = new int[5][];
4333 // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4334 // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4335 // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4336 // this.foundTaskCount);
4337 // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4338 // this.foundTaskCount);
4339 // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4340 // this.foundTaskCount);
4342 // this.foundTaskTags[this.foundTaskCount] = tag;
4343 // this.foundTaskPriorities[this.foundTaskCount] = priority;
4344 // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4345 // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4346 // this.foundTaskCount++;
4347 // i += tagLength - 1; // will be incremented when looping
4350 // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4351 // // retrieve message start and end positions
4352 // int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4353 // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4354 // // at most beginning of next task
4355 // if (max_value < msgStart)
4356 // max_value = msgStart; // would only occur if tag is before EOF.
4359 // for (int j = msgStart; j < max_value; j++) {
4360 // if ((c = this.source[j]) == '\n' || c == '\r') {
4366 // for (int j = max_value; j > msgStart; j--) {
4367 // if ((c = this.source[j]) == '*') {
4375 // if (msgStart == end)
4376 // continue; // empty
4377 // // trim the message
4378 // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4380 // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4382 // // update the end position of the task
4383 // this.foundTaskPositions[i][1] = end;
4384 // // get the message source
4385 // final int messageLength = end - msgStart + 1;
4386 // char[] message = new char[messageLength];
4387 // System.arraycopy(source, msgStart, message, 0, messageLength);
4388 // this.foundTaskMessages[i] = message;