1 /***********************************************************************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3 * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4 * and is available at http://www.eclipse.org/legal/cpl-v05.html
6 * Contributors: IBM Corporation - initial API and implementation
7 **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
13 import java.util.Stack;
15 import net.sourceforge.phpdt.core.compiler.CharOperation;
16 import net.sourceforge.phpdt.core.compiler.IScanner;
17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
20 import net.sourceforge.phpeclipse.internal.compiler.ast.StringLiteral;
22 public class Scanner implements IScanner, ITerminalSymbols {
24 * APIs are: - getNextToken(), which returns the current type of the token (this value is not memorized by the scanner) -
25 * getCurrentTokenSource(), which provides the token's "REAL" source (i.e. all unicode escapes have been transformed into the correct char) -
26 * sourceStart gives the position into the stream - currentPosition-1 gives the sourceEnd position into the stream
29 private boolean assertMode;
31 public boolean useAssertAsAnIndentifier = false;
33 //flag indicating if processed source contains occurrences of keyword assert
34 public boolean containsAssertKeyword = false;
36 public boolean recordLineSeparator;
38 public boolean ignorePHPOneLiner = false;
40 public boolean phpMode = false;
42 public boolean phpExpressionTag = false;
44 public Stack encapsedStringStack = null;
46 public char currentCharacter;
48 public int startPosition;
50 public int currentPosition;
52 public int initialPosition, eofPosition;
54 // after this position EOF tokens are generated instead of real tokens from the
56 public boolean tokenizeComments;
58 public boolean tokenizeWhiteSpace;
60 public boolean tokenizeStrings;
62 //source should be viewed as a window (aka a part)
63 //of an entire very large stream
67 public char[] withoutUnicodeBuffer;
69 public int withoutUnicodePtr;
71 //when == 0 ==> no unicode in the current token
72 public boolean unicodeAsBackSlash = false;
74 public boolean scanningFloatLiteral = false;
76 //support for /** comments
77 public int[] commentStops = new int[10];
79 public int[] commentStarts = new int[10];
81 public int commentPtr = -1; // no comment test with commentPtr value -1
83 protected int lastCommentLinePosition = -1;
85 //diet parsing support - jump over some method body when requested
86 public boolean diet = false;
88 //support for the poor-line-debuggers ....
89 //remember the position of the cr/lf
90 public int[] lineEnds = new int[250];
92 public int linePtr = -1;
94 public boolean wasAcr = false;
96 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
98 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
100 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
102 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
104 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
106 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
108 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
110 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
112 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
114 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
116 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
118 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
120 //----------------optimized identifier management------------------
121 static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
122 charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
123 charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
124 charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
125 charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
126 charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
127 charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
128 charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
129 charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
131 static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
133 'c' }, charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' },
134 charArray_vf = new char[] { '$', 'f' }, charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' },
135 charArray_vi = new char[] { '$', 'i' }, charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' },
136 charArray_vl = new char[] { '$', 'l' }, charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' },
137 charArray_vo = new char[] { '$', 'o' }, charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' },
138 charArray_vr = new char[] { '$', 'r' }, charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' },
139 charArray_vu = new char[] { '$', 'u' }, charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' },
140 charArray_vx = new char[] { '$', 'x' }, charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
142 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
144 static final int TableSize = 30, InternalTableSize = 6;
147 public static final int OptimizedLength = 6;
150 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
152 // support for detecting non-externalized string literals
153 int currentLineNr = -1;
155 int previousLineNr = -1;
157 NLSLine currentLine = null;
159 List lines = new ArrayList();
161 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
163 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
165 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
167 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
169 public StringLiteral[] nonNLSStrings = null;
171 public boolean checkNonExternalizedStringLiterals = true;
173 public boolean wasNonExternalizedStringLiteral = false;
175 for (int i = 0; i < 6; i++) {
176 for (int j = 0; j < TableSize; j++) {
177 for (int k = 0; k < InternalTableSize; k++) {
178 charArray_length[i][j][k] = initCharArray;
184 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
186 public static final int RoundBracket = 0;
188 public static final int SquareBracket = 1;
190 public static final int CurlyBracket = 2;
192 public static final int BracketKinds = 3;
195 public char[][] foundTaskTags = null;
197 public char[][] foundTaskMessages;
199 public char[][] foundTaskPriorities = null;
201 public int[][] foundTaskPositions;
203 public int foundTaskCount = 0;
205 public char[][] taskTags = null;
207 public char[][] taskPriorities = null;
209 public boolean isTaskCaseSensitive = true;
211 public static final boolean DEBUG = false;
213 public static final boolean TRACE = false;
215 public ICompilationUnit compilationUnit = null;
218 * Determines if the specified character is permissible as the first character in a PHP identifier or variable
220 * The '$' character for PHP variables is regarded as a valid first character!
223 public static boolean isPHPIdentOrVarStart(char ch) {
// Accepts any character Character.isLetter() reports as a letter, the '$' variable
// sigil, the underscore, or any char in the range 0x7F..0xFF inclusive.
// NOTE(review): the 0x7F..0xFF range presumably mirrors PHP's identifier byte
// range - confirm against the targeted PHP version.
224 return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
228 * Determines if the specified character is permissible as the first character in a PHP identifier.
230 * The '$' character for PHP variables isn't regarded as a valid first character!
232 public static boolean isPHPIdentifierStart(char ch) {
// Same test as isPHPIdentOrVarStart but without '$': a letter, the underscore,
// or any char in the range 0x7F..0xFF inclusive. Digits are rejected here.
233 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
237 * Determines if the specified character may be part of a PHP identifier as other than the first character
239 public static boolean isPHPIdentifierPart(char ch) {
// Unlike the identifier-start test, digits are allowed here (isLetterOrDigit);
// the underscore and the range 0x7F..0xFF inclusive are accepted as well.
240 return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
243 public final boolean atEnd() {
244 // True once currentPosition equals source.length. This check is not
245 // meaningful when source is only a part of the real input stream.
246 return source.length == currentPosition;
249 public char[] getCurrentIdentifierSource() {
250 //return the token REAL source (aka unicodes are precomputed)
252 // if (withoutUnicodePtr != 0)
253 // //0 is used as a fast test flag so the real first char is in position 1
255 // withoutUnicodeBuffer,
257 // result = new char[withoutUnicodePtr],
259 // withoutUnicodePtr);
261 int length = currentPosition - startPosition;
262 switch (length) { // see OptimizedLength
264 return optimizedCurrentTokenSource1();
266 return optimizedCurrentTokenSource2();
268 return optimizedCurrentTokenSource3();
270 return optimizedCurrentTokenSource4();
272 return optimizedCurrentTokenSource5();
274 return optimizedCurrentTokenSource6();
277 System.arraycopy(source, startPosition, result = new char[length], 0, length);
282 public int getCurrentTokenEndPosition() {
// Returns the index just before currentPosition, i.e. currentPosition - 1.
283 return this.currentPosition - 1;
286 public final char[] getCurrentTokenSource() {
287 // Return the token REAL source (aka unicodes are precomputed)
289 // if (withoutUnicodePtr != 0)
290 // // 0 is used as a fast test flag so the real first char is in position 1
292 // withoutUnicodeBuffer,
294 // result = new char[withoutUnicodePtr],
296 // withoutUnicodePtr);
299 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
304 public final char[] getCurrentTokenSource(int startPos) {
305 // Return the token REAL source (aka unicodes are precomputed)
307 // if (withoutUnicodePtr != 0)
308 // // 0 is used as a fast test flag so the real first char is in position 1
310 // withoutUnicodeBuffer,
312 // result = new char[withoutUnicodePtr],
314 // withoutUnicodePtr);
317 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
322 public final char[] getCurrentTokenSourceString() {
323 //return the token REAL source (aka unicodes are precomputed).
324 //REMOVE the two " that are at the beginning and the end.
326 if (withoutUnicodePtr != 0)
327 //0 is used as a fast test flag so the real first char is in position 1
328 System.arraycopy(withoutUnicodeBuffer, 2,
329 //2 is 1 (real start) + 1 (to jump over the ")
330 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
333 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
338 public final char[] getRawTokenSourceEnd() {
339 int length = this.eofPosition - this.currentPosition - 1;
340 char[] sourceEnd = new char[length];
341 System.arraycopy(this.source, this.currentPosition, sourceEnd, 0, length);
345 public int getCurrentTokenStartPosition() {
// Returns the startPosition field as-is; no adjustment is applied.
346 return this.startPosition;
349 public final char[] getCurrentStringLiteralSource() {
350 // Return the token REAL source (aka unicodes are precomputed)
351 if (startPosition + 1 >= currentPosition) {
356 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
361 public final char[] getCurrentStringLiteralSource(int startPos) {
362 // Return the token REAL source (aka unicodes are precomputed)
365 System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
371 * Search the source position corresponding to the end of a given line number
373 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
375 * In case the given line number is inconsistent, answers -1.
377 public final int getLineEnd(int lineNumber) {
378 if (lineEnds == null)
380 if (lineNumber >= lineEnds.length)
384 if (lineNumber == lineEnds.length - 1)
386 return lineEnds[lineNumber - 1];
387 // the next line starts one character after the lineEnd of the previous line
391 * Search the source position corresponding to the beginning of a given line number
393 * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
395 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
397 * In case the given line number is inconsistent, answers -1.
399 public final int getLineStart(int lineNumber) {
400 if (lineEnds == null)
402 if (lineNumber >= lineEnds.length)
407 return initialPosition;
408 return lineEnds[lineNumber - 2] + 1;
409 // the next line starts one character after the lineEnd of the previous line
412 public final boolean getNextChar(char testedChar) {
414 //handle the case of unicode.
415 //when a unicode appears then we must use a buffer that holds char
417 //At the end of this method currentCharacter holds the new visited char
418 //and currentPosition points right next after it
419 //Both previous lines are true if the currentCharacter is == to the
421 //On false, no side effect has occurred.
422 //ALL getNextChar.... ARE OPTIMIZED COPIES
423 int temp = currentPosition;
425 currentCharacter = source[currentPosition++];
426 // if (((currentCharacter = source[currentPosition++]) == '\\')
427 // && (source[currentPosition] == 'u')) {
428 // //-------------unicode traitement ------------
429 // int c1, c2, c3, c4;
430 // int unicodeSize = 6;
431 // currentPosition++;
432 // while (source[currentPosition] == 'u') {
433 // currentPosition++;
437 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
439 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
441 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
443 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
445 // currentPosition = temp;
449 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
450 // if (currentCharacter != testedChar) {
451 // currentPosition = temp;
454 // unicodeAsBackSlash = currentCharacter == '\\';
456 // //need the unicode buffer
457 // if (withoutUnicodePtr == 0) {
458 // //buffer all the entries that have been left aside....
459 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
463 // withoutUnicodeBuffer,
465 // withoutUnicodePtr);
467 // //fill the buffer with the char
468 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
471 // } //-------------end unicode traitement--------------
473 if (currentCharacter != testedChar) {
474 currentPosition = temp;
477 unicodeAsBackSlash = false;
478 // if (withoutUnicodePtr != 0)
479 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
482 } catch (IndexOutOfBoundsException e) {
483 unicodeAsBackSlash = false;
484 currentPosition = temp;
489 public final int getNextChar(char testedChar1, char testedChar2) {
490 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
491 //test can be done with (x==0) for the first and (x>0) for the second
492 //handle the case of unicode.
493 //when a unicode appears then we must use a buffer that holds char
495 //At the end of this method currentCharacter holds the new visited char
496 //and currentPosition points right next after it
497 //Both previous lines are true if the currentCharacter is == to the
499 //On false, no side effect has occurred.
500 //ALL getNextChar.... ARE OPTIMIZED COPIES
501 int temp = currentPosition;
504 currentCharacter = source[currentPosition++];
505 // if (((currentCharacter = source[currentPosition++]) == '\\')
506 // && (source[currentPosition] == 'u')) {
507 // //-------------unicode traitement ------------
508 // int c1, c2, c3, c4;
509 // int unicodeSize = 6;
510 // currentPosition++;
511 // while (source[currentPosition] == 'u') {
512 // currentPosition++;
516 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
518 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
520 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
522 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
524 // currentPosition = temp;
528 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
529 // if (currentCharacter == testedChar1)
531 // else if (currentCharacter == testedChar2)
534 // currentPosition = temp;
538 // //need the unicode buffer
539 // if (withoutUnicodePtr == 0) {
540 // //buffer all the entries that have been left aside....
541 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
545 // withoutUnicodeBuffer,
547 // withoutUnicodePtr);
549 // //fill the buffer with the char
550 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
552 // } //-------------end unicode traitement--------------
554 if (currentCharacter == testedChar1)
556 else if (currentCharacter == testedChar2)
559 currentPosition = temp;
562 // if (withoutUnicodePtr != 0)
563 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
566 } catch (IndexOutOfBoundsException e) {
567 currentPosition = temp;
572 public final boolean getNextCharAsDigit() {
574 //handle the case of unicode.
575 //when a unicode appears then we must use a buffer that holds char
577 //At the end of this method currentCharacter holds the new visited char
578 //and currentPosition points right next after it
579 //Both previous lines are true if the currentCharacter is a digit
580 //On false, no side effect has occurred.
581 //ALL getNextChar.... ARE OPTIMIZED COPIES
582 int temp = currentPosition;
584 currentCharacter = source[currentPosition++];
585 // if (((currentCharacter = source[currentPosition++]) == '\\')
586 // && (source[currentPosition] == 'u')) {
587 // //-------------unicode traitement ------------
588 // int c1, c2, c3, c4;
589 // int unicodeSize = 6;
590 // currentPosition++;
591 // while (source[currentPosition] == 'u') {
592 // currentPosition++;
596 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
598 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
600 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
602 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
604 // currentPosition = temp;
608 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
609 // if (!Character.isDigit(currentCharacter)) {
610 // currentPosition = temp;
614 // //need the unicode buffer
615 // if (withoutUnicodePtr == 0) {
616 // //buffer all the entries that have been left aside....
617 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
621 // withoutUnicodeBuffer,
623 // withoutUnicodePtr);
625 // //fill the buffer with the char
626 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
628 // } //-------------end unicode traitement--------------
630 if (!Character.isDigit(currentCharacter)) {
631 currentPosition = temp;
634 // if (withoutUnicodePtr != 0)
635 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
638 } catch (IndexOutOfBoundsException e) {
639 currentPosition = temp;
644 public final boolean getNextCharAsDigit(int radix) {
646 //handle the case of unicode.
647 //when a unicode appears then we must use a buffer that holds char
649 //At the end of this method currentCharacter holds the new visited char
650 //and currentPosition points right next after it
651 //Both previous lines are true if the currentCharacter is a digit base on
653 //On false, no side effect has occurred.
654 //ALL getNextChar.... ARE OPTIMIZED COPIES
655 int temp = currentPosition;
657 currentCharacter = source[currentPosition++];
658 // if (((currentCharacter = source[currentPosition++]) == '\\')
659 // && (source[currentPosition] == 'u')) {
660 // //-------------unicode traitement ------------
661 // int c1, c2, c3, c4;
662 // int unicodeSize = 6;
663 // currentPosition++;
664 // while (source[currentPosition] == 'u') {
665 // currentPosition++;
669 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
671 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
673 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
675 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
677 // currentPosition = temp;
681 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
682 // if (Character.digit(currentCharacter, radix) == -1) {
683 // currentPosition = temp;
687 // //need the unicode buffer
688 // if (withoutUnicodePtr == 0) {
689 // //buffer all the entries that have been left aside....
690 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
694 // withoutUnicodeBuffer,
696 // withoutUnicodePtr);
698 // //fill the buffer with the char
699 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
701 // } //-------------end unicode traitement--------------
703 if (Character.digit(currentCharacter, radix) == -1) {
704 currentPosition = temp;
707 // if (withoutUnicodePtr != 0)
708 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
711 } catch (IndexOutOfBoundsException e) {
712 currentPosition = temp;
717 public boolean getNextCharAsJavaIdentifierPart() {
719 //handle the case of unicode.
720 //when a unicode appears then we must use a buffer that holds char
722 //At the end of this method currentCharacter holds the new visited char
723 //and currentPosition points right next after it
724 //Both previous lines are true if the currentCharacter is a
725 // JavaIdentifierPart
726 //On false, no side effect has occurred.
727 //ALL getNextChar.... ARE OPTIMIZED COPIES
728 int temp = currentPosition;
730 currentCharacter = source[currentPosition++];
731 // if (((currentCharacter = source[currentPosition++]) == '\\')
732 // && (source[currentPosition] == 'u')) {
733 // //-------------unicode traitement ------------
734 // int c1, c2, c3, c4;
735 // int unicodeSize = 6;
736 // currentPosition++;
737 // while (source[currentPosition] == 'u') {
738 // currentPosition++;
742 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
744 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
746 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
748 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
750 // currentPosition = temp;
754 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
755 // if (!isPHPIdentifierPart(currentCharacter)) {
756 // currentPosition = temp;
760 // //need the unicode buffer
761 // if (withoutUnicodePtr == 0) {
762 // //buffer all the entries that have been left aside....
763 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
767 // withoutUnicodeBuffer,
769 // withoutUnicodePtr);
771 // //fill the buffer with the char
772 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
774 // } //-------------end unicode traitement--------------
776 if (!isPHPIdentifierPart(currentCharacter)) {
777 currentPosition = temp;
780 // if (withoutUnicodePtr != 0)
781 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
784 } catch (IndexOutOfBoundsException e) {
785 currentPosition = temp;
790 public int getCastOrParen() {
791 int tempPosition = currentPosition;
792 char tempCharacter = currentCharacter;
793 int tempToken = TokenNameLPAREN;
794 boolean found = false;
795 StringBuffer buf = new StringBuffer();
798 currentCharacter = source[currentPosition++];
799 } while (currentCharacter == ' ' || currentCharacter == '\t');
800 while ((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
801 buf.append(currentCharacter);
802 currentCharacter = source[currentPosition++];
804 if (buf.length() >= 3 && buf.length() <= 7) {
805 char[] data = buf.toString().toCharArray();
807 switch (data.length) {
810 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
812 tempToken = TokenNameintCAST;
817 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
819 tempToken = TokenNameboolCAST;
822 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
824 tempToken = TokenNamedoubleCAST;
830 if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
831 && (data[++index] == 'y')) {
833 tempToken = TokenNamearrayCAST;
836 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
837 && (data[++index] == 't')) {
839 tempToken = TokenNameunsetCAST;
842 if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
843 && (data[++index] == 't')) {
845 tempToken = TokenNamedoubleCAST;
851 // object string double
852 if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
853 && (data[++index] == 'c') && (data[++index] == 't')) {
855 tempToken = TokenNameobjectCAST;
858 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
859 && (data[++index] == 'n') && (data[++index] == 'g')) {
861 tempToken = TokenNamestringCAST;
864 if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
865 && (data[++index] == 'l') && (data[++index] == 'e')) {
867 tempToken = TokenNamedoubleCAST;
874 if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
875 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
877 tempToken = TokenNameboolCAST;
880 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
881 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
883 tempToken = TokenNameintCAST;
889 while (currentCharacter == ' ' || currentCharacter == '\t') {
890 currentCharacter = source[currentPosition++];
892 if (currentCharacter == ')') {
897 } catch (IndexOutOfBoundsException e) {
899 currentCharacter = tempCharacter;
900 currentPosition = tempPosition;
901 return TokenNameLPAREN;
904 public void consumeStringInterpolated() throws InvalidInputException {
906 // consume next character
907 unicodeAsBackSlash = false;
908 currentCharacter = source[currentPosition++];
909 // if (((currentCharacter = source[currentPosition++]) == '\\')
910 // && (source[currentPosition] == 'u')) {
911 // getNextUnicodeChar();
913 // if (withoutUnicodePtr != 0) {
914 // withoutUnicodeBuffer[++withoutUnicodePtr] =
918 while (currentCharacter != '`') {
919 /** ** in PHP \r and \n are valid in string literals *** */
920 // if ((currentCharacter == '\n')
921 // || (currentCharacter == '\r')) {
922 // // relocate if finding another quote fairly close: thus unicode
923 // '/u000D' will be fully consumed
924 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
925 // if (currentPosition + lookAhead == source.length)
927 // if (source[currentPosition + lookAhead] == '\n')
929 // if (source[currentPosition + lookAhead] == '\"') {
930 // currentPosition += lookAhead + 1;
934 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
936 if (currentCharacter == '\\') {
937 int escapeSize = currentPosition;
938 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
939 //scanEscapeCharacter has a side effect on this value and we need
940 // the previous value a few lines below this one
941 scanDoubleQuotedEscapeCharacter();
942 escapeSize = currentPosition - escapeSize;
943 if (withoutUnicodePtr == 0) {
944 //buffer all the entries that have been left aside....
945 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
946 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
947 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
948 } else { //overwrite the / in the buffer
949 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
950 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
951 // where only one is correct
955 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
956 if (recordLineSeparator) {
960 // consume next character
961 unicodeAsBackSlash = false;
962 currentCharacter = source[currentPosition++];
963 // if (((currentCharacter = source[currentPosition++]) == '\\')
964 // && (source[currentPosition] == 'u')) {
965 // getNextUnicodeChar();
967 if (withoutUnicodePtr != 0) {
968 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
972 } catch (IndexOutOfBoundsException e) {
973 // reset end position for error reporting
974 currentPosition -= 2;
975 throw new InvalidInputException(UNTERMINATED_STRING);
976 } catch (InvalidInputException e) {
977 if (e.getMessage().equals(INVALID_ESCAPE)) {
978 // relocate if finding another quote fairly close: thus unicode
979 // '/u000D' will be fully consumed
980 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
981 if (currentPosition + lookAhead == source.length)
983 if (source[currentPosition + lookAhead] == '\n')
985 if (source[currentPosition + lookAhead] == '`') {
986 currentPosition += lookAhead + 1;
993 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
994 // //$NON-NLS-?$ where ? is an
996 if (currentLine == null) {
997 currentLine = new NLSLine();
998 lines.add(currentLine);
1000 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1004 public void consumeStringConstant() throws InvalidInputException {
1006 // consume next character
1007 unicodeAsBackSlash = false;
1008 currentCharacter = source[currentPosition++];
1009 // if (((currentCharacter = source[currentPosition++]) == '\\')
1010 // && (source[currentPosition] == 'u')) {
1011 // getNextUnicodeChar();
1013 // if (withoutUnicodePtr != 0) {
1014 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1015 // currentCharacter;
1018 while (currentCharacter != '\'') {
1019 /** ** in PHP \r and \n are valid in string literals *** */
1020 // if ((currentCharacter == '\n')
1021 // || (currentCharacter == '\r')) {
1022 // // relocate if finding another quote fairly close: thus unicode
1023 // '/u000D' will be fully consumed
1024 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1025 // if (currentPosition + lookAhead == source.length)
1027 // if (source[currentPosition + lookAhead] == '\n')
1029 // if (source[currentPosition + lookAhead] == '\"') {
1030 // currentPosition += lookAhead + 1;
1034 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1036 if (currentCharacter == '\\') {
1037 int escapeSize = currentPosition;
1038 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1039 //scanEscapeCharacter has a side effect on this value and we need
1040 // the previous value a few lines below this one
1041 scanSingleQuotedEscapeCharacter();
1042 escapeSize = currentPosition - escapeSize;
1043 if (withoutUnicodePtr == 0) {
1044 //buffer all the entries that have been left aside....
1045 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1046 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1047 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1048 } else { //overwrite the / in the buffer
1049 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1050 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1051 // where only one is correct
1052 withoutUnicodePtr--;
1055 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1056 if (recordLineSeparator) {
1057 pushLineSeparator();
1060 // consume next character
1061 unicodeAsBackSlash = false;
1062 currentCharacter = source[currentPosition++];
1063 // if (((currentCharacter = source[currentPosition++]) == '\\')
1064 // && (source[currentPosition] == 'u')) {
1065 // getNextUnicodeChar();
1067 if (withoutUnicodePtr != 0) {
1068 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1072 } catch (IndexOutOfBoundsException e) {
1073 // reset end position for error reporting
1074 currentPosition -= 2;
1075 throw new InvalidInputException(UNTERMINATED_STRING);
1076 } catch (InvalidInputException e) {
1077 if (e.getMessage().equals(INVALID_ESCAPE)) {
1078 // relocate if finding another quote fairly close: thus unicode
1079 // '/u000D' will be fully consumed
1080 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1081 if (currentPosition + lookAhead == source.length)
1083 if (source[currentPosition + lookAhead] == '\n')
1085 if (source[currentPosition + lookAhead] == '\'') {
1086 currentPosition += lookAhead + 1;
1093 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1094 // //$NON-NLS-?$ where ? is an
1096 if (currentLine == null) {
1097 currentLine = new NLSLine();
1098 lines.add(currentLine);
1100 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Scans the remainder of a double-quoted string, reading characters from {@code source} at
 * {@code currentPosition} until the closing {@code "} is met.
 * <p>
 * A backslash is handed to {@code scanDoubleQuotedEscapeCharacter()} and the resulting
 * {@code currentCharacter} is stored in {@code withoutUnicodeBuffer}. Line breaks inside the
 * string are accepted and reported through {@code pushLineSeparator()} when
 * {@code recordLineSeparator} is set. When {@code checkNonExternalizedStringLiterals} is set,
 * the scanned token is added to {@code currentLine} as a {@code StringLiteral}.
 *
 * @throws InvalidInputException with {@code UNTERMINATED_STRING} when reading runs past the end
 *           of {@code source}; an {@code InvalidInputException} raised while scanning an escape
 *           is handled by the second catch clause below
 */
1104 public void consumeStringLiteral() throws InvalidInputException {
1106 // consume next character
1107 unicodeAsBackSlash = false;
1108 currentCharacter = source[currentPosition++];
1109 // if (((currentCharacter = source[currentPosition++]) == '\\')
1110 // && (source[currentPosition] == 'u')) {
1111 // getNextUnicodeChar();
1113 // if (withoutUnicodePtr != 0) {
1114 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1115 // currentCharacter;
// loop until the closing double quote
1118 while (currentCharacter != '"') {
1119 /** ** in PHP \r and \n are valid in string literals *** */
1120 // if ((currentCharacter == '\n')
1121 // || (currentCharacter == '\r')) {
1122 // // relocate if finding another quote fairly close: thus unicode
1123 // '/u000D' will be fully consumed
1124 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1125 // if (currentPosition + lookAhead == source.length)
1127 // if (source[currentPosition + lookAhead] == '\n')
1129 // if (source[currentPosition + lookAhead] == '\"') {
1130 // currentPosition += lookAhead + 1;
1134 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1136 if (currentCharacter == '\\') {
// remember where the escape starts; the difference after scanning is the escape's length
1137 int escapeSize = currentPosition;
1138 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1139 //scanEscapeCharacter make a side effect on this value and we need
1140 // the previous value few lines down this one
1141 scanDoubleQuotedEscapeCharacter();
1142 escapeSize = currentPosition - escapeSize;
1143 if (withoutUnicodePtr == 0) {
1144 //buffer all the entries that have been left aside....
1145 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1146 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1147 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1148 } else { //overwrite the / in the buffer
1149 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1150 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1151 // where only one is correct
1152 withoutUnicodePtr--;
1155 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1156 if (recordLineSeparator) {
1157 pushLineSeparator();
1160 // consume next character
1161 unicodeAsBackSlash = false;
1162 currentCharacter = source[currentPosition++];
1163 // if (((currentCharacter = source[currentPosition++]) == '\\')
1164 // && (source[currentPosition] == 'u')) {
1165 // getNextUnicodeChar();
// once the buffer is in use, every consumed character is appended to it
1167 if (withoutUnicodePtr != 0) {
1168 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1172 } catch (IndexOutOfBoundsException e) {
1173 // reset end position for error reporting
1174 currentPosition -= 2;
1175 throw new InvalidInputException(UNTERMINATED_STRING);
1176 } catch (InvalidInputException e) {
1177 if (e.getMessage().equals(INVALID_ESCAPE)) {
1178 // relocate if finding another quote fairly close: thus unicode
1179 // '/u000D' will be fully consumed
// the search is limited to the next 50 characters and also tests for end of source and '\n'
1180 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1181 if (currentPosition + lookAhead == source.length)
1183 if (source[currentPosition + lookAhead] == '\n')
1185 if (source[currentPosition + lookAhead] == '\"') {
1186 currentPosition += lookAhead + 1;
1193 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1194 // //$NON-NLS-?$ where ? is an
// lazily create the NLS line record before adding this literal to it
1196 if (currentLine == null) {
1197 currentLine = new NLSLine();
1198 lines.add(currentLine);
1200 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
/**
 * Scans and returns the next token, as one of the {@code TokenName*} constants.
 * <p>
 * The method first resets {@code phpExpressionTag}. Depending on scanner state it may delegate to
 * {@code getInlinedHTMLToken(int)} or skip a method body through {@code jumpOverMethodBody()}.
 * Otherwise it proceeds as follows:
 * <ul>
 * <li>while the top of {@code encapsedStringStack} is a quote character, it scans the literal text
 * of that encapsed string and returns {@code TokenNameSTRING} or one of
 * {@code TokenNameEncapsedString0/1/2};</li>
 * <li>while the top of the stack is {@code '$'}, it tokenizes the characters that follow an
 * embedded variable;</li>
 * <li>in the regular case it skips white space (optionally returning
 * {@code TokenNameWHITESPACE}), then dispatches on {@code currentCharacter} to recognise
 * operators, strings, heredocs and comments, falling back to identifiers/keywords, numbers or
 * {@code TokenNameERROR}.</li>
 * </ul>
 * Running past the end of {@code source} in the outer scan yields {@code TokenNameEOF}.
 *
 * @return the kind of the token that was scanned
 * @throws InvalidInputException for malformed input, e.g. {@code UNTERMINATED_COMMENT}
 */
1204 public int getNextToken() throws InvalidInputException {
1205 phpExpressionTag = false;
1207 return getInlinedHTMLToken(currentPosition);
1210 this.wasAcr = false;
1212 jumpOverMethodBody();
1214 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1218 withoutUnicodePtr = 0;
1219 //start with a new token
// ' ' is used as the "not inside an encapsed string" marker when the stack is empty
1220 char encapsedChar = ' ';
1221 if (!encapsedStringStack.isEmpty()) {
1222 encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
// top of stack is a quote character: scan the literal text of the encapsed string
1224 if (encapsedChar != '$' && encapsedChar != ' ') {
1225 currentCharacter = source[currentPosition++];
1226 if (currentCharacter == encapsedChar) {
1227 switch (currentCharacter) {
1229 return TokenNameEncapsedString0;
1231 return TokenNameEncapsedString1;
1233 return TokenNameEncapsedString2;
1236 while (currentCharacter != encapsedChar) {
1237 /** ** in PHP \r and \n are valid in string literals *** */
1238 switch (currentCharacter) {
1240 int escapeSize = currentPosition;
1241 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1242 //scanEscapeCharacter make a side effect on this value and
1243 // we need the previous value few lines down this one
1244 scanDoubleQuotedEscapeCharacter();
1245 escapeSize = currentPosition - escapeSize;
1246 if (withoutUnicodePtr == 0) {
1247 //buffer all the entries that have been left aside....
1248 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1249 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1250 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1251 } else { //overwrite the / in the buffer
1252 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1253 if (backSlashAsUnicodeInString) { //there are TWO \ in
1254 withoutUnicodePtr--;
1260 if (recordLineSeparator) {
1261 pushLineSeparator();
// a variable start or '{' follows: end the text token here and push the '$' marker
1265 if (isPHPIdentifierStart(source[currentPosition]) || source[currentPosition] == '{') {
1267 encapsedStringStack.push(new Character('$'));
1268 return TokenNameSTRING;
1272 if (source[currentPosition] == '$') { // CURLY_OPEN
1274 encapsedStringStack.push(new Character('$'));
1275 return TokenNameSTRING;
1278 // consume next character
1279 unicodeAsBackSlash = false;
1280 currentCharacter = source[currentPosition++];
1281 if (withoutUnicodePtr != 0) {
1282 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1287 return TokenNameSTRING;
1289 // ---------Consume white space and handles startPosition---------
1290 int whiteStart = currentPosition;
1291 startPosition = currentPosition;
1292 currentCharacter = source[currentPosition++];
// top of stack is the '$' marker: tokenize what follows a variable embedded in a string
1293 if (encapsedChar == '$') {
1294 switch (currentCharacter) {
1296 currentCharacter = source[currentPosition++];
1297 return TokenNameSTRING;
1299 if (encapsedChar == '$') {
1300 if (getNextChar('$'))
1301 return TokenNameLBRACE_DOLLAR;
1303 return TokenNameLBRACE;
1305 return TokenNameRBRACE;
1307 return TokenNameLBRACKET;
1309 return TokenNameRBRACKET;
1311 if (tokenizeStrings) {
1312 consumeStringConstant();
1313 return TokenNameStringSingleQuote;
1315 return TokenNameEncapsedString1;
1317 return TokenNameEncapsedString2;
1319 if (tokenizeStrings) {
1320 consumeStringInterpolated();
1321 return TokenNameStringInterpolated;
1323 return TokenNameEncapsedString0;
1325 if (getNextChar('>'))
1326 return TokenNameMINUS_GREATER;
1327 return TokenNameSTRING;
1329 if (currentCharacter == '$') {
// saved so the scanner can step back when the '$' does not start a variable or "${"
1330 int oldPosition = currentPosition;
1332 currentCharacter = source[currentPosition++];
1333 if (currentCharacter == '{') {
1334 return TokenNameDOLLAR_LBRACE;
1336 if (isPHPIdentifierStart(currentCharacter)) {
1337 return scanIdentifierOrKeyword(true);
1339 currentPosition = oldPosition;
1340 return TokenNameSTRING;
1342 } catch (IndexOutOfBoundsException e) {
1343 currentPosition = oldPosition;
1344 return TokenNameSTRING;
1347 if (isPHPIdentifierStart(currentCharacter))
1348 return scanIdentifierOrKeyword(false);
1349 if (Character.isDigit(currentCharacter))
1350 return scanNumber(false);
1351 return TokenNameERROR;
1354 // boolean isWhiteSpace;
1356 while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1357 startPosition = currentPosition;
1358 currentCharacter = source[currentPosition++];
1359 // if (((currentCharacter = source[currentPosition++]) == '\\')
1360 // && (source[currentPosition] == 'u')) {
1361 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1363 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1364 checkNonExternalizeString();
1365 if (recordLineSeparator) {
1366 pushLineSeparator();
1371 // isWhiteSpace = (currentCharacter == ' ')
1372 // || Character.isWhitespace(currentCharacter);
1375 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1376 // reposition scanner in case we are interested by spaces as tokens
1378 startPosition = whiteStart;
1379 return TokenNameWHITESPACE;
1381 //little trick to get out in the middle of a source computation
1382 if (currentPosition > eofPosition)
1383 return TokenNameEOF;
1384 // ---------Identify the next token-------------
1385 switch (currentCharacter) {
1387 return getCastOrParen();
1389 return TokenNameRPAREN;
1391 return TokenNameLBRACE;
1393 return TokenNameRBRACE;
1395 return TokenNameLBRACKET;
1397 return TokenNameRBRACKET;
1399 return TokenNameSEMICOLON;
1401 return TokenNameCOMMA;
1403 if (getNextChar('='))
1404 return TokenNameDOT_EQUAL;
1405 if (getNextCharAsDigit())
1406 return scanNumber(true);
1407 return TokenNameDOT;
1410 if ((test = getNextChar('+', '=')) == 0)
1411 return TokenNamePLUS_PLUS;
1413 return TokenNamePLUS_EQUAL;
1414 return TokenNamePLUS;
1418 if ((test = getNextChar('-', '=')) == 0)
1419 return TokenNameMINUS_MINUS;
1421 return TokenNameMINUS_EQUAL;
1422 if (getNextChar('>'))
1423 return TokenNameMINUS_GREATER;
1424 return TokenNameMINUS;
1427 if (getNextChar('='))
1428 return TokenNameTWIDDLE_EQUAL;
1429 return TokenNameTWIDDLE;
1431 if (getNextChar('=')) {
1432 if (getNextChar('=')) {
1433 return TokenNameNOT_EQUAL_EQUAL;
1435 return TokenNameNOT_EQUAL;
1437 return TokenNameNOT;
1439 if (getNextChar('='))
1440 return TokenNameMULTIPLY_EQUAL;
1441 return TokenNameMULTIPLY;
1443 if (getNextChar('='))
1444 return TokenNameREMAINDER_EQUAL;
1445 return TokenNameREMAINDER;
// saved so the scanner can step back to a plain TokenNameLESS
1447 int oldPosition = currentPosition;
1449 currentCharacter = source[currentPosition++];
1450 } catch (IndexOutOfBoundsException e) {
1451 currentPosition = oldPosition;
1452 return TokenNameLESS;
1454 switch (currentCharacter) {
1456 return TokenNameLESS_EQUAL;
1458 return TokenNameNOT_EQUAL;
1460 if (getNextChar('='))
1461 return TokenNameLEFT_SHIFT_EQUAL;
// a further '<' introduces a heredoc: skip white space, then read the heredoc identifier
1462 if (getNextChar('<')) {
1463 currentCharacter = source[currentPosition++];
1464 while (Character.isWhitespace(currentCharacter)) {
1465 currentCharacter = source[currentPosition++];
1467 int heredocStart = currentPosition - 1;
1468 int heredocLength = 0;
1469 if (isPHPIdentifierStart(currentCharacter)) {
1470 currentCharacter = source[currentPosition++];
1472 return TokenNameERROR;
1474 while (isPHPIdentifierPart(currentCharacter)) {
1475 currentCharacter = source[currentPosition++];
1477 heredocLength = currentPosition - heredocStart - 1;
1478 // heredoc end-tag determination
1479 boolean endTag = true;
1482 ch = source[currentPosition++];
1483 if (ch == '\r' || ch == '\n') {
1484 if (recordLineSeparator) {
1485 pushLineSeparator();
// compare the text after the line break with the heredoc identifier, character by character
1489 for (int i = 0; i < heredocLength; i++) {
1490 if (source[currentPosition + i] != source[heredocStart + i]) {
1496 currentPosition += heredocLength - 1;
1497 currentCharacter = source[currentPosition++];
1498 break; // do...while loop
1504 return TokenNameHEREDOC;
1506 return TokenNameLEFT_SHIFT;
1508 currentPosition = oldPosition;
1509 return TokenNameLESS;
1513 if ((test = getNextChar('=', '>')) == 0)
1514 return TokenNameGREATER_EQUAL;
1516 if ((test = getNextChar('=', '>')) == 0)
1517 return TokenNameRIGHT_SHIFT_EQUAL;
1518 return TokenNameRIGHT_SHIFT;
1520 return TokenNameGREATER;
1523 if (getNextChar('=')) {
1524 if (getNextChar('=')) {
1525 return TokenNameEQUAL_EQUAL_EQUAL;
1527 return TokenNameEQUAL_EQUAL;
1529 if (getNextChar('>'))
1530 return TokenNameEQUAL_GREATER;
1531 return TokenNameEQUAL;
1534 if ((test = getNextChar('&', '=')) == 0)
1535 return TokenNameAND_AND;
1537 return TokenNameAND_EQUAL;
1538 return TokenNameAND;
1542 if ((test = getNextChar('|', '=')) == 0)
1543 return TokenNameOR_OR;
1545 return TokenNameOR_EQUAL;
1549 if (getNextChar('='))
1550 return TokenNameXOR_EQUAL;
1551 return TokenNameXOR;
1553 if (getNextChar('>')) {
1555 if (currentPosition == source.length) {
1557 return TokenNameINLINE_HTML;
1559 return getInlinedHTMLToken(currentPosition - 2);
1561 return TokenNameQUESTION;
1563 if (getNextChar(':'))
1564 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1565 return TokenNameCOLON;
1569 consumeStringConstant();
1570 return TokenNameStringSingleQuote;
1572 if (tokenizeStrings) {
1573 consumeStringLiteral();
1574 return TokenNameStringDoubleQuote;
1576 return TokenNameEncapsedString2;
1578 if (tokenizeStrings) {
1579 consumeStringInterpolated();
1580 return TokenNameStringInterpolated;
1582 return TokenNameEncapsedString0;
1585 char startChar = currentCharacter;
1586 if (getNextChar('=') && startChar == '/') {
1587 return TokenNameDIVIDE_EQUAL;
// '#', or '/' followed by '/', starts a line comment
1590 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1592 this.lastCommentLinePosition = this.currentPosition;
1593 int endPositionForLineComment = 0;
1594 try { //get the next char
1595 currentCharacter = source[currentPosition++];
1596 // if (((currentCharacter = source[currentPosition++])
1598 // && (source[currentPosition] == 'u')) {
1599 // //-------------unicode traitement ------------
1600 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1601 // currentPosition++;
1602 // while (source[currentPosition] == 'u') {
1603 // currentPosition++;
1606 // Character.getNumericValue(source[currentPosition++]))
1610 // Character.getNumericValue(source[currentPosition++]))
1614 // Character.getNumericValue(source[currentPosition++]))
1618 // Character.getNumericValue(source[currentPosition++]))
1622 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1624 // currentCharacter =
1625 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1628 //handle the \\u case manually into comment
1629 // if (currentCharacter == '\\') {
1630 // if (source[currentPosition] == '\\')
1631 // currentPosition++;
1632 // } //jump over the \\
1633 boolean isUnicode = false;
1634 while (currentCharacter != '\r' && currentCharacter != '\n') {
1635 this.lastCommentLinePosition = this.currentPosition;
// "?>" inside a line comment ends the comment and is returned as inline HTML
1636 if (currentCharacter == '?') {
1637 if (getNextChar('>')) {
1638 startPosition = currentPosition - 2;
1640 return TokenNameINLINE_HTML;
1645 currentCharacter = source[currentPosition++];
1646 // if (((currentCharacter = source[currentPosition++])
1648 // && (source[currentPosition] == 'u')) {
1649 // isUnicode = true;
1650 // //-------------unicode traitement ------------
1651 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1652 // currentPosition++;
1653 // while (source[currentPosition] == 'u') {
1654 // currentPosition++;
1657 // Character.getNumericValue(source[currentPosition++]))
1661 // Character.getNumericValue(
1662 // source[currentPosition++]))
1666 // Character.getNumericValue(
1667 // source[currentPosition++]))
1671 // Character.getNumericValue(
1672 // source[currentPosition++]))
1676 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1678 // currentCharacter =
1679 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1682 //handle the \\u case manually into comment
1683 // if (currentCharacter == '\\') {
1684 // if (source[currentPosition] == '\\')
1685 // currentPosition++;
1686 // } //jump over the \\
1689 endPositionForLineComment = currentPosition - 6;
1691 endPositionForLineComment = currentPosition - 1;
1693 // recordComment(false);
1694 recordComment(TokenNameCOMMENT_LINE);
1695 if (this.taskTags != null)
1696 checkTaskTag(this.startPosition, this.currentPosition);
1697 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1698 checkNonExternalizeString();
1699 if (recordLineSeparator) {
1701 pushUnicodeLineSeparator();
1703 pushLineSeparator();
1709 if (tokenizeComments) {
1711 currentPosition = endPositionForLineComment;
1712 // reset one character behind
1714 return TokenNameCOMMENT_LINE;
1716 } catch (IndexOutOfBoundsException e) { //an eof will then
1718 if (tokenizeComments) {
1720 // reset one character behind
1721 return TokenNameCOMMENT_LINE;
1727 //traditional and annotation comment
1728 boolean isJavadoc = false, star = false;
1729 // consume next character
1730 unicodeAsBackSlash = false;
1731 currentCharacter = source[currentPosition++];
1732 // if (((currentCharacter = source[currentPosition++]) ==
1734 // && (source[currentPosition] == 'u')) {
1735 // getNextUnicodeChar();
1737 // if (withoutUnicodePtr != 0) {
1738 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1739 // currentCharacter;
1742 if (currentCharacter == '*') {
1746 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1747 checkNonExternalizeString();
1748 if (recordLineSeparator) {
1749 pushLineSeparator();
1754 try { //get the next char
1755 currentCharacter = source[currentPosition++];
1756 // if (((currentCharacter = source[currentPosition++])
1758 // && (source[currentPosition] == 'u')) {
1759 // //-------------unicode traitement ------------
1760 // getNextUnicodeChar();
1762 //handle the \\u case manually into comment
1763 // if (currentCharacter == '\\') {
1764 // if (source[currentPosition] == '\\')
1765 // currentPosition++;
1766 // //jump over the \\
1768 // empty comment is not a javadoc /**/
1769 if (currentCharacter == '/') {
1772 //loop until end of comment */
// 'star' records whether the previous character was '*', so that "*/" is recognised
1773 while ((currentCharacter != '/') || (!star)) {
1774 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1775 checkNonExternalizeString();
1776 if (recordLineSeparator) {
1777 pushLineSeparator();
1782 star = currentCharacter == '*';
1784 currentCharacter = source[currentPosition++];
1785 // if (((currentCharacter = source[currentPosition++])
1787 // && (source[currentPosition] == 'u')) {
1788 // //-------------unicode traitement ------------
1789 // getNextUnicodeChar();
1791 //handle the \\u case manually into comment
1792 // if (currentCharacter == '\\') {
1793 // if (source[currentPosition] == '\\')
1794 // currentPosition++;
1795 // } //jump over the \\
1797 //recordComment(isJavadoc);
1799 recordComment(TokenNameCOMMENT_PHPDOC);
1801 recordComment(TokenNameCOMMENT_BLOCK);
1804 if (tokenizeComments) {
1806 return TokenNameCOMMENT_PHPDOC;
1807 return TokenNameCOMMENT_BLOCK;
1810 if (this.taskTags != null) {
1811 checkTaskTag(this.startPosition, this.currentPosition);
1813 } catch (IndexOutOfBoundsException e) {
1814 // reset end position for error reporting
1815 currentPosition -= 2;
1816 throw new InvalidInputException(UNTERMINATED_COMMENT);
1820 return TokenNameDIVIDE;
1824 return TokenNameEOF;
1825 //the atEnd may not be <currentPosition == source.length> if
1826 // source is only some part of a real (external) stream
1827 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1829 if (currentCharacter == '$') {
// saved so the scanner can step back when '$' is not followed by an identifier start
1830 int oldPosition = currentPosition;
1832 currentCharacter = source[currentPosition++];
1833 if (isPHPIdentifierStart(currentCharacter)) {
1834 return scanIdentifierOrKeyword(true);
1836 currentPosition = oldPosition;
1837 return TokenNameDOLLAR;
1839 } catch (IndexOutOfBoundsException e) {
1840 currentPosition = oldPosition;
1841 return TokenNameDOLLAR;
1844 if (isPHPIdentifierStart(currentCharacter))
1845 return scanIdentifierOrKeyword(false);
1846 if (Character.isDigit(currentCharacter))
1847 return scanNumber(false);
1848 return TokenNameERROR;
1851 } //-----------------end switch while try--------------------
1852 catch (IndexOutOfBoundsException e) {
1855 return TokenNameEOF;
1860 * @throws InvalidInputException
1862 private int getInlinedHTMLToken(int start) throws InvalidInputException {
// Scans inline HTML beginning at 'start' (stored as startPosition) while looking for a PHP open
// tag: "<?" followed by '=' (which sets phpExpressionTag), by another character, or by the
// letters P/H/P in either case. Returns TokenNameEOF when currentPosition is already beyond
// the end of source, otherwise TokenNameINLINE_HTML.
// When ignorePHPOneLiner is set, lookAheadLinePHPTag() is consulted before the token is returned.
1863 if (currentPosition > source.length) {
1864 currentPosition = source.length;
1865 return TokenNameEOF;
1867 startPosition = start;
1870 currentCharacter = source[currentPosition++];
1871 if (currentCharacter == '<') {
1872 if (getNextChar('?')) {
1873 currentCharacter = source[currentPosition++];
// not "<?p" / "<?P": either the short tag "<?" or the expression tag "<?="
1874 if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
1875 if (currentCharacter != '=') { // <?=
1878 phpExpressionTag = true;
1881 if (ignorePHPOneLiner) { // for CodeFormatter
1882 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1884 return TokenNameINLINE_HTML;
1888 return TokenNameINLINE_HTML;
1891 // boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
// check the remaining letters of "<?php", accepting upper or lower case
1893 int test = getNextChar('H', 'h');
1895 test = getNextChar('P', 'p');
1898 if (ignorePHPOneLiner) {
1899 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1901 return TokenNameINLINE_HTML;
1905 return TokenNameINLINE_HTML;
1913 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1914 if (recordLineSeparator) {
1915 pushLineSeparator();
1920 } //-----------------while--------------------
1922 return TokenNameINLINE_HTML;
1923 } //-----------------try--------------------
// reaching the end of source: the rest of the input is still reported as inline HTML
1924 catch (IndexOutOfBoundsException e) {
1925 startPosition = start;
1929 return TokenNameINLINE_HTML;
/**
 * Looks ahead from {@code currentPosition} to decide whether the PHP code that was just opened
 * is contained in the current line (used for the CodeFormatter).
 * <p>
 * The scan tracks whether it is inside a single- or double-quoted string. When the closing tag
 * is found (a character preceded by {@code '?'}), {@code currentPosition} is moved past it and
 * {@code TokenNameEOF} is returned as a dummy token. Every other visible exit returns
 * {@code TokenNameINLINE_HTML}.
 *
 * @return {@code TokenNameEOF} when the closing tag was found by the look-ahead, otherwise
 *         {@code TokenNameINLINE_HTML}
 */
1935 private int lookAheadLinePHPTag() {
1936 // check if the PHP is only in this line (for CodeFormatter)
// separate cursor, so that currentPosition is only changed on the exits that assign it below
1937 int currentPositionInLine = currentPosition;
1938 char previousCharInLine = ' ';
1939 char currentCharInLine = ' ';
1940 boolean singleQuotedStringActive = false;
1941 boolean doubleQuotedStringActive = false;
1944 // look ahead in this line
1946 previousCharInLine = currentCharInLine;
1947 currentCharInLine = source[currentPositionInLine++];
1948 switch (currentCharInLine) {
1950 if (previousCharInLine == '?') {
1951 // update the scanner's current Position in the source
1952 currentPosition = currentPositionInLine;
1953 // use as "dummy" token
1954 return TokenNameEOF;
1958 if (doubleQuotedStringActive) {
1959 // ignore escaped characters in double quoted strings
1960 previousCharInLine = currentCharInLine;
1961 currentCharInLine = source[currentPositionInLine++];
// a double quote toggles the double-quoted state unless a single-quoted string is open
1964 if (doubleQuotedStringActive) {
1965 doubleQuotedStringActive = false;
1967 if (!singleQuotedStringActive) {
1968 doubleQuotedStringActive = true;
// a single quote closes the single-quoted string only when it is not preceded by a backslash
1973 if (singleQuotedStringActive) {
1974 if (previousCharInLine != '\\') {
1975 singleQuotedStringActive = false;
1978 if (!doubleQuotedStringActive) {
1979 singleQuotedStringActive = true;
1985 return TokenNameINLINE_HTML;
1987 if (!singleQuotedStringActive && !doubleQuotedStringActive) {
1989 return TokenNameINLINE_HTML;
1993 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
1995 return TokenNameINLINE_HTML;
1999 if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2001 return TokenNameINLINE_HTML;
// end of source reached during the look-ahead: continue scanning from where the look-ahead stopped
2006 } catch (IndexOutOfBoundsException e) {
2008 currentPosition = currentPositionInLine;
2009 return TokenNameINLINE_HTML;
2013 // public final void getNextUnicodeChar()
2014 // throws IndexOutOfBoundsException, InvalidInputException {
2016 // //handle the case of unicode.
2017 // //when a unicode appears then we must use a buffer that holds char
2019 // //At the end of this method currentCharacter holds the new visited char
2020 // //and currentPosition points right next after it
2022 // //ALL getNextChar.... ARE OPTIMIZED COPIES
2024 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2025 // currentPosition++;
2026 // while (source[currentPosition] == 'u') {
2027 // currentPosition++;
2031 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2033 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2035 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2037 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2039 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2041 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2042 // //need the unicode buffer
2043 // if (withoutUnicodePtr == 0) {
2044 // //buffer all the entries that have been left aside....
2045 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2046 // System.arraycopy(
2049 // withoutUnicodeBuffer,
2051 // withoutUnicodePtr);
2053 // //fill the buffer with the char
2054 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2056 // unicodeAsBackSlash = currentCharacter == '\\';
2059 * Tokenize a method body, assuming that curly brackets are properly balanced.
2061 public final void jumpOverMethodBody() {
// Advances the scanner through a method body without returning tokens: white space is skipped
// (line separators are recorded when recordLineSeparator is set), then the current character is
// dispatched to consume escapes, string literals, line and block comments, identifiers/keywords
// and numbers. The method returns nothing; its visible effect is on the scanner position fields
// and the recorded line separators.
2062 this.wasAcr = false;
2065 while (true) { //loop for jumping over comments
2066 // ---------Consume white space and handles startPosition---------
2067 boolean isWhiteSpace;
2069 startPosition = currentPosition;
2070 currentCharacter = source[currentPosition++];
2071 // if (((currentCharacter = source[currentPosition++]) == '\\')
2072 // && (source[currentPosition] == 'u')) {
2073 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2075 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2076 pushLineSeparator();
2077 isWhiteSpace = Character.isWhitespace(currentCharacter);
2079 } while (isWhiteSpace);
2080 // -------consume token until } is found---------
2081 switch (currentCharacter) {
2092 test = getNextChar('\\');
2095 scanDoubleQuotedEscapeCharacter();
2096 } catch (InvalidInputException ex) {
2100 // try { // consume next character
2101 unicodeAsBackSlash = false;
2102 currentCharacter = source[currentPosition++];
2103 // if (((currentCharacter = source[currentPosition++]) == '\\')
2104 // && (source[currentPosition] == 'u')) {
2105 // getNextUnicodeChar();
2107 if (withoutUnicodePtr != 0) {
2108 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2111 // } catch (InvalidInputException ex) {
2119 // try { // consume next character
2120 unicodeAsBackSlash = false;
2121 currentCharacter = source[currentPosition++];
2122 // if (((currentCharacter = source[currentPosition++]) == '\\')
2123 // && (source[currentPosition] == 'u')) {
2124 // getNextUnicodeChar();
2126 if (withoutUnicodePtr != 0) {
2127 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2130 // } catch (InvalidInputException ex) {
// consume the string up to its closing double quote
2132 while (currentCharacter != '"') {
2133 if (currentCharacter == '\r') {
2134 if (source[currentPosition] == '\n')
2137 // the string cannot go further than the line
2139 if (currentCharacter == '\n') {
2141 // the string cannot go further than the line
2143 if (currentCharacter == '\\') {
2145 scanDoubleQuotedEscapeCharacter();
2146 } catch (InvalidInputException ex) {
2150 // try { // consume next character
2151 unicodeAsBackSlash = false;
2152 currentCharacter = source[currentPosition++];
2153 // if (((currentCharacter = source[currentPosition++]) == '\\')
2154 // && (source[currentPosition] == 'u')) {
2155 // getNextUnicodeChar();
2157 if (withoutUnicodePtr != 0) {
2158 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2161 // } catch (InvalidInputException ex) {
2164 } catch (IndexOutOfBoundsException e) {
// '/' followed by '/': line comment, consumed up to the next '\r' or '\n'
2170 if ((test = getNextChar('/', '*')) == 0) {
2174 currentCharacter = source[currentPosition++];
2175 // if (((currentCharacter = source[currentPosition++]) ==
2177 // && (source[currentPosition] == 'u')) {
2178 // //-------------unicode traitement ------------
2179 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2180 // currentPosition++;
2181 // while (source[currentPosition] == 'u') {
2182 // currentPosition++;
2185 // Character.getNumericValue(source[currentPosition++]))
2189 // Character.getNumericValue(source[currentPosition++]))
2193 // Character.getNumericValue(source[currentPosition++]))
2197 // Character.getNumericValue(source[currentPosition++]))
2200 // //error don't care of the value
2201 // currentCharacter = 'A';
2202 // } //something different from \n and \r
2204 // currentCharacter =
2205 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2208 while (currentCharacter != '\r' && currentCharacter != '\n') {
2210 currentCharacter = source[currentPosition++];
2211 // if (((currentCharacter = source[currentPosition++])
2213 // && (source[currentPosition] == 'u')) {
2214 // //-------------unicode traitement ------------
2215 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2216 // currentPosition++;
2217 // while (source[currentPosition] == 'u') {
2218 // currentPosition++;
2221 // Character.getNumericValue(source[currentPosition++]))
2225 // Character.getNumericValue(source[currentPosition++]))
2229 // Character.getNumericValue(source[currentPosition++]))
2233 // Character.getNumericValue(source[currentPosition++]))
2236 // //error don't care of the value
2237 // currentCharacter = 'A';
2238 // } //something different from \n and \r
2240 // currentCharacter =
2241 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2245 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2246 pushLineSeparator();
2247 } catch (IndexOutOfBoundsException e) {
2248 } //an eof will then be generated
2252 //traditional and annotation comment
2253 boolean star = false;
2254 // try { // consume next character
2255 unicodeAsBackSlash = false;
2256 currentCharacter = source[currentPosition++];
2257 // if (((currentCharacter = source[currentPosition++]) == '\\')
2258 // && (source[currentPosition] == 'u')) {
2259 // getNextUnicodeChar();
2261 if (withoutUnicodePtr != 0) {
2262 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2265 // } catch (InvalidInputException ex) {
2267 if (currentCharacter == '*') {
2270 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2271 pushLineSeparator();
2272 try { //get the next char
2273 currentCharacter = source[currentPosition++];
2274 // if (((currentCharacter = source[currentPosition++]) ==
2276 // && (source[currentPosition] == 'u')) {
2277 // //-------------unicode traitement ------------
2278 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2279 // currentPosition++;
2280 // while (source[currentPosition] == 'u') {
2281 // currentPosition++;
2284 // Character.getNumericValue(source[currentPosition++]))
2288 // Character.getNumericValue(source[currentPosition++]))
2292 // Character.getNumericValue(source[currentPosition++]))
2296 // Character.getNumericValue(source[currentPosition++]))
2299 // //error don't care of the value
2300 // currentCharacter = 'A';
2301 // } //something different from * and /
2303 // currentCharacter =
2304 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2307 //loop until end of comment */
// 'star' records whether the previous character was '*', so that "*/" is recognised
2308 while ((currentCharacter != '/') || (!star)) {
2309 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2310 pushLineSeparator();
2311 star = currentCharacter == '*';
2313 currentCharacter = source[currentPosition++];
2314 // if (((currentCharacter = source[currentPosition++])
2316 // && (source[currentPosition] == 'u')) {
2317 // //-------------unicode traitement ------------
2318 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2319 // currentPosition++;
2320 // while (source[currentPosition] == 'u') {
2321 // currentPosition++;
2324 // Character.getNumericValue(source[currentPosition++]))
2328 // Character.getNumericValue(source[currentPosition++]))
2332 // Character.getNumericValue(source[currentPosition++]))
2336 // Character.getNumericValue(source[currentPosition++]))
2339 // //error don't care of the value
2340 // currentCharacter = 'A';
2341 // } //something different from * and /
2343 // currentCharacter =
2344 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2348 } catch (IndexOutOfBoundsException e) {
// identifiers, keywords and '$' variables are consumed by scanIdentifierOrKeyword
2356 if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
2358 scanIdentifierOrKeyword((currentCharacter == '$'));
2359 } catch (InvalidInputException ex) {
2364 if (Character.isDigit(currentCharacter)) {
2367 } catch (InvalidInputException ex) {
2374 //-----------------end switch while try--------------------
2375 } catch (IndexOutOfBoundsException e) {
2376 } catch (InvalidInputException e) {
2381 // public final boolean jumpOverUnicodeWhiteSpace()
2382 // throws InvalidInputException {
2384 // //handle the case of unicode. Jump over the next whiteSpace
2385 // //making startPosition pointing on the next available char
2386 // //On false, the currentCharacter is filled up with a potential
2390 // this.wasAcr = false;
2391 // int c1, c2, c3, c4;
2392 // int unicodeSize = 6;
2393 // currentPosition++;
2394 // while (source[currentPosition] == 'u') {
2395 // currentPosition++;
2399 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2401 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2403 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2405 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2407 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2410 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2411 // if (recordLineSeparator
2412 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2413 // pushLineSeparator();
2414 // if (Character.isWhitespace(currentCharacter))
2417 // //buffer the new char which is not a white space
2418 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2419 // //withoutUnicodePtr == 1 is true here
2421 // } catch (IndexOutOfBoundsException e) {
2422 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
/**
 * Returns a copy of the recorded line-end positions, bounded to the entries in use: the first
 * linePtr + 1 elements of lineEnds are copied into a freshly allocated array.
 *
 * NOTE(review): the embedded listing numbers jump from 2426 to 2428 and from 2428 to 2432, so
 * the declaration of `copy` and the return statement are not visible here -- presumably
 * `int[] copy;` and `return copy;`; confirm against the complete file.
 */
2425 public final int[] getLineEnds() {
2426 //return a bounded copy of this.lineEnds
// The destination array is allocated and assigned to `copy` inside the argument list itself.
2428 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
/**
 * Accessor declared to return a char[].
 *
 * NOTE(review): the body is not visible in this listing (the embedded numbering jumps from 2432
 * to 2436); presumably it returns the scanner's `source` buffer -- confirm, and check whether
 * callers may mutate the returned array.
 */
2432 public char[] getSource() {
/**
 * Tells whether a token code denotes an identifier or a keyword.
 *
 * @param token a token code, compared against the TokenName* constants
 * @return true if token equals TokenNameIdentifier or is strictly greater than TokenNameKEYWORD
 *
 * NOTE(review): the range test presumably relies on every keyword token code being numbered
 * above TokenNameKEYWORD -- confirm in the interface that declares these constants.
 */
2436 public static boolean isIdentifierOrKeyword(int token) {
2437 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
/**
 * Returns the source of a one-character token, i.e. the character at source[startPosition].
 *
 * NOTE(review): listing lines 2444-2497 are not visible. Going by the comments below they
 * presumably dispatch on charOne and return shared, pre-built arrays for common characters;
 * only the fallback that allocates a new one-element array is visible -- confirm.
 */
2440 final char[] optimizedCurrentTokenSource1() {
2441 //return always the same char[] build only once
2442 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2443 char charOne = source[startPosition];
// Fallback: no shared array applies, so build a fresh single-character array.
2498 return new char[] { charOne };
/**
 * Returns the source of a two-character token (source[startPosition] and the following
 * character), reusing a previously built char[] where possible instead of allocating.
 *
 * Visible steps: (1) a run of returns of the shared arrays charArray_va .. charArray_vz;
 * (2) a lookup in the cache bucket charArray_length[0][hash]; (3) on a miss, a new array
 * { c0, c1 } is stored in the bucket at slot `max`.
 *
 * NOTE(review): many lines of this method are missing from the listing: the declaration of
 * c0/c1, the conditions guarding the charArray_vX returns, the initial value of `i`, the
 * returns on a cache hit, the statement executed when ++max reaches InternalTableSize, and the
 * final bookkeeping/return. The comments below describe only what is visible.
 */
2502 final char[] optimizedCurrentTokenSource2() {
2504 c0 = source[startPosition];
2505 c1 = source[startPosition + 1];
2507 //return always the same char[] build only once
2508 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
// The guards selecting each shared array are not visible; presumably they test c0 == 'v'
// and then c1 -- confirm.
2511 return charArray_va;
2513 return charArray_vb;
2515 return charArray_vc;
2517 return charArray_vd;
2519 return charArray_ve;
2521 return charArray_vf;
2523 return charArray_vg;
2525 return charArray_vh;
2527 return charArray_vi;
2529 return charArray_vj;
2531 return charArray_vk;
2533 return charArray_vl;
2535 return charArray_vm;
2537 return charArray_vn;
2539 return charArray_vo;
2541 return charArray_vp;
2543 return charArray_vq;
2545 return charArray_vr;
2547 return charArray_vs;
2549 return charArray_vt;
2551 return charArray_vu;
2553 return charArray_vv;
2555 return charArray_vw;
2557 return charArray_vx;
2559 return charArray_vy;
2561 return charArray_vz;
2564 //try to return the same char[] build only once
// Bucket selection: both characters are folded into one int and reduced modulo TableSize.
2565 int hash = ((c0 << 6) + c1) % TableSize;
2566 char[][] table = charArray_length[0][hash];
// First scan: from the slot after the starting value of i up to the end of the bucket.
2568 while (++i < InternalTableSize) {
2569 char[] charArray = table[i];
2570 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2573 //---------other side---------
// Second scan: slots up to and including newEntry2.
2575 int max = newEntry2;
2576 while (++i <= max) {
2577 char[] charArray = table[i];
2578 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2581 //--------add the entry-------
// Cache miss: advance the insertion slot (the wrap-around statement is not visible) and
// store a new array, also kept in r.
2582 if (++max >= InternalTableSize)
2585 table[max] = (r = new char[] { c0, c1 });
/**
 * Returns the source of a three-character token starting at source[startPosition], reusing a
 * cached char[] from the bucket charArray_length[1][hash] where possible; on a miss a new
 * array { c0, c1, c2 } is stored in the bucket at slot `max`.
 *
 * NOTE(review): the listing omits the declaration of c0/c1/c2, the continuation of the hash
 * expression (presumably a reduction modulo the table size), the initial value of `i`, the
 * returns on a cache hit, the statement executed when ++max reaches InternalTableSize, and the
 * final bookkeeping/return -- confirm against the complete file.
 */
2590 final char[] optimizedCurrentTokenSource3() {
2591 //try to return the same char[] build only once
// Reads the three characters and folds them into the hash in a single expression.
2593 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2595 char[][] table = charArray_length[1][hash];
// First scan: from the slot after the starting value of i up to the end of the bucket.
2597 while (++i < InternalTableSize) {
2598 char[] charArray = table[i];
2599 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2602 //---------other side---------
// Second scan: slots up to and including newEntry3.
2604 int max = newEntry3;
2605 while (++i <= max) {
2606 char[] charArray = table[i];
2607 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2610 //--------add the entry-------
// Cache miss: advance the insertion slot and store a new array, also kept in r.
2611 if (++max >= InternalTableSize)
2614 table[max] = (r = new char[] { c0, c1, c2 });
/**
 * Returns the source of a four-character token starting at source[startPosition], reusing a
 * cached char[] from the bucket charArray_length[2][(int) hash] where possible; on a miss a new
 * array { c0, c1, c2, c3 } is stored in the bucket at slot `max`.
 *
 * NOTE(review): the listing omits the continuation of the hash expression (presumably a
 * reduction modulo the table size, which would keep the (int) cast in range), the initial value
 * of `i`, the returns on a cache hit, the statement executed when ++max reaches
 * InternalTableSize, and the final bookkeeping/return -- confirm against the complete file.
 */
2619 final char[] optimizedCurrentTokenSource4() {
2620 //try to return the same char[] build only once
2621 char c0, c1, c2, c3;
// The first character is widened to long before shifting, so the sum is computed as a long.
2622 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2623 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2625 char[][] table = charArray_length[2][(int) hash];
// First scan: from the slot after the starting value of i up to the end of the bucket.
2627 while (++i < InternalTableSize) {
2628 char[] charArray = table[i];
2629 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2632 //---------other side---------
// Second scan: slots up to and including newEntry4.
2634 int max = newEntry4;
2635 while (++i <= max) {
2636 char[] charArray = table[i];
2637 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2640 //--------add the entry-------
// Cache miss: advance the insertion slot and store a new array, also kept in r.
2641 if (++max >= InternalTableSize)
2644 table[max] = (r = new char[] { c0, c1, c2, c3 });
/**
 * Returns the source of a five-character token starting at source[startPosition], reusing a
 * cached char[] from the bucket charArray_length[3][(int) hash] where possible; on a miss a new
 * array { c0, c1, c2, c3, c4 } is stored in the bucket at slot `max`.
 *
 * NOTE(review): the listing omits the continuation of the hash expression (presumably a
 * reduction modulo the table size, which would keep the (int) cast in range), the initial value
 * of `i`, the returns on a cache hit, the statement executed when ++max reaches
 * InternalTableSize, and the final bookkeeping/return -- confirm against the complete file.
 */
2649 final char[] optimizedCurrentTokenSource5() {
2650 //try to return the same char[] build only once
2651 char c0, c1, c2, c3, c4;
// The two leading characters are widened to long before shifting, so the sum is a long.
2652 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2653 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2655 char[][] table = charArray_length[3][(int) hash];
// First scan: from the slot after the starting value of i up to the end of the bucket.
2657 while (++i < InternalTableSize) {
2658 char[] charArray = table[i];
2659 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2662 //---------other side---------
// Second scan: slots up to and including newEntry5.
2664 int max = newEntry5;
2665 while (++i <= max) {
2666 char[] charArray = table[i];
2667 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2670 //--------add the entry-------
// Cache miss: advance the insertion slot and store a new array, also kept in r.
2671 if (++max >= InternalTableSize)
2674 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
/**
 * Returns the source of a six-character token starting at source[startPosition], reusing a
 * cached char[] from the bucket charArray_length[4][(int) hash] where possible; on a miss a new
 * array { c0, c1, c2, c3, c4, c5 } is stored in the bucket at slot `max`.
 *
 * NOTE(review): the listing omits the continuation of the hash expression (presumably a
 * reduction modulo the table size, which would keep the (int) cast in range), the initial value
 * of `i`, the returns on a cache hit, the statement executed when ++max reaches
 * InternalTableSize, and the final bookkeeping/return -- confirm against the complete file.
 */
2679 final char[] optimizedCurrentTokenSource6() {
2680 //try to return the same char[] build only once
2681 char c0, c1, c2, c3, c4, c5;
// The three leading characters are widened to long before shifting. The shift amounts
// (32, 24, 18, 12, 6) are not evenly spaced; they only serve to mix the characters into a hash.
2682 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2683 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2684 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2686 char[][] table = charArray_length[4][(int) hash];
// First scan: from the slot after the starting value of i up to the end of the bucket.
2688 while (++i < InternalTableSize) {
2689 char[] charArray = table[i];
2690 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2691 && (c5 == charArray[5]))
2694 //---------other side---------
// Second scan: slots up to and including newEntry6.
2696 int max = newEntry6;
2697 while (++i <= max) {
2698 char[] charArray = table[i];
2699 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2700 && (c5 == charArray[5]))
2703 //--------add the entry-------
// Cache miss: advance the insertion slot and store a new array, also kept in r.
2704 if (++max >= InternalTableSize)
2707 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
/**
 * Records the position of the line separator held in currentCharacter ('\r' or '\n') in the
 * lineEnds array, merging a CR immediately followed by LF into a single line end.
 *
 * The separator position is currentPosition - 1. A new entry is stored at lineEnds[++linePtr];
 * when that store runs past the end of the array, the array is replaced by a copy that is
 * INCREMENT (250) elements longer and the store is repeated.
 *
 * NOTE(review): the listing omits several short lines (the `try {` openers, the `else`
 * branches, the statements under the `lineEnds[linePtr] >= separatorPos` guards, and the
 * updates of wasAcr). No statement that throws InvalidInputException is visible even though the
 * signature declares it -- confirm against the complete file.
 *
 * @throws InvalidInputException declared by the signature; no visible line throws it
 */
2712 public final void pushLineSeparator() throws InvalidInputException {
2713 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2714 final int INCREMENT = 250;
2715 if (this.checkNonExternalizedStringLiterals) {
2716 // reinitialize the current line for non externalize strings purpose
2719 //currentCharacter is at position currentPosition-1
2721 if (currentCharacter == '\r') {
2722 int separatorPos = currentPosition - 1;
// Guard for a separator at or before the last recorded line end; the guarded statement is
// not visible (presumably an early return so the position is not recorded twice).
2723 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2725 //System.out.println("CR-" + separatorPos);
2727 lineEnds[++linePtr] = separatorPos;
2728 } catch (IndexOutOfBoundsException e) {
2729 //linePtr value is correct
// Growth path: linePtr was already incremented by the failed store, so it is reused as is.
2730 int oldLength = lineEnds.length;
2731 int[] old = lineEnds;
2732 lineEnds = new int[oldLength + INCREMENT];
2733 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2734 lineEnds[linePtr] = separatorPos;
2736 // look-ahead for merged cr+lf
// If the next source character is LF, the entry just written is moved onto the LF position.
2738 if (source[currentPosition] == '\n') {
2739 //System.out.println("look-ahead LF-" + currentPosition);
2740 lineEnds[linePtr] = currentPosition;
// Reading source[currentPosition] can run past the end of the input; that case is caught here.
2746 } catch (IndexOutOfBoundsException e) {
2751 if (currentCharacter == '\n') {
2752 //must merge eventual cr followed by lf
// A CR recorded exactly one position earlier is replaced by this LF position.
2753 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2754 //System.out.println("merge LF-" + (currentPosition - 1));
2755 lineEnds[linePtr] = currentPosition - 1;
2757 int separatorPos = currentPosition - 1;
2758 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2760 // System.out.println("LF-" + separatorPos);
2762 lineEnds[++linePtr] = separatorPos;
2763 } catch (IndexOutOfBoundsException e) {
2764 //linePtr value is correct
2765 int oldLength = lineEnds.length;
2766 int[] old = lineEnds;
2767 lineEnds = new int[oldLength + INCREMENT];
2768 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2769 lineEnds[linePtr] = separatorPos;
/**
 * Variant of the line-separator recording used when the '\r' or '\n' in currentCharacter was
 * read from a unicode escape sequence rather than as a raw character.
 *
 * The separator position is currentPosition - 6 instead of currentPosition - 1 (presumably the
 * six source characters of a backslash-u escape with four hex digits -- confirm). A new entry
 * is stored at lineEnds[++linePtr]; when that store runs past the end of the array, the array
 * is replaced by a copy that is INCREMENT (250) elements longer and the store is repeated.
 *
 * NOTE(review): the listing omits several short lines (the `try {` openers, the `else`
 * branches, the statements under the `lineEnds[linePtr] >= separatorPos` guards, and the
 * updates of wasAcr). Unlike the '\r' growth path, no catch is visible around the look-ahead
 * read of source[currentPosition] -- confirm how end of input is handled there.
 */
2777 public final void pushUnicodeLineSeparator() {
2778 // isUnicode means that the \r or \n has been read as a unicode character
2779 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2780 final int INCREMENT = 250;
2781 //currentCharacter is at position currentPosition-1
2782 if (this.checkNonExternalizedStringLiterals) {
2783 // reinitialize the current line for non externalize strings purpose
2787 if (currentCharacter == '\r') {
2788 int separatorPos = currentPosition - 6;
// Guard for a separator at or before the last recorded line end; the guarded statement is
// not visible (presumably an early return).
2789 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2791 //System.out.println("CR-" + separatorPos);
2793 lineEnds[++linePtr] = separatorPos;
2794 } catch (IndexOutOfBoundsException e) {
2795 //linePtr value is correct
// Growth path: linePtr was already incremented by the failed store, so it is reused as is.
2796 int oldLength = lineEnds.length;
2797 int[] old = lineEnds;
2798 lineEnds = new int[oldLength + INCREMENT];
2799 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2800 lineEnds[linePtr] = separatorPos;
2802 // look-ahead for merged cr+lf
// If the next source character is a raw LF, the entry just written is moved onto it.
2803 if (source[currentPosition] == '\n') {
2804 //System.out.println("look-ahead LF-" + currentPosition);
2805 lineEnds[linePtr] = currentPosition;
2813 if (currentCharacter == '\n') {
2814 //must merge eventual cr followed by lf
// A CR recorded at currentPosition - 7 is replaced by this separator's position.
2815 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2816 //System.out.println("merge LF-" + (currentPosition - 1));
2817 lineEnds[linePtr] = currentPosition - 6;
2819 int separatorPos = currentPosition - 6;
2820 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2822 // System.out.println("LF-" + separatorPos);
2824 lineEnds[++linePtr] = separatorPos;
2825 } catch (IndexOutOfBoundsException e) {
2826 //linePtr value is correct
2827 int oldLength = lineEnds.length;
2828 int[] old = lineEnds;
2829 lineEnds = new int[oldLength + INCREMENT];
2830 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2831 lineEnds[linePtr] = separatorPos;
/**
 * Records the start and stop positions of the comment that was just scanned.
 *
 * The stop position defaults to currentPosition. For TokenNameCOMMENT_LINE it is stored as the
 * negated lastCommentLinePosition, and for TokenNameCOMMENT_BLOCK as the negated
 * currentPosition, so the sign of the stored stop position distinguishes these comment kinds
 * from any other token passed in. The start position stored is startPosition.
 *
 * NOTE(review): the `switch (token) {` header, the `break;` statements and the closing braces
 * are missing from the listing -- confirm the cases do not fall through.
 *
 * @param token the comment token code (compared against TokenNameCOMMENT_LINE and
 *        TokenNameCOMMENT_BLOCK)
 */
2839 public void recordComment(int token) {
2841 int stopPosition = this.currentPosition;
2843 case TokenNameCOMMENT_LINE:
2844 stopPosition = -this.lastCommentLinePosition;
2846 case TokenNameCOMMENT_BLOCK:
2847 stopPosition = -this.currentPosition;
2851 // a new comment is recorded
2852 int length = this.commentStops.length;
// When the incremented commentPtr no longer fits, both arrays are replaced by copies that are
// 30 elements longer; each new array is assigned inside the arraycopy argument list.
2853 if (++this.commentPtr >= length) {
2854 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2855 //grows the positions buffers too
2856 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2858 this.commentStops[this.commentPtr] = stopPosition;
2859 this.commentStarts[this.commentPtr] = this.startPosition;
2862 // public final void recordComment(boolean isJavadoc) {
2863 // // a new annotation comment is recorded
2865 // commentStops[++commentPtr] = isJavadoc
2866 // ? currentPosition
2867 // : -currentPosition;
2868 // } catch (IndexOutOfBoundsException e) {
2869 // int oldStackLength = commentStops.length;
2870 // int[] oldStack = commentStops;
2871 // commentStops = new int[oldStackLength + 30];
2872 // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2873 // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2874 // //grows the positions buffers too
2875 // int[] old = commentStarts;
2876 // commentStarts = new int[oldStackLength + 30];
2877 // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2879 // //the buffer is of a correct size here
2880 // commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning can restart inside the range [begin, end].
 *
 * initialPosition, startPosition and currentPosition are all set to begin; eofPosition is set
 * to end + 1, so `end` is treated as an inclusive bound; the recorded comments are discarded by
 * resetting commentPtr to -1.
 *
 * NOTE(review): listing line 2884 is not visible, so there may be one more reset statement in
 * the original method -- confirm against the complete file.
 *
 * @param begin the position at which scanning restarts
 * @param end the last position to scan (inclusive)
 */
2882 public void resetTo(int begin, int end) {
2883 //reset the scanner to a given position where it may rescan again
2885 initialPosition = startPosition = currentPosition = begin;
// end + 1 would overflow for Integer.MAX_VALUE, so that value is kept unchanged.
2886 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2887 commentPtr = -1; // reset comment stack
/**
 * Reads the next source character into currentCharacter and, depending on its value, replaces
 * it by the character the escape stands for inside a single-quoted string (presumably called
 * right after a backslash has been consumed -- confirm with the callers).
 *
 * NOTE(review): the `case` labels and `break`s of the switch are missing from the listing.
 * Only the three assignments are visible: one yields a single quote and two yield a backslash
 * (the second presumably in the default branch, i.e. an unrecognised escape keeps the
 * backslash) -- confirm against the complete file. No statement that throws
 * InvalidInputException is visible.
 *
 * @throws InvalidInputException declared by the signature; no visible line throws it
 */
2890 public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2891 // the string with "\\u" is a legal string of two chars \ and u
2892 //thus we use a direct access to the source (for regular cases).
2893 // if (unicodeAsBackSlash) {
2894 // // consume next character
2895 // unicodeAsBackSlash = false;
2896 // if (((currentCharacter = source[currentPosition++]) == '\\')
2897 // && (source[currentPosition] == 'u')) {
2898 // getNextUnicodeChar();
2900 // if (withoutUnicodePtr != 0) {
2901 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Direct read from the source buffer; the unicode-escape handling above is disabled.
2905 currentCharacter = source[currentPosition++];
2906 switch (currentCharacter) {
2908 currentCharacter = '\'';
2911 currentCharacter = '\\';
2914 currentCharacter = '\\';
/**
 * Reads the next source character into currentCharacter and replaces it by the character the
 * escape stands for inside a double-quoted string (presumably called right after a backslash
 * has been consumed -- confirm with the callers).
 *
 * Visible simple escapes produce tab, line feed, carriage return, double quote, single quote,
 * backslash and dollar sign; the assignments for backspace and form feed are commented out.
 * Otherwise an octal escape of one to three digits is decoded: a first digit of 4..7 limits the
 * escape to two digits, a first digit of 0..3 allows three.
 *
 * NOTE(review): the `case` labels, the statements in the "ignore last character" branches
 * (presumably currentPosition is stepped back) and the condition guarding the visible `throw`
 * are missing from the listing. The throw for a non-octal escape character is commented out,
 * so such input presumably does not raise an error -- confirm against the complete file.
 *
 * @throws InvalidInputException with INVALID_ESCAPE from the branch visible after the octal
 *         decoding (guard not visible)
 */
2919 public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2920 // the string with "\\u" is a legal string of two chars \ and u
2921 //thus we use a direct access to the source (for regular cases).
2922 // if (unicodeAsBackSlash) {
2923 // // consume next character
2924 // unicodeAsBackSlash = false;
2925 // if (((currentCharacter = source[currentPosition++]) == '\\')
2926 // && (source[currentPosition] == 'u')) {
2927 // getNextUnicodeChar();
2929 // if (withoutUnicodePtr != 0) {
2930 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Direct read from the source buffer; the unicode-escape handling above is disabled.
2934 currentCharacter = source[currentPosition++];
2935 switch (currentCharacter) {
2937 // currentCharacter = '\b';
2940 currentCharacter = '\t';
2943 currentCharacter = '\n';
2946 // currentCharacter = '\f';
2949 currentCharacter = '\r';
2952 currentCharacter = '\"';
2955 currentCharacter = '\'';
2958 currentCharacter = '\\';
2961 currentCharacter = '$';
2964 // -----------octal escape--------------
2966 // OctalDigit OctalDigit
2967 // ZeroToThree OctalDigit OctalDigit
2968 int number = Character.getNumericValue(currentCharacter);
2969 if (number >= 0 && number <= 7) {
// A leading digit above 3 cannot start a three-digit escape.
2970 boolean zeroToThreeNot = number > 3;
2971 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2972 int digit = Character.getNumericValue(currentCharacter);
2973 if (digit >= 0 && digit <= 7) {
2974 number = (number * 8) + digit;
2975 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2976 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2977 // Digit --> ignore last character
2980 digit = Character.getNumericValue(currentCharacter);
2981 if (digit >= 0 && digit <= 7) {
2982 // has read \ZeroToThree OctalDigit OctalDigit
2983 number = (number * 8) + digit;
2984 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2985 // --> ignore last character
2989 } else { // has read \OctalDigit NonDigit--> ignore last
2993 } else { // has read \OctalDigit NonOctalDigit--> ignore last
2997 } else { // has read \OctalDigit --> ignore last character
// The condition guarding this throw is not visible (presumably a range check on number).
3001 throw new InvalidInputException(INVALID_ESCAPE);
// The decoded octal value becomes the current character.
3002 currentCharacter = (char) number;
3005 // throw new InvalidInputException(INVALID_ESCAPE);
3009 // public int scanIdentifierOrKeyword() throws InvalidInputException {
3010 // return scanIdentifierOrKeyword( false );
/**
 * Scans the remainder of an identifier and classifies it as a variable, a keyword or a plain
 * identifier.
 *
 * The token text (source from startPosition to currentPosition) is copied into `data` with
 * every character lower-cased, so the keyword comparison below is case-insensitive. The
 * classification dispatches on the first character, then on the token length, and finally
 * compares the remaining characters one by one while advancing `index` with ++index.
 * A one-character token is always reported as TokenNameIdentifier.
 *
 * NOTE(review): the listing omits many short lines of this method: the `switch (length)`
 * headers, the `case` labels, several `else` lines, and some returns (for example the token
 * returned for "as", "do", "if" and for the two-letter test starting with 'o'). The condition
 * guarding `return TokenNameVariable` is also missing; presumably it is `if (isVariable)`.
 *
 * NOTE(review): several fallback tests read data[index] WITHOUT resetting index after a
 * preceding chain of ++index comparisons has failed part-way (echo/else/exit/eval,
 * endif/empty, endfor/elseif, instanceof/implements). If the first chain matched one or more
 * characters before failing, index is already past 1, so the fallback compares a later position
 * and its own ++index steps can read past data.length. Assuming the echo chain runs for
 * 4-character tokens, an identifier such as "ecls" or "echx" would raise
 * ArrayIndexOutOfBoundsException instead of yielding TokenNameIdentifier. Confirm, and consider
 * resetting index before each fallback the way the '_' branch does with `index = 0`.
 *
 * @param isVariable presumably true when the token started with '$' -- confirm with the callers
 * @return TokenNameVariable, a keyword token code, or TokenNameIdentifier
 * @throws InvalidInputException declared by the signature; no visible line throws it directly
 */
3012 public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
3014 //first dispatch on the first char.
3015 //then the length. If there are several
3016 //keywords with the same length AND the same first char, then do another
3017 //dispatch on the second char :-)...cool....but fast !
3018 useAssertAsAnIndentifier = false;
// Consume every following character that can be part of an identifier.
3019 while (getNextCharAsJavaIdentifierPart()) {
3023 // if (new String(getCurrentTokenSource()).equals("$this")) {
3024 // return TokenNamethis;
3026 return TokenNameVariable;
3031 // if (withoutUnicodePtr == 0)
3032 //quick test on length == 1 but not on length > 12 while most identifier
3033 //have a length which is <= 12...but there are lots of identifier with
3036 if ((length = currentPosition - startPosition) == 1)
3037 return TokenNameIdentifier;
// Lower-cased copy of the token text; all keyword tests below run against this copy.
3039 data = new char[length];
3040 index = startPosition;
3041 for (int i = 0; i < length; i++) {
3042 data[i] = Character.toLowerCase(source[index + i]);
3046 // if ((length = withoutUnicodePtr) == 1)
3047 // return TokenNameIdentifier;
3048 // // data = withoutUnicodeBuffer;
3049 // data = new char[withoutUnicodeBuffer.length];
3050 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3051 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3055 firstLetter = data[index];
3056 switch (firstLetter) {
// Names starting with an underscore: the magic constants __FILE__, __LINE__, __CLASS__,
// __METHOD__ and __FUNCTION__ (matched in lower case).
3061 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3062 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3063 return TokenNameFILE;
// index is rewound before the second candidate of the same length is tested.
3064 index = 0; //__LINE__
3065 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3066 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3067 return TokenNameLINE;
3071 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3072 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3073 return TokenNameCLASS_C;
3077 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3078 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3079 && (data[++index] == '_'))
3080 return TokenNameMETHOD_C;
3084 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3085 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3086 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3087 return TokenNameFUNC_C;
3090 return TokenNameIdentifier;
3092 // as and array abstract
3096 if ((data[++index] == 's')) {
3099 return TokenNameIdentifier;
3103 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3104 return TokenNameand;
3106 return TokenNameIdentifier;
3110 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3111 return TokenNamearray;
3113 return TokenNameIdentifier;
3115 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3116 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3117 return TokenNameabstract;
3119 return TokenNameIdentifier;
3121 return TokenNameIdentifier;
3127 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3128 return TokenNamebreak;
3130 return TokenNameIdentifier;
3132 return TokenNameIdentifier;
3135 //case catch class clone const continue
3138 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3139 return TokenNamecase;
3141 return TokenNameIdentifier;
// NOTE(review): catch, class, clone and const are tested by consecutive chains that each start
// with ++index; the lines between them are not visible, so confirm index is rewound (or the
// chains are otherwise separated) before each candidate.
3143 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3144 return TokenNamecatch;
3146 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3147 return TokenNameclass;
3149 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3150 return TokenNameclone;
3152 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3153 return TokenNameconst;
3155 return TokenNameIdentifier;
3157 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3158 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3159 return TokenNamecontinue;
3161 return TokenNameIdentifier;
3163 return TokenNameIdentifier;
3166 // declare default do die
3167 // TODO delete define ==> no keyword !
3170 if ((data[++index] == 'o'))
3173 return TokenNameIdentifier;
3175 // if ((data[++index] == 'e')
3176 // && (data[++index] == 'f')
3177 // && (data[++index] == 'i')
3178 // && (data[++index] == 'n')
3179 // && (data[++index] == 'e'))
3180 // return TokenNamedefine;
3182 // return TokenNameIdentifier;
3184 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3185 && (data[++index] == 'r') && (data[++index] == 'e'))
3186 return TokenNamedeclare;
3188 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3189 && (data[++index] == 'l') && (data[++index] == 't'))
3190 return TokenNamedefault;
3192 return TokenNameIdentifier;
3194 return TokenNameIdentifier;
3197 //echo else exit elseif extends eval
// NOTE(review): the else-if tests below reuse index as left by the failed echo chain; see the
// method comment about partial matches such as "ecls".
3200 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3201 return TokenNameecho;
3202 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3203 return TokenNameelse;
3204 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3205 return TokenNameexit;
3206 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3207 return TokenNameeval;
3209 return TokenNameIdentifier;
// NOTE(review): same index reuse between the endif chain and the empty test.
3212 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3213 return TokenNameendif;
3214 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3215 return TokenNameempty;
3217 return TokenNameIdentifier;
// NOTE(review): same index reuse between the endfor chain and the elseif test.
3220 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3221 && (data[++index] == 'r'))
3222 return TokenNameendfor;
3223 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3224 && (data[++index] == 'f'))
3225 return TokenNameelseif;
3227 return TokenNameIdentifier;
3229 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3230 && (data[++index] == 'd') && (data[++index] == 's'))
3231 return TokenNameextends;
3233 return TokenNameIdentifier;
3236 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3237 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3238 return TokenNameendwhile;
3240 return TokenNameIdentifier;
3243 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3244 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3245 return TokenNameendswitch;
3247 return TokenNameIdentifier;
3250 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3251 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3252 && (data[++index] == 'e'))
3253 return TokenNameenddeclare;
3255 if ((data[++index] == 'n') // endforeach
3256 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3257 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3258 return TokenNameendforeach;
3260 return TokenNameIdentifier;
3262 return TokenNameIdentifier;
3265 //for false final function
3268 if ((data[++index] == 'o') && (data[++index] == 'r'))
3269 return TokenNamefor;
3271 return TokenNameIdentifier;
3273 // if ((data[++index] == 'a') && (data[++index] == 'l')
3274 // && (data[++index] == 's') && (data[++index] == 'e'))
3275 // return TokenNamefalse;
3276 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3277 return TokenNamefinal;
3279 return TokenNameIdentifier;
3282 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3283 && (data[++index] == 'c') && (data[++index] == 'h'))
3284 return TokenNameforeach;
3286 return TokenNameIdentifier;
3289 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3290 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3291 return TokenNamefunction;
3293 return TokenNameIdentifier;
3295 return TokenNameIdentifier;
3300 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3301 && (data[++index] == 'l')) {
3302 return TokenNameglobal;
3305 return TokenNameIdentifier;
3307 //if int isset include include_once instanceof interface implements
3310 if (data[++index] == 'f')
3313 return TokenNameIdentifier;
3315 // if ((data[++index] == 'n') && (data[++index] == 't'))
3316 // return TokenNameint;
3318 // return TokenNameIdentifier;
3320 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3321 return TokenNameisset;
3323 return TokenNameIdentifier;
3325 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3326 && (data[++index] == 'd') && (data[++index] == 'e'))
3327 return TokenNameinclude;
3329 return TokenNameIdentifier;
3332 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3333 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3334 return TokenNameinterface;
3336 return TokenNameIdentifier;
// NOTE(review): same index reuse between the instanceof chain and the implements test.
3339 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3340 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3341 && (data[++index] == 'f'))
3342 return TokenNameinstanceof;
3343 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3344 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3345 && (data[++index] == 's'))
3346 return TokenNameimplements;
3348 return TokenNameIdentifier;
3350 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3351 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3352 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3353 return TokenNameinclude_once;
3355 return TokenNameIdentifier;
3357 return TokenNameIdentifier;
3362 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3363 return TokenNamelist;
3366 return TokenNameIdentifier;
3371 if ((data[++index] == 'e') && (data[++index] == 'w'))
3372 return TokenNamenew;
3374 return TokenNameIdentifier;
3376 // if ((data[++index] == 'u') && (data[++index] == 'l')
3377 // && (data[++index] == 'l'))
3378 // return TokenNamenull;
3380 // return TokenNameIdentifier;
3382 return TokenNameIdentifier;
3387 if (data[++index] == 'r') {
3391 // if (length == 12) {
3392 // if ((data[++index] == 'l')
3393 // && (data[++index] == 'd')
3394 // && (data[++index] == '_')
3395 // && (data[++index] == 'f')
3396 // && (data[++index] == 'u')
3397 // && (data[++index] == 'n')
3398 // && (data[++index] == 'c')
3399 // && (data[++index] == 't')
3400 // && (data[++index] == 'i')
3401 // && (data[++index] == 'o')
3402 // && (data[++index] == 'n')) {
3403 // return TokenNameold_function;
3406 return TokenNameIdentifier;
3408 // print public private protected
3411 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3412 return TokenNameprint;
3414 return TokenNameIdentifier;
3416 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3417 && (data[++index] == 'c')) {
3418 return TokenNamepublic;
3420 return TokenNameIdentifier;
3422 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3423 && (data[++index] == 't') && (data[++index] == 'e')) {
3424 return TokenNameprivate;
3426 return TokenNameIdentifier;
3428 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3429 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3430 return TokenNameprotected;
3432 return TokenNameIdentifier;
3434 return TokenNameIdentifier;
3436 //return require require_once
// This branch selects the candidate with an if / else-if chain on length rather than a switch.
3438 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3439 && (data[++index] == 'n')) {
3440 return TokenNamereturn;
3442 } else if (length == 7) {
3443 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3444 && (data[++index] == 'r') && (data[++index] == 'e')) {
3445 return TokenNamerequire;
3447 } else if (length == 12) {
3448 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3449 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3450 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3451 return TokenNamerequire_once;
3454 return TokenNameIdentifier;
// static / switch: the switch test only runs when the second character is not 't', so index is
// still 1 when data[index] is compared with 'w'.
3459 if (data[++index] == 't')
3460 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3461 return TokenNamestatic;
3463 return TokenNameIdentifier;
3464 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3465 && (data[++index] == 'h'))
3466 return TokenNameswitch;
3468 return TokenNameIdentifier;
3470 return TokenNameIdentifier;
3476 if ((data[++index] == 'r') && (data[++index] == 'y'))
3477 return TokenNametry;
3479 return TokenNameIdentifier;
3481 // if ((data[++index] == 'r') && (data[++index] == 'u')
3482 // && (data[++index] == 'e'))
3483 // return TokenNametrue;
3485 // return TokenNameIdentifier;
3487 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3488 return TokenNamethrow;
3490 return TokenNameIdentifier;
3492 return TokenNameIdentifier;
3498 if ((data[++index] == 's') && (data[++index] == 'e'))
3499 return TokenNameuse;
3501 return TokenNameIdentifier;
3503 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3504 return TokenNameunset;
3506 return TokenNameIdentifier;
3508 return TokenNameIdentifier;
3514 if ((data[++index] == 'a') && (data[++index] == 'r'))
3515 return TokenNamevar;
3517 return TokenNameIdentifier;
3519 return TokenNameIdentifier;
3525 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3526 return TokenNamewhile;
3528 return TokenNameIdentifier;
3529 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3530 // (data[++index]=='e') && (data[++index]=='f')&&
3531 // (data[++index]=='p'))
3532 //return TokenNamewidefp ;
3534 //return TokenNameIdentifier;
3536 return TokenNameIdentifier;
3542 if ((data[++index] == 'o') && (data[++index] == 'r'))
3543 return TokenNamexor;
3545 return TokenNameIdentifier;
3547 return TokenNameIdentifier;
// Any other first character cannot start a keyword.
3550 return TokenNameIdentifier;
/**
 * Scans a numeric literal whose first digit is already in currentCharacter and returns its token kind.
 * A leading "0x"/"0X" is read as a hexadecimal integer. A leading '0' followed by more digits is a
 * potential octal; the visible returns report a double after a 'd'/'D' suffix or a '.'. Anything
 * else goes through the decimal path at the end of the method.
 *
 * @param dotPrefix initial value of the local floating flag; when true the leading-zero handling and
 *          the '.' lookup of the decimal path are skipped
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException INVALID_HEXA if "0x" is not followed by a hexadecimal digit;
 *           INVALID_FLOAT if an exponent marker (plus optional sign) is not followed by a digit
 */
3554 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3555 //when entering this method the currentCharacter is the first
3556 //digit of the number , i.e. it may be preceded by a . when
3558 boolean floating = dotPrefix;
// A literal starting with '0' (and not reached through a '.' prefix) gets the hex / octal / float handling below.
3559 if ((!dotPrefix) && (currentCharacter == '0')) {
3560 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3561 //force the first char of the hexa number to exist...
3562 // consume next character
3563 unicodeAsBackSlash = false;
// NOTE(review): reads source[currentPosition] directly; no bounds check is visible in this excerpt.
3564 currentCharacter = source[currentPosition++];
3565 // if (((currentCharacter = source[currentPosition++]) == '\\')
3566 // && (source[currentPosition] == 'u')) {
3567 // getNextUnicodeChar();
3569 // if (withoutUnicodePtr != 0) {
3570 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// "0x" must be followed by at least one hexadecimal digit.
3573 if (Character.digit(currentCharacter, 16) == -1)
3574 throw new InvalidInputException(INVALID_HEXA);
// Consume the remaining hexadecimal digits.
3576 while (getNextCharAsDigit(16)) {
3579 // if (getNextChar('l', 'L') >= 0)
3580 // return TokenNameLongLiteral;
3582 return TokenNameIntegerLiteral;
3584 //there is x or X in the number
3585 //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3586 // 00078.0 is true !!!!! crazy language
3587 if (getNextCharAsDigit()) {
3588 //-------------potential octal-----------------
3589 while (getNextCharAsDigit()) {
3592 // if (getNextChar('l', 'L') >= 0) {
3593 // return TokenNameLongLiteral;
3596 // if (getNextChar('f', 'F') >= 0) {
3597 // return TokenNameFloatingPointLiteral;
// A 'd'/'D' suffix turns the leading-zero literal into a double.
3599 if (getNextChar('d', 'D') >= 0) {
3600 return TokenNameDoubleLiteral;
3601 } else { //make the distinction between octal and float ....
3602 if (getNextChar('.')) { //bingo ! ....
3603 while (getNextCharAsDigit()) {
// Optional exponent: 'e'/'E', an optional sign, then at least one digit.
3606 if (getNextChar('e', 'E') >= 0) {
3607 // consume next character
3608 unicodeAsBackSlash = false;
3609 currentCharacter = source[currentPosition++];
3610 // if (((currentCharacter = source[currentPosition++]) == '\\')
3611 // && (source[currentPosition] == 'u')) {
3612 // getNextUnicodeChar();
3614 // if (withoutUnicodePtr != 0) {
3615 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3618 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3619 // consume next character
3620 unicodeAsBackSlash = false;
3621 currentCharacter = source[currentPosition++];
3622 // if (((currentCharacter = source[currentPosition++]) == '\\')
3623 // && (source[currentPosition] == 'u')) {
3624 // getNextUnicodeChar();
3626 // if (withoutUnicodePtr != 0) {
3627 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3628 // currentCharacter;
// The exponent needs at least one digit.
3632 if (!Character.isDigit(currentCharacter))
3633 throw new InvalidInputException(INVALID_FLOAT);
3634 while (getNextCharAsDigit()) {
3638 // if (getNextChar('f', 'F') >= 0)
3639 // return TokenNameFloatingPointLiteral;
3640 getNextChar('d', 'D'); //jump over potential d or D
3641 return TokenNameDoubleLiteral;
3643 return TokenNameIntegerLiteral;
// Decimal path: integer digits, then an optional fraction and an optional exponent.
3650 while (getNextCharAsDigit()) {
3653 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3654 // return TokenNameLongLiteral;
// NOTE(review): the lines that update the floating flag are not visible in this excerpt; presumably
// it is set to true after a '.' or an exponent has been consumed - confirm against the full file.
3655 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3656 while (getNextCharAsDigit()) {
3661 //if floating is true both exponant and suffix may be optional
3662 if (getNextChar('e', 'E') >= 0) {
3664 // consume next character
3665 unicodeAsBackSlash = false;
3666 currentCharacter = source[currentPosition++];
3667 // if (((currentCharacter = source[currentPosition++]) == '\\')
3668 // && (source[currentPosition] == 'u')) {
3669 // getNextUnicodeChar();
3671 // if (withoutUnicodePtr != 0) {
3672 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3675 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3678 unicodeAsBackSlash = false;
3679 currentCharacter = source[currentPosition++];
3680 // if (((currentCharacter = source[currentPosition++]) == '\\')
3681 // && (source[currentPosition] == 'u')) {
3682 // getNextUnicodeChar();
3684 // if (withoutUnicodePtr != 0) {
3685 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// The exponent needs at least one digit.
3689 if (!Character.isDigit(currentCharacter))
3690 throw new InvalidInputException(INVALID_FLOAT);
3691 while (getNextCharAsDigit()) {
// An explicit 'd'/'D' suffix always yields a double, whatever the floating flag says.
3695 if (getNextChar('d', 'D') >= 0)
3696 return TokenNameDoubleLiteral;
3697 // if (getNextChar('f', 'F') >= 0)
3698 // return TokenNameFloatingPointLiteral;
3699 //the long flag has been tested before
3700 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3704 * Search the line number corresponding to a specific position
// Looks up the line number of a source position using the line-end offsets recorded in lineEnds.
// Only linePtr + 1 entries of lineEnds are considered. The loop header and the return statements are
// not visible in this excerpt; the bounds g/d and the comparisons of position against lineEnds[m]
// suggest a binary search - confirm against the full file.
3707 public final int getLineNumber(int position) {
// No line ends recorded: the result for this case is on a line not visible here.
3708 if (lineEnds == null)
3710 int length = linePtr + 1;
// g and d are the lower and upper bounds of the searched index range.
3713 int g = 0, d = length - 1;
3717 if (position < lineEnds[m]) {
3719 } else if (position > lineEnds[m]) {
3725 if (position < lineEnds[m]) {
// Setter for the scanner's PHP mode. The body is not visible in this excerpt; presumably it stores
// mode in the public phpMode field - confirm against the full file.
3731 public void setPHPMode(boolean mode) {
/**
 * Convenience overload: installs the source buffer without an associated compilation unit by
 * delegating to setSource(null, source).
 *
 * @param source the characters to scan; null is accepted and handled by the two-argument overload
 */
3735 public final void setSource(char[] source) {
3736 setSource(null, source);
/**
 * Installs a new source buffer and resets the per-source scanner state visible below: the positions
 * go back to 0, containsAssertKeyword is cleared, withoutUnicodeBuffer is reallocated to the size of
 * the source and encapsedStringStack is replaced by a fresh, empty Stack.
 *
 * @param compilationUnit the unit the source belongs to; stored as given (may be null)
 * @param source the characters to scan; null is replaced by an empty array
 */
3739 public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3740 //the source-buffer is set to sourceString
3741 this.compilationUnit = compilationUnit;
// Never keep a null buffer, so later reads of this.source.length are safe.
3742 if (source == null) {
3743 this.source = new char[0];
3745 this.source = source;
// NOTE(review): at least one line between the assignment above and the reset below is not visible in this excerpt.
3748 initialPosition = currentPosition = 0;
3749 containsAssertKeyword = false;
3750 withoutUnicodeBuffer = new char[this.source.length];
3751 encapsedStringStack = new Stack();
3754 public String toString() {
3755 if (startPosition == source.length)
3756 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3757 if (currentPosition > source.length)
3758 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3759 char front[] = new char[startPosition];
3760 System.arraycopy(source, 0, front, 0, startPosition);
3761 int middleLength = (currentPosition - 1) - startPosition + 1;
3763 if (middleLength > -1) {
3764 middle = new char[middleLength];
3765 System.arraycopy(source, startPosition, middle, 0, middleLength);
3767 middle = new char[0];
3769 char end[] = new char[source.length - (currentPosition - 1)];
3770 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3771 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3772 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable description of the token kind act.
 * Token kinds that carry text (identifiers, variables, literals, inline HTML, whitespace, comments)
 * are rendered as Kind(current token source); the other visible cases return a fixed string.
 * The last return renders an unknown value as "not-a-token(act) " followed by the current token source.
 * Several case labels and return statements are not visible in this excerpt.
 *
 * @param act the token kind (one of the TokenName constants)
 * @return the description; the visible branches never return null
 */
3776 public final String toStringAction(int act) {
3778 case TokenNameERROR:
3779 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
// Token kinds that carry text append the current token source in parentheses.
3781 case TokenNameINLINE_HTML:
3782 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3783 case TokenNameIdentifier:
3784 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3785 case TokenNameVariable:
3786 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// Keywords.
3787 case TokenNameabstract:
3788 return "abstract"; //$NON-NLS-1$
3790 return "AND"; //$NON-NLS-1$
3791 case TokenNamearray:
3792 return "array"; //$NON-NLS-1$
3794 return "as"; //$NON-NLS-1$
3795 case TokenNamebreak:
3796 return "break"; //$NON-NLS-1$
3798 return "case"; //$NON-NLS-1$
3799 case TokenNameclass:
3800 return "class"; //$NON-NLS-1$
3801 case TokenNamecatch:
3802 return "catch"; //$NON-NLS-1$
3803 case TokenNameclone:
3806 case TokenNameconst:
3809 case TokenNamecontinue:
3810 return "continue"; //$NON-NLS-1$
3811 case TokenNamedefault:
3812 return "default"; //$NON-NLS-1$
3813 // case TokenNamedefine :
3814 // return "define"; //$NON-NLS-1$
3816 return "do"; //$NON-NLS-1$
3818 return "echo"; //$NON-NLS-1$
3820 return "else"; //$NON-NLS-1$
3821 case TokenNameelseif:
3822 return "elseif"; //$NON-NLS-1$
3823 case TokenNameendfor:
3824 return "endfor"; //$NON-NLS-1$
3825 case TokenNameendforeach:
3826 return "endforeach"; //$NON-NLS-1$
3827 case TokenNameendif:
3828 return "endif"; //$NON-NLS-1$
3829 case TokenNameendswitch:
3830 return "endswitch"; //$NON-NLS-1$
3831 case TokenNameendwhile:
3832 return "endwhile"; //$NON-NLS-1$
3835 case TokenNameextends:
3836 return "extends"; //$NON-NLS-1$
3837 // case TokenNamefalse :
3838 // return "false"; //$NON-NLS-1$
3839 case TokenNamefinal:
3840 return "final"; //$NON-NLS-1$
3842 return "for"; //$NON-NLS-1$
3843 case TokenNameforeach:
3844 return "foreach"; //$NON-NLS-1$
3845 case TokenNamefunction:
3846 return "function"; //$NON-NLS-1$
3847 case TokenNameglobal:
3848 return "global"; //$NON-NLS-1$
3850 return "if"; //$NON-NLS-1$
3851 case TokenNameimplements:
3852 return "implements"; //$NON-NLS-1$
3853 case TokenNameinclude:
3854 return "include"; //$NON-NLS-1$
3855 case TokenNameinclude_once:
3856 return "include_once"; //$NON-NLS-1$
3857 case TokenNameinstanceof:
3858 return "instanceof"; //$NON-NLS-1$
3859 case TokenNameinterface:
3860 return "interface"; //$NON-NLS-1$
3861 case TokenNameisset:
3862 return "isset"; //$NON-NLS-1$
3864 return "list"; //$NON-NLS-1$
3866 return "new"; //$NON-NLS-1$
3867 // case TokenNamenull :
3868 // return "null"; //$NON-NLS-1$
3870 return "OR"; //$NON-NLS-1$
3871 case TokenNameprint:
3872 return "print"; //$NON-NLS-1$
3873 case TokenNameprivate:
3874 return "private"; //$NON-NLS-1$
3875 case TokenNameprotected:
3876 return "protected"; //$NON-NLS-1$
3877 case TokenNamepublic:
3878 return "public"; //$NON-NLS-1$
3879 case TokenNamerequire:
3880 return "require"; //$NON-NLS-1$
3881 case TokenNamerequire_once:
3882 return "require_once"; //$NON-NLS-1$
3883 case TokenNamereturn:
3884 return "return"; //$NON-NLS-1$
3885 case TokenNamestatic:
3886 return "static"; //$NON-NLS-1$
3887 case TokenNameswitch:
3888 return "switch"; //$NON-NLS-1$
3889 // case TokenNametrue :
3890 // return "true"; //$NON-NLS-1$
3891 case TokenNameunset:
3892 return "unset"; //$NON-NLS-1$
3894 return "var"; //$NON-NLS-1$
3895 case TokenNamewhile:
3896 return "while"; //$NON-NLS-1$
3898 return "XOR"; //$NON-NLS-1$
3899 // case TokenNamethis :
3900 // return "$this"; //$NON-NLS-1$
// Literals: rendered with the current token source.
3901 case TokenNameIntegerLiteral:
3902 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3903 case TokenNameDoubleLiteral:
3904 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3905 case TokenNameStringDoubleQuote:
3906 return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3907 case TokenNameStringSingleQuote:
3908 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3909 case TokenNameStringInterpolated:
3910 return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3911 case TokenNameEncapsedString0:
3912 return "`"; //$NON-NLS-1$
3913 case TokenNameEncapsedString1:
3914 return "\'"; //$NON-NLS-1$
3915 case TokenNameEncapsedString2:
3916 return "\""; //$NON-NLS-1$
3917 case TokenNameSTRING:
3918 return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3919 case TokenNameHEREDOC:
3920 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// Operators and punctuation: rendered as their spelling.
3921 case TokenNamePLUS_PLUS:
3922 return "++"; //$NON-NLS-1$
3923 case TokenNameMINUS_MINUS:
3924 return "--"; //$NON-NLS-1$
3925 case TokenNameEQUAL_EQUAL:
3926 return "=="; //$NON-NLS-1$
3927 case TokenNameEQUAL_EQUAL_EQUAL:
3928 return "==="; //$NON-NLS-1$
3929 case TokenNameEQUAL_GREATER:
3930 return "=>"; //$NON-NLS-1$
3931 case TokenNameLESS_EQUAL:
3932 return "<="; //$NON-NLS-1$
3933 case TokenNameGREATER_EQUAL:
3934 return ">="; //$NON-NLS-1$
3935 case TokenNameNOT_EQUAL:
3936 return "!="; //$NON-NLS-1$
3937 case TokenNameNOT_EQUAL_EQUAL:
3938 return "!=="; //$NON-NLS-1$
3939 case TokenNameLEFT_SHIFT:
3940 return "<<"; //$NON-NLS-1$
3941 case TokenNameRIGHT_SHIFT:
3942 return ">>"; //$NON-NLS-1$
3943 case TokenNamePLUS_EQUAL:
3944 return "+="; //$NON-NLS-1$
3945 case TokenNameMINUS_EQUAL:
3946 return "-="; //$NON-NLS-1$
3947 case TokenNameMULTIPLY_EQUAL:
3948 return "*="; //$NON-NLS-1$
3949 case TokenNameDIVIDE_EQUAL:
3950 return "/="; //$NON-NLS-1$
3951 case TokenNameAND_EQUAL:
3952 return "&="; //$NON-NLS-1$
3953 case TokenNameOR_EQUAL:
3954 return "|="; //$NON-NLS-1$
3955 case TokenNameXOR_EQUAL:
3956 return "^="; //$NON-NLS-1$
3957 case TokenNameREMAINDER_EQUAL:
3958 return "%="; //$NON-NLS-1$
3959 case TokenNameDOT_EQUAL:
3960 return ".="; //$NON-NLS-1$
3961 case TokenNameLEFT_SHIFT_EQUAL:
3962 return "<<="; //$NON-NLS-1$
3963 case TokenNameRIGHT_SHIFT_EQUAL:
3964 return ">>="; //$NON-NLS-1$
3965 case TokenNameOR_OR:
3966 return "||"; //$NON-NLS-1$
3967 case TokenNameAND_AND:
3968 return "&&"; //$NON-NLS-1$
3970 return "+"; //$NON-NLS-1$
3971 case TokenNameMINUS:
3972 return "-"; //$NON-NLS-1$
3973 case TokenNameMINUS_GREATER:
3976 return "!"; //$NON-NLS-1$
3977 case TokenNameREMAINDER:
3978 return "%"; //$NON-NLS-1$
3980 return "^"; //$NON-NLS-1$
3982 return "&"; //$NON-NLS-1$
3983 case TokenNameMULTIPLY:
3984 return "*"; //$NON-NLS-1$
3986 return "|"; //$NON-NLS-1$
3987 case TokenNameTWIDDLE:
3988 return "~"; //$NON-NLS-1$
3989 case TokenNameTWIDDLE_EQUAL:
3990 return "~="; //$NON-NLS-1$
3991 case TokenNameDIVIDE:
3992 return "/"; //$NON-NLS-1$
3993 case TokenNameGREATER:
3994 return ">"; //$NON-NLS-1$
3996 return "<"; //$NON-NLS-1$
3997 case TokenNameLPAREN:
3998 return "("; //$NON-NLS-1$
3999 case TokenNameRPAREN:
4000 return ")"; //$NON-NLS-1$
4001 case TokenNameLBRACE:
4002 return "{"; //$NON-NLS-1$
4003 case TokenNameRBRACE:
4004 return "}"; //$NON-NLS-1$
4005 case TokenNameLBRACKET:
4006 return "["; //$NON-NLS-1$
4007 case TokenNameRBRACKET:
4008 return "]"; //$NON-NLS-1$
4009 case TokenNameSEMICOLON:
4010 return ";"; //$NON-NLS-1$
4011 case TokenNameQUESTION:
4012 return "?"; //$NON-NLS-1$
4013 case TokenNameCOLON:
4014 return ":"; //$NON-NLS-1$
4015 case TokenNameCOMMA:
4016 return ","; //$NON-NLS-1$
4018 return "."; //$NON-NLS-1$
4019 case TokenNameEQUAL:
4020 return "="; //$NON-NLS-1$
4023 case TokenNameDOLLAR:
4025 case TokenNameDOLLAR_LBRACE:
4027 case TokenNameLBRACE_DOLLAR:
4030 return "EOF"; //$NON-NLS-1$
// Whitespace and comments: rendered with the current token source.
4031 case TokenNameWHITESPACE:
4032 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4033 case TokenNameCOMMENT_LINE:
4034 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4035 case TokenNameCOMMENT_BLOCK:
4036 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4037 case TokenNameCOMMENT_PHPDOC:
4038 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4039 // case TokenNameHTML :
4040 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
// Magic constants.
4043 return "__FILE__"; //$NON-NLS-1$
4045 return "__LINE__"; //$NON-NLS-1$
4046 case TokenNameCLASS_C:
4047 return "__CLASS__"; //$NON-NLS-1$
4048 case TokenNameMETHOD_C:
4049 return "__METHOD__"; //$NON-NLS-1$
4050 case TokenNameFUNC_C:
4051 return "__FUNCTION__"; //$NON-NLS-1$
// Cast tokens.
4052 case TokenNameboolCAST:
4053 return "( bool )"; //$NON-NLS-1$
4054 case TokenNameintCAST:
4055 return "( int )"; //$NON-NLS-1$
4056 case TokenNamedoubleCAST:
4057 return "( double )"; //$NON-NLS-1$
4058 case TokenNameobjectCAST:
4059 return "( object )"; //$NON-NLS-1$
4060 case TokenNamestringCAST:
4061 return "( string )"; //$NON-NLS-1$
// Fallback for values without a dedicated case (the label itself is not visible in this excerpt).
// NOTE(review): new Integer(act) is a deprecated boxing constructor; concatenating act directly
// produces the same text.
4063 return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner with checkNonExternalizedStringLiterals set to false; delegates to the
 * three-argument constructor.
 *
 * @param tokenizeComments passed through unchanged
 * @param tokenizeWhiteSpace passed through unchanged
 */
4071 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
4072 this(tokenizeComments, tokenizeWhiteSpace, false);
/**
 * Creates a scanner with assertMode set to false; delegates to the four-argument constructor.
 *
 * @param tokenizeComments passed through unchanged
 * @param tokenizeWhiteSpace passed through unchanged
 * @param checkNonExternalizedStringLiterals passed through unchanged
 */
4075 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
4076 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
/**
 * Creates a scanner without task-tag support: delegates to the full constructor with
 * tokenizeStrings = false, taskTags = null, taskPriorities = null and isTaskCaseSensitive = true.
 *
 * @param tokenizeComments passed through unchanged
 * @param tokenizeWhiteSpace passed through unchanged
 * @param checkNonExternalizedStringLiterals passed through unchanged
 * @param assertMode passed through unchanged
 */
4079 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
4080 boolean assertMode) {
4081 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null, true);
/**
 * Creates a scanner with every option given explicitly; the shorter constructors delegate here.
 * No source is attached yet: eofPosition starts at Integer.MAX_VALUE and encapsedStringStack stays
 * null until setSource(...) installs a buffer.
 * <p>
 * The isTaskCaseSensitive argument is now stored. It used to be ignored, so the case-insensitive
 * comparison in checkTaskTag could not be selected through this constructor.
 *
 * @param tokenizeComments stored in the field of the same name
 * @param tokenizeWhiteSpace stored in the field of the same name
 * @param checkNonExternalizedStringLiterals stored in the field of the same name
 * @param assertMode stored in the field of the same name
 * @param tokenizeStrings stored in the field of the same name
 * @param taskTags the tags checkTaskTag looks for in comments; may be null
 * @param taskPriorities priorities matching taskTags by index; may be null
 * @param isTaskCaseSensitive whether checkTaskTag compares tags case-sensitively
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
    boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
  this.eofPosition = Integer.MAX_VALUE;
  this.tokenizeComments = tokenizeComments;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeStrings = tokenizeStrings;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.assertMode = assertMode;
  this.encapsedStringStack = null;
  this.taskTags = taskTags;
  this.taskPriorities = taskPriorities;
  // Previously missing: without this assignment the argument had no effect.
  this.isTaskCaseSensitive = isTaskCaseSensitive;
}
/**
 * Runs the NLS tag check for the line currently being collected: when currentLine is set, it is
 * handed to parseTags. The statement executed when currentLine is null is not visible in this
 * excerpt (presumably an early return - confirm).
 *
 * @throws InvalidInputException declared because parseTags declares it
 */
4097 private void checkNonExternalizeString() throws InvalidInputException {
4098 if (currentLine == null)
4100 parseTags(currentLine);
/**
 * Matches the tags found in the current token source against the string literals recorded in line
 * and publishes the literals that are still present afterwards in nonNLSStrings.
 * A tag is the text between TAG_PREFIX and TAG_POSTFIX, read as a one-based index into the line.
 * Several lines (the loop around the tag lookup and the handling of a matched index) are not
 * visible in this excerpt.
 *
 * @param line the string literals collected for the current source line
 * @throws InvalidInputException declared by the signature; no throw is visible in this excerpt
 */
4103 private void parseTags(NLSLine line) throws InvalidInputException {
4104 String s = new String(getCurrentTokenSource());
4105 int pos = s.indexOf(TAG_PREFIX);
4106 int lineLength = line.size();
4108 int start = pos + TAG_PREFIX_LENGTH;
// NOTE(review): when TAG_POSTFIX does not occur after start, indexOf returns -1 and the substring
// call below throws StringIndexOutOfBoundsException; no guard sits between these two statements.
4109 int end = s.indexOf(TAG_POSTFIX, start);
4110 String index = s.substring(start, end);
4113 i = Integer.parseInt(index) - 1;
4114 // Tags are one based not zero based.
4115 } catch (NumberFormatException e) {
4116 i = -1; // we don't want to consider this as a valid NLS tag
// What happens to a matched entry is on a line not visible here (presumably the entry is cleared).
4118 if (line.exists(i)) {
// Look for the next tag after the one just handled.
4121 pos = s.indexOf(TAG_PREFIX, start);
// Collect the literals that are still non-null in the line.
4123 this.nonNLSStrings = new StringLiteral[lineLength];
4124 int nonNLSCounter = 0;
4125 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4126 StringLiteral literal = (StringLiteral) iterator.next();
4127 if (literal != null) {
4128 this.nonNLSStrings[nonNLSCounter++] = literal;
// Nothing left: publish null instead of an empty array.
4131 if (nonNLSCounter == 0) {
4132 this.nonNLSStrings = null;
4136 this.wasNonExternalizedStringLiteral = true;
// Shrink the array to the number of literals actually stored.
4137 if (nonNLSCounter != lineLength) {
4138 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
/**
 * Replaces currentCharacter with the character denoted by the escape sequence at the current
 * position. The character read from the source selects the replacement ('\b', '\t', '\n', '\f',
 * '\r', '\"', '\'' or '\\') or starts an octal escape of up to three digits.
 * The case labels of the switch are not visible in this excerpt.
 *
 * @throws InvalidInputException INVALID_ESCAPE for an escape that is not recognized
 */
4143 public final void scanEscapeCharacter() throws InvalidInputException {
4144 // the string with "\\u" is a legal string of two chars \ and u
4145 //thus we use a direct access to the source (for regular cases).
4146 if (unicodeAsBackSlash) {
4147 // consume next character
4148 unicodeAsBackSlash = false;
4149 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4150 // (source[currentPosition] == 'u')) {
4151 // getNextUnicodeChar();
4153 if (withoutUnicodePtr != 0) {
4154 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// NOTE(review): reads source[currentPosition] directly; no bounds check is visible in this excerpt.
4158 currentCharacter = source[currentPosition++];
// Map the escape letter to the character it stands for.
4159 switch (currentCharacter) {
4161 currentCharacter = '\b';
4164 currentCharacter = '\t';
4167 currentCharacter = '\n';
4170 currentCharacter = '\f';
4173 currentCharacter = '\r';
4176 currentCharacter = '\"';
4179 currentCharacter = '\'';
4182 currentCharacter = '\\';
4185 // -----------octal escape--------------
4187 // OctalDigit OctalDigit
4188 // ZeroToThree OctalDigit OctalDigit
4189 int number = Character.getNumericValue(currentCharacter);
4190 if (number >= 0 && number <= 7) {
// A first digit above 3 allows at most one more octal digit.
4191 boolean zeroToThreeNot = number > 3;
4192 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4193 int digit = Character.getNumericValue(currentCharacter);
4194 if (digit >= 0 && digit <= 7) {
4195 number = (number * 8) + digit;
4196 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4197 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4198 // Digit --> ignore last character
4201 digit = Character.getNumericValue(currentCharacter);
4202 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4203 // OctalDigit OctalDigit
4204 number = (number * 8) + digit;
4205 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4206 // --> ignore last character
4210 } else { // has read \OctalDigit NonDigit--> ignore last
4214 } else { // has read \OctalDigit NonOctalDigit--> ignore last
4218 } else { // has read \OctalDigit --> ignore last character
// The condition guarding this throw is on a line not visible here (presumably a range check on number).
4222 throw new InvalidInputException(INVALID_ESCAPE);
4223 currentCharacter = (char) number;
4225 throw new InvalidInputException(INVALID_ESCAPE);
4229 //check presence of task: tags
4230 //TODO (frederic) see if we need to take unicode characters into account...
/**
 * Scans the comment between commentStart and commentEnd for the configured taskTags and records
 * every occurrence in the parallel arrays foundTaskTags, foundTaskPriorities, foundTaskPositions and
 * foundTaskMessages. A second pass computes the message of each newly found task and updates its
 * end position. Many lines of the body are not visible in this excerpt.
 *
 * @param commentStart offset of the comment start; scanning begins two characters later
 * @param commentEnd offset at which the comment ends (exclusive bound of the scan)
 */
4231 public void checkTaskTag(int commentStart, int commentEnd) {
4232 char[] src = this.source;
4234 // only look for newer task: tags
4235 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// Remember where the tasks found by this call start in the arrays.
4238 int foundTaskIndex = this.foundTaskCount;
4239 char previous = src[commentStart + 1]; // should be '*' or '/'
4240 nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4242 char[] priority = null;
4243 // check for tag occurrence only if not ambiguous with javadoc tag
4244 if (previous != '@') {
// NOTE(review): taskTags is dereferenced without a null check, and the shorter constructors pass
// null for it; presumably callers only invoke this method when task tags are configured - confirm.
4245 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4246 tag = this.taskTags[itag];
4247 int tagLength = tag.length;
4251 // ensure tag is not leaded with letter if tag starts with a letter
4252 if (Scanner.isPHPIdentifierStart(tag[0])) {
4253 if (Scanner.isPHPIdentifierPart(previous)) {
// Compare the tag with the source character by character.
4258 for (int t = 0; t < tagLength; t++) {
4261 if (x >= this.eofPosition || x >= commentEnd)
4263 if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4264 if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case insensitive check
4269 // ensure tag is not followed with letter if tag finishes with a letter
4270 if (i + tagLength < commentEnd && Scanner.isPHPIdentifierPart(src[i + tagLength - 1])) {
4271 if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
// Allocate the result arrays on first use (5 entries), double them when full.
4274 if (this.foundTaskTags == null) {
4275 this.foundTaskTags = new char[5][];
4276 this.foundTaskMessages = new char[5][];
4277 this.foundTaskPriorities = new char[5][];
4278 this.foundTaskPositions = new int[5][];
4279 } else if (this.foundTaskCount == this.foundTaskTags.length) {
4280 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4281 this.foundTaskCount);
4282 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4283 this.foundTaskCount);
4284 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4285 this.foundTaskCount);
4286 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4287 this.foundTaskCount);
// The priority is looked up by tag index; it is null when no priority exists for this tag.
4290 priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
// Record the hit; the message starts out empty and is filled in by the second pass.
4292 this.foundTaskTags[this.foundTaskCount] = tag;
4293 this.foundTaskPriorities[this.foundTaskCount] = priority;
4294 this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4295 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4296 this.foundTaskCount++;
4297 i += tagLength - 1; // will be incremented when looping
// Second pass: compute the message text of every task found by this call.
4303 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4304 // retrieve message start and end positions
4305 int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4306 int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4307 // at most beginning of next task
4308 if (max_value < msgStart) {
4309 max_value = msgStart; // would only occur if tag is before EOF.
// Forward scan from msgStart for a line break.
4313 for (int j = msgStart; j < max_value; j++) {
4314 if ((c = src[j]) == '\n' || c == '\r') {
// Backward scan from max_value for a '*'.
4320 for (int j = max_value; j > msgStart; j--) {
4321 if ((c = src[j]) == '*') {
4329 if (msgStart == end)
// NOTE(review): both loops read src[...] before testing msgStart <= end; checking the bound first
// would avoid touching an index outside the message range.
4332 while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4334 while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4336 // update the end position of the task
4337 this.foundTaskPositions[i][1] = end;
4338 // get the message source
4339 final int messageLength = end - msgStart + 1;
4340 char[] message = new char[messageLength];
4341 System.arraycopy(src, msgStart, message, 0, messageLength);
4342 this.foundTaskMessages[i] = message;
4346 // check presence of task: tags
4347 // public void checkTaskTag(int commentStart, int commentEnd) {
4348 // // only look for newer task: tags
4349 // if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4352 // int foundTaskIndex = this.foundTaskCount;
4353 // nextChar: for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
4354 // char[] tag = null;
4355 // char[] priority = null;
4356 // // check for tag occurrence
4357 // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4358 // tag = this.taskTags[itag];
4359 // priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4360 // int tagLength = tag.length;
4361 // for (int t = 0; t < tagLength; t++) {
4362 // if (this.source[i + t] != tag[t])
4363 // continue nextTag;
4365 // if (this.foundTaskTags == null) {
4366 // this.foundTaskTags = new char[5][];
4367 // this.foundTaskMessages = new char[5][];
4368 // this.foundTaskPriorities = new char[5][];
4369 // this.foundTaskPositions = new int[5][];
4370 // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4371 // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4372 // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4373 // this.foundTaskCount);
4374 // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4375 // this.foundTaskCount);
4376 // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4377 // this.foundTaskCount);
4379 // this.foundTaskTags[this.foundTaskCount] = tag;
4380 // this.foundTaskPriorities[this.foundTaskCount] = priority;
4381 // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4382 // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4383 // this.foundTaskCount++;
4384 // i += tagLength - 1; // will be incremented when looping
4387 // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4388 // // retrieve message start and end positions
4389 // int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4390 // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4391 // // at most beginning of next task
4392 // if (max_value < msgStart)
4393 // max_value = msgStart; // would only occur if tag is before EOF.
4396 // for (int j = msgStart; j < max_value; j++) {
4397 // if ((c = this.source[j]) == '\n' || c == '\r') {
4403 // for (int j = max_value; j > msgStart; j--) {
4404 // if ((c = this.source[j]) == '*') {
4412 // if (msgStart == end)
4413 // continue; // empty
4414 // // trim the message
4415 // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4417 // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4419 // // update the end position of the task
4420 // this.foundTaskPositions[i][1] = end;
4421 // // get the message source
4422 // final int messageLength = end - msgStart + 1;
4423 // char[] message = new char[messageLength];
4424 // System.arraycopy(source, msgStart, message, 0, messageLength);
4425 // this.foundTaskMessages[i] = message;