1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12 import java.util.ArrayList;
13 import java.util.Iterator;
14 import java.util.List;
15 import net.sourceforge.phpdt.core.compiler.CharOperation;
16 import net.sourceforge.phpdt.core.compiler.IScanner;
17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
19 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
20 public class Scanner implements IScanner, ITerminalSymbols {
22 * APIs are - getNextToken() which returns the current type of the token
23 * (this value is not memorized by the scanner) - getCurrentTokenSource()
24 * which provides with the token "REAL" source (aka all unicode have been
25 * transformed into a correct char) - sourceStart gives the position into the
26 * stream - currentPosition-1 gives the sourceEnd position into the stream
29 private boolean assertMode;
30 public boolean useAssertAsAnIndentifier = false;
31 //flag indicating if processed source contains occurrences of keyword assert
32 public boolean containsAssertKeyword = false;
33 public boolean recordLineSeparator;
34 public boolean phpMode = false;
35 public char currentCharacter;
36 public int startPosition;
37 public int currentPosition;
38 public int initialPosition, eofPosition;
39 // after this position eof are generated instead of real token from the
41 public boolean tokenizeComments;
42 public boolean tokenizeWhiteSpace;
43 //source should be viewed as a window (aka a part)
44 //of a entire very large stream
47 public char[] withoutUnicodeBuffer;
48 public int withoutUnicodePtr;
49 //when == 0 ==> no unicode in the current token
50 public boolean unicodeAsBackSlash = false;
51 public boolean scanningFloatLiteral = false;
52 //support for /** comments
53 //public char[][] comments = new char[10][];
54 public int[] commentStops = new int[10];
55 public int[] commentStarts = new int[10];
56 public int commentPtr = -1; // no comment test with commentPtr value -1
57 //diet parsing support - jump over some method body when requested
58 public boolean diet = false;
59 //support for the poor-line-debuggers ....
60 //remember the position of the cr/lf
61 public int[] lineEnds = new int[250];
62 public int linePtr = -1;
63 public boolean wasAcr = false;
64 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
65 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
66 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
67 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
68 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
69 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
70 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
71 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
72 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
73 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
74 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
75 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
76 //----------------optimized identifier management------------------
77 static final char[] charArray_a = new char[]{'a'},
78 charArray_b = new char[]{'b'}, charArray_c = new char[]{'c'},
79 charArray_d = new char[]{'d'}, charArray_e = new char[]{'e'},
80 charArray_f = new char[]{'f'}, charArray_g = new char[]{'g'},
81 charArray_h = new char[]{'h'}, charArray_i = new char[]{'i'},
82 charArray_j = new char[]{'j'}, charArray_k = new char[]{'k'},
83 charArray_l = new char[]{'l'}, charArray_m = new char[]{'m'},
84 charArray_n = new char[]{'n'}, charArray_o = new char[]{'o'},
85 charArray_p = new char[]{'p'}, charArray_q = new char[]{'q'},
86 charArray_r = new char[]{'r'}, charArray_s = new char[]{'s'},
87 charArray_t = new char[]{'t'}, charArray_u = new char[]{'u'},
88 charArray_v = new char[]{'v'}, charArray_w = new char[]{'w'},
89 charArray_x = new char[]{'x'}, charArray_y = new char[]{'y'},
90 charArray_z = new char[]{'z'};
91 static final char[] initCharArray = new char[]{'\u0000', '\u0000', '\u0000',
92 '\u0000', '\u0000', '\u0000'};
93 static final int TableSize = 30, InternalTableSize = 6;
95 public static final int OptimizedLength = 6;
97 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
98 // support for detecting non-externalized string literals
99 int currentLineNr = -1;
100 int previousLineNr = -1;
101 NLSLine currentLine = null;
102 List lines = new ArrayList();
103 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
104 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
105 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
106 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
107 public StringLiteral[] nonNLSStrings = null;
108 public boolean checkNonExternalizedStringLiterals = true;
109 public boolean wasNonExternalizedStringLiteral = false;
111 for (int i = 0; i < 6; i++) {
112 for (int j = 0; j < TableSize; j++) {
113 for (int k = 0; k < InternalTableSize; k++) {
114 charArray_length[i][j][k] = initCharArray;
119 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
121 public static final int RoundBracket = 0;
122 public static final int SquareBracket = 1;
123 public static final int CurlyBracket = 2;
124 public static final int BracketKinds = 3;
126 public char[][] foundTaskTags = null;
127 public char[][] foundTaskMessages;
128 public char[][] foundTaskPriorities = null;
129 public int[][] foundTaskPositions;
130 public int foundTaskCount = 0;
131 public char[][] taskTags = null;
132 public char[][] taskPriorities = null;
133 public static final boolean DEBUG = false;
134 public static final boolean TRACE = false;
/**
 * Creates a scanner, delegating to the three-argument constructor with
 * {@code false} as the third argument.
 *
 * @param tokenizeComments   forwarded unchanged to the three-argument constructor
 * @param tokenizeWhiteSpace forwarded unchanged to the three-argument constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(tokenizeComments, tokenizeWhiteSpace, false);
}
142 * Determines if the specified character is permissible as the first
143 * character in a PHP identifier
145 public static boolean isPHPIdentifierStart(char ch) {
146 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
149 * Determines if the specified character may be part of a PHP identifier as
150 * other than the first character
152 public static boolean isPHPIdentifierPart(char ch) {
153 return Character.isLetterOrDigit(ch) || (ch == '_')
154 || (0x7F <= ch && ch <= 0xFF);
156 public final boolean atEnd() {
157 // This code is not relevant if source is
158 // Only a part of the real stream input
159 return source.length == currentPosition;
161 public char[] getCurrentIdentifierSource() {
162 //return the token REAL source (aka unicodes are precomputed)
164 // if (withoutUnicodePtr != 0)
165 // //0 is used as a fast test flag so the real first char is in position 1
167 // withoutUnicodeBuffer,
169 // result = new char[withoutUnicodePtr],
171 // withoutUnicodePtr);
173 int length = currentPosition - startPosition;
174 switch (length) { // see OptimizedLength
176 return optimizedCurrentTokenSource1();
178 return optimizedCurrentTokenSource2();
180 return optimizedCurrentTokenSource3();
182 return optimizedCurrentTokenSource4();
184 return optimizedCurrentTokenSource5();
186 return optimizedCurrentTokenSource6();
189 System.arraycopy(source, startPosition, result = new char[length], 0,
194 public int getCurrentTokenEndPosition() {
195 return this.currentPosition - 1;
197 public final char[] getCurrentTokenSource() {
198 // Return the token REAL source (aka unicodes are precomputed)
200 // if (withoutUnicodePtr != 0)
201 // // 0 is used as a fast test flag so the real first char is in position 1
203 // withoutUnicodeBuffer,
205 // result = new char[withoutUnicodePtr],
207 // withoutUnicodePtr);
210 System.arraycopy(source, startPosition,
211 result = new char[length = currentPosition - startPosition], 0, length);
215 public final char[] getCurrentTokenSource(int startPos) {
216 // Return the token REAL source (aka unicodes are precomputed)
218 // if (withoutUnicodePtr != 0)
219 // // 0 is used as a fast test flag so the real first char is in position 1
221 // withoutUnicodeBuffer,
223 // result = new char[withoutUnicodePtr],
225 // withoutUnicodePtr);
228 System.arraycopy(source, startPos,
229 result = new char[length = currentPosition - startPos], 0, length);
233 public final char[] getCurrentTokenSourceString() {
234 //return the token REAL source (aka unicodes are precomputed).
235 //REMOVE the two " that are at the beginning and the end.
237 if (withoutUnicodePtr != 0)
238 //0 is used as a fast test flag so the real first char is in position 1
239 System.arraycopy(withoutUnicodeBuffer, 2,
240 //2 is 1 (real start) + 1 (to jump over the ")
241 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
244 System.arraycopy(source, startPosition + 1,
245 result = new char[length = currentPosition - startPosition - 2], 0,
250 public int getCurrentTokenStartPosition() {
251 return this.startPosition;
253 public final char[] getCurrentStringLiteralSource() {
254 // Return the token REAL source (aka unicodes are precomputed)
257 System.arraycopy(source, startPosition + 1,
258 result = new char[length = currentPosition - startPosition - 2], 0,
264 * Search the source position corresponding to the end of a given line number
266 * Line numbers are 1-based, and relative to the scanner initialPosition.
267 * Character positions are 0-based.
269 * In case the given line number is inconsistent, answers -1.
271 public final int getLineEnd(int lineNumber) {
272 if (lineEnds == null)
274 if (lineNumber >= lineEnds.length)
278 if (lineNumber == lineEnds.length - 1)
280 return lineEnds[lineNumber - 1];
281 // next line start one character behind the lineEnd of the previous line
284 * Search the source position corresponding to the beginning of a given line
287 * Line numbers are 1-based, and relative to the scanner initialPosition.
288 * Character positions are 0-based.
290 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
292 * In case the given line number is inconsistent, answers -1.
294 public final int getLineStart(int lineNumber) {
295 if (lineEnds == null)
297 if (lineNumber >= lineEnds.length)
302 return initialPosition;
303 return lineEnds[lineNumber - 2] + 1;
304 // next line start one character behind the lineEnd of the previous line
306 public final boolean getNextChar(char testedChar) {
308 //handle the case of unicode.
309 //when a unicode appears then we must use a buffer that holds char
311 //At the end of this method currentCharacter holds the new visited char
312 //and currentPosition points right next after it
313 //Both previous lines are true if the currentCharacter is == to the
315 //On false, no side effect has occured.
316 //ALL getNextChar.... ARE OPTIMIZED COPIES
317 int temp = currentPosition;
319 currentCharacter = source[currentPosition++];
320 // if (((currentCharacter = source[currentPosition++]) == '\\')
321 // && (source[currentPosition] == 'u')) {
322 // //-------------unicode traitement ------------
323 // int c1, c2, c3, c4;
324 // int unicodeSize = 6;
325 // currentPosition++;
326 // while (source[currentPosition] == 'u') {
327 // currentPosition++;
331 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
333 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
335 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
337 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
339 // currentPosition = temp;
343 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
344 // if (currentCharacter != testedChar) {
345 // currentPosition = temp;
348 // unicodeAsBackSlash = currentCharacter == '\\';
350 // //need the unicode buffer
351 // if (withoutUnicodePtr == 0) {
352 // //buffer all the entries that have been left aside....
353 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
357 // withoutUnicodeBuffer,
359 // withoutUnicodePtr);
361 // //fill the buffer with the char
362 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
365 // } //-------------end unicode traitement--------------
367 if (currentCharacter != testedChar) {
368 currentPosition = temp;
371 unicodeAsBackSlash = false;
372 // if (withoutUnicodePtr != 0)
373 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
376 } catch (IndexOutOfBoundsException e) {
377 unicodeAsBackSlash = false;
378 currentPosition = temp;
382 public final int getNextChar(char testedChar1, char testedChar2) {
383 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
384 //test can be done with (x==0) for the first and (x>0) for the second
385 //handle the case of unicode.
386 //when a unicode appears then we must use a buffer that holds char
388 //At the end of this method currentCharacter holds the new visited char
389 //and currentPosition points right next after it
390 //Both previous lines are true if the currentCharacter is == to the
392 //On false, no side effect has occured.
393 //ALL getNextChar.... ARE OPTIMIZED COPIES
394 int temp = currentPosition;
397 currentCharacter = source[currentPosition++];
398 // if (((currentCharacter = source[currentPosition++]) == '\\')
399 // && (source[currentPosition] == 'u')) {
400 // //-------------unicode traitement ------------
401 // int c1, c2, c3, c4;
402 // int unicodeSize = 6;
403 // currentPosition++;
404 // while (source[currentPosition] == 'u') {
405 // currentPosition++;
409 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
411 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
413 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
415 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
417 // currentPosition = temp;
421 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
422 // if (currentCharacter == testedChar1)
424 // else if (currentCharacter == testedChar2)
427 // currentPosition = temp;
431 // //need the unicode buffer
432 // if (withoutUnicodePtr == 0) {
433 // //buffer all the entries that have been left aside....
434 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
438 // withoutUnicodeBuffer,
440 // withoutUnicodePtr);
442 // //fill the buffer with the char
443 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
445 // } //-------------end unicode traitement--------------
447 if (currentCharacter == testedChar1)
449 else if (currentCharacter == testedChar2)
452 currentPosition = temp;
455 // if (withoutUnicodePtr != 0)
456 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
459 } catch (IndexOutOfBoundsException e) {
460 currentPosition = temp;
464 public final boolean getNextCharAsDigit() {
466 //handle the case of unicode.
467 //when a unicode appears then we must use a buffer that holds char
469 //At the end of this method currentCharacter holds the new visited char
470 //and currentPosition points right next after it
471 //Both previous lines are true if the currentCharacter is a digit
472 //On false, no side effect has occured.
473 //ALL getNextChar.... ARE OPTIMIZED COPIES
474 int temp = currentPosition;
476 currentCharacter = source[currentPosition++];
477 // if (((currentCharacter = source[currentPosition++]) == '\\')
478 // && (source[currentPosition] == 'u')) {
479 // //-------------unicode traitement ------------
480 // int c1, c2, c3, c4;
481 // int unicodeSize = 6;
482 // currentPosition++;
483 // while (source[currentPosition] == 'u') {
484 // currentPosition++;
488 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
490 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
492 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
494 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
496 // currentPosition = temp;
500 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
501 // if (!Character.isDigit(currentCharacter)) {
502 // currentPosition = temp;
506 // //need the unicode buffer
507 // if (withoutUnicodePtr == 0) {
508 // //buffer all the entries that have been left aside....
509 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
513 // withoutUnicodeBuffer,
515 // withoutUnicodePtr);
517 // //fill the buffer with the char
518 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
520 // } //-------------end unicode traitement--------------
522 if (!Character.isDigit(currentCharacter)) {
523 currentPosition = temp;
526 // if (withoutUnicodePtr != 0)
527 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
530 } catch (IndexOutOfBoundsException e) {
531 currentPosition = temp;
535 public final boolean getNextCharAsDigit(int radix) {
537 //handle the case of unicode.
538 //when a unicode appears then we must use a buffer that holds char
540 //At the end of this method currentCharacter holds the new visited char
541 //and currentPosition points right next after it
542 //Both previous lines are true if the currentCharacter is a digit base on
544 //On false, no side effect has occured.
545 //ALL getNextChar.... ARE OPTIMIZED COPIES
546 int temp = currentPosition;
548 currentCharacter = source[currentPosition++];
549 // if (((currentCharacter = source[currentPosition++]) == '\\')
550 // && (source[currentPosition] == 'u')) {
551 // //-------------unicode traitement ------------
552 // int c1, c2, c3, c4;
553 // int unicodeSize = 6;
554 // currentPosition++;
555 // while (source[currentPosition] == 'u') {
556 // currentPosition++;
560 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
562 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
564 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
566 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
568 // currentPosition = temp;
572 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
573 // if (Character.digit(currentCharacter, radix) == -1) {
574 // currentPosition = temp;
578 // //need the unicode buffer
579 // if (withoutUnicodePtr == 0) {
580 // //buffer all the entries that have been left aside....
581 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
585 // withoutUnicodeBuffer,
587 // withoutUnicodePtr);
589 // //fill the buffer with the char
590 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
592 // } //-------------end unicode traitement--------------
594 if (Character.digit(currentCharacter, radix) == -1) {
595 currentPosition = temp;
598 // if (withoutUnicodePtr != 0)
599 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
602 } catch (IndexOutOfBoundsException e) {
603 currentPosition = temp;
607 public boolean getNextCharAsJavaIdentifierPart() {
609 //handle the case of unicode.
610 //when a unicode appears then we must use a buffer that holds char
612 //At the end of this method currentCharacter holds the new visited char
613 //and currentPosition points right next after it
614 //Both previous lines are true if the currentCharacter is a
615 // JavaIdentifierPart
616 //On false, no side effect has occured.
617 //ALL getNextChar.... ARE OPTIMIZED COPIES
618 int temp = currentPosition;
620 currentCharacter = source[currentPosition++];
621 // if (((currentCharacter = source[currentPosition++]) == '\\')
622 // && (source[currentPosition] == 'u')) {
623 // //-------------unicode traitement ------------
624 // int c1, c2, c3, c4;
625 // int unicodeSize = 6;
626 // currentPosition++;
627 // while (source[currentPosition] == 'u') {
628 // currentPosition++;
632 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
634 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
636 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
638 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
640 // currentPosition = temp;
644 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
645 // if (!isPHPIdentifierPart(currentCharacter)) {
646 // currentPosition = temp;
650 // //need the unicode buffer
651 // if (withoutUnicodePtr == 0) {
652 // //buffer all the entries that have been left aside....
653 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
657 // withoutUnicodeBuffer,
659 // withoutUnicodePtr);
661 // //fill the buffer with the char
662 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
664 // } //-------------end unicode traitement--------------
666 if (!isPHPIdentifierPart(currentCharacter)) {
667 currentPosition = temp;
670 // if (withoutUnicodePtr != 0)
671 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
674 } catch (IndexOutOfBoundsException e) {
675 currentPosition = temp;
679 public int getCastOrParen() {
680 int tempPosition = currentPosition;
681 char tempCharacter = currentCharacter;
682 int tempToken = TokenNameLPAREN;
683 boolean found = false;
684 StringBuffer buf = new StringBuffer();
687 currentCharacter = source[currentPosition++];
688 } while (currentCharacter == ' ' || currentCharacter == '\t');
689 while ((currentCharacter >= 'a' && currentCharacter <= 'z')
690 || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
691 buf.append(currentCharacter);
692 currentCharacter = source[currentPosition++];
694 if (buf.length() >= 3 && buf.length() <= 7) {
695 char[] data = buf.toString().toCharArray();
697 switch (data.length) {
700 if ((data[index] == 'i') && (data[++index] == 'n')
701 && (data[++index] == 't')) {
703 tempToken = TokenNameintCAST;
708 if ((data[index] == 'b') && (data[++index] == 'o')
709 && (data[++index] == 'o') && (data[++index] == 'l')) {
711 tempToken = TokenNameboolCAST;
714 if ((data[index] == 'r') && (data[++index] == 'e')
715 && (data[++index] == 'a') && (data[++index] == 'l')) {
717 tempToken = TokenNamedoubleCAST;
723 if ((data[index] == 'a') && (data[++index] == 'r')
724 && (data[++index] == 'r') && (data[++index] == 'a')
725 && (data[++index] == 'y')) {
727 tempToken = TokenNamearrayCAST;
730 if ((data[index] == 'u') && (data[++index] == 'n')
731 && (data[++index] == 's') && (data[++index] == 'e')
732 && (data[++index] == 't')) {
734 tempToken = TokenNameunsetCAST;
737 if ((data[index] == 'f') && (data[++index] == 'l')
738 && (data[++index] == 'o') && (data[++index] == 'a')
739 && (data[++index] == 't')) {
741 tempToken = TokenNamedoubleCAST;
747 // object string double
748 if ((data[index] == 'o') && (data[++index] == 'b')
749 && (data[++index] == 'j') && (data[++index] == 'e')
750 && (data[++index] == 'c') && (data[++index] == 't')) {
752 tempToken = TokenNameobjectCAST;
755 if ((data[index] == 's') && (data[++index] == 't')
756 && (data[++index] == 'r') && (data[++index] == 'i')
757 && (data[++index] == 'n') && (data[++index] == 'g')) {
759 tempToken = TokenNamestringCAST;
762 if ((data[index] == 'd') && (data[++index] == 'o')
763 && (data[++index] == 'u') && (data[++index] == 'b')
764 && (data[++index] == 'l') && (data[++index] == 'e')) {
766 tempToken = TokenNamedoubleCAST;
773 if ((data[index] == 'b') && (data[++index] == 'o')
774 && (data[++index] == 'o') && (data[++index] == 'l')
775 && (data[++index] == 'e') && (data[++index] == 'a')
776 && (data[++index] == 'n')) {
778 tempToken = TokenNameboolCAST;
781 if ((data[index] == 'i') && (data[++index] == 'n')
782 && (data[++index] == 't') && (data[++index] == 'e')
783 && (data[++index] == 'g') && (data[++index] == 'e')
784 && (data[++index] == 'r')) {
786 tempToken = TokenNameintCAST;
792 while (currentCharacter == ' ' || currentCharacter == '\t') {
793 currentCharacter = source[currentPosition++];
795 if (currentCharacter == ')') {
800 } catch (IndexOutOfBoundsException e) {
802 currentCharacter = tempCharacter;
803 currentPosition = tempPosition;
804 return TokenNameLPAREN;
806 public int getNextToken() throws InvalidInputException {
807 int htmlPosition = currentPosition;
810 currentCharacter = source[currentPosition++];
811 if (currentCharacter == '<') {
812 if (getNextChar('?')) {
813 currentCharacter = source[currentPosition++];
814 if ((currentCharacter == ' ')
815 || Character.isWhitespace(currentCharacter)) {
817 startPosition = currentPosition;
819 if (tokenizeWhiteSpace) {
820 // && (whiteStart != currentPosition - 1)) {
821 // reposition scanner in case we are interested by spaces as
823 startPosition = htmlPosition;
824 return TokenNameHTML;
827 boolean phpStart = (currentCharacter == 'P')
828 || (currentCharacter == 'p');
830 int test = getNextChar('H', 'h');
832 test = getNextChar('P', 'p');
835 startPosition = currentPosition;
837 if (tokenizeWhiteSpace) {
838 // && (whiteStart != currentPosition - 1)) {
839 // reposition scanner in case we are interested by spaces
841 startPosition = htmlPosition;
842 return TokenNameHTML;
850 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
851 if (recordLineSeparator) {
858 } //-----------------end switch while try--------------------
859 catch (IndexOutOfBoundsException e) {
860 if (tokenizeWhiteSpace) {
861 // && (whiteStart != currentPosition - 1)) {
862 // reposition scanner in case we are interested by spaces as tokens
863 startPosition = htmlPosition;
870 jumpOverMethodBody();
872 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
875 while (true) { //loop for jumping over comments
876 withoutUnicodePtr = 0;
877 //start with a new token (even comment written with unicode )
878 // ---------Consume white space and handles startPosition---------
879 int whiteStart = currentPosition;
880 boolean isWhiteSpace;
882 startPosition = currentPosition;
883 currentCharacter = source[currentPosition++];
884 // if (((currentCharacter = source[currentPosition++]) == '\\')
885 // && (source[currentPosition] == 'u')) {
886 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
888 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
889 checkNonExternalizeString();
890 if (recordLineSeparator) {
896 isWhiteSpace = (currentCharacter == ' ')
897 || Character.isWhitespace(currentCharacter);
899 } while (isWhiteSpace);
900 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
901 // reposition scanner in case we are interested by spaces as tokens
903 startPosition = whiteStart;
904 return TokenNameWHITESPACE;
906 //little trick to get out in the middle of a source computation
907 if (currentPosition > eofPosition)
909 // ---------Identify the next token-------------
910 switch (currentCharacter) {
912 return getCastOrParen();
914 return TokenNameRPAREN;
916 return TokenNameLBRACE;
918 return TokenNameRBRACE;
920 return TokenNameLBRACKET;
922 return TokenNameRBRACKET;
924 return TokenNameSEMICOLON;
926 return TokenNameCOMMA;
928 if (getNextChar('='))
929 return TokenNameDOT_EQUAL;
930 if (getNextCharAsDigit())
931 return scanNumber(true);
936 if ((test = getNextChar('+', '=')) == 0)
937 return TokenNamePLUS_PLUS;
939 return TokenNamePLUS_EQUAL;
940 return TokenNamePLUS;
945 if ((test = getNextChar('-', '=')) == 0)
946 return TokenNameMINUS_MINUS;
948 return TokenNameMINUS_EQUAL;
949 if (getNextChar('>'))
950 return TokenNameMINUS_GREATER;
951 return TokenNameMINUS;
954 if (getNextChar('='))
955 return TokenNameTWIDDLE_EQUAL;
956 return TokenNameTWIDDLE;
958 if (getNextChar('=')) {
959 if (getNextChar('=')) {
960 return TokenNameNOT_EQUAL_EQUAL;
962 return TokenNameNOT_EQUAL;
966 if (getNextChar('='))
967 return TokenNameMULTIPLY_EQUAL;
968 return TokenNameMULTIPLY;
970 if (getNextChar('='))
971 return TokenNameREMAINDER_EQUAL;
972 return TokenNameREMAINDER;
975 int oldPosition = currentPosition;
977 currentCharacter = source[currentPosition++];
978 } catch (IndexOutOfBoundsException e) {
979 currentPosition = oldPosition;
980 return TokenNameLESS;
982 switch (currentCharacter) {
984 return TokenNameLESS_EQUAL;
986 return TokenNameNOT_EQUAL;
988 if (getNextChar('='))
989 return TokenNameLEFT_SHIFT_EQUAL;
990 if (getNextChar('<')) {
991 int heredocStart = currentPosition;
992 int heredocLength = 0;
993 currentCharacter = source[currentPosition++];
994 if (isPHPIdentifierStart(currentCharacter)) {
995 currentCharacter = source[currentPosition++];
997 return TokenNameERROR;
999 while (isPHPIdentifierPart(currentCharacter)) {
1000 currentCharacter = source[currentPosition++];
1002 heredocLength = currentPosition - heredocStart - 1;
1003 // heredoc end-tag determination
1004 boolean endTag = true;
1007 ch = source[currentPosition++];
1008 if (ch == '\r' || ch == '\n') {
1009 if (recordLineSeparator) {
1010 pushLineSeparator();
1014 for (int i = 0; i < heredocLength; i++) {
1015 if (source[currentPosition + i] != source[heredocStart
1022 currentPosition += heredocLength - 1;
1023 currentCharacter = source[currentPosition++];
1024 break; // do...while loop
1030 return TokenNameHEREDOC;
1032 return TokenNameLEFT_SHIFT;
1034 currentPosition = oldPosition;
1035 return TokenNameLESS;
1040 if ((test = getNextChar('=', '>')) == 0)
1041 return TokenNameGREATER_EQUAL;
1043 if ((test = getNextChar('=', '>')) == 0)
1044 return TokenNameRIGHT_SHIFT_EQUAL;
1045 return TokenNameRIGHT_SHIFT;
1047 return TokenNameGREATER;
1050 if (getNextChar('=')) {
1051 if (getNextChar('=')) {
1052 return TokenNameEQUAL_EQUAL_EQUAL;
1054 return TokenNameEQUAL_EQUAL;
1056 if (getNextChar('>'))
1057 return TokenNameEQUAL_GREATER;
1058 return TokenNameEQUAL;
1062 if ((test = getNextChar('&', '=')) == 0)
1063 return TokenNameAND_AND;
1065 return TokenNameAND_EQUAL;
1066 return TokenNameAND;
1071 if ((test = getNextChar('|', '=')) == 0)
1072 return TokenNameOR_OR;
1074 return TokenNameOR_EQUAL;
1078 if (getNextChar('='))
1079 return TokenNameXOR_EQUAL;
1080 return TokenNameXOR;
1082 if (getNextChar('>')) {
1084 return TokenNameINLINE_HTML;
1086 return TokenNameQUESTION;
1088 if (getNextChar(':'))
1089 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1090 return TokenNameCOLON;
1096 // if ((test = getNextChar('\n', '\r')) == 0) {
1097 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1100 // // relocate if finding another quote fairly close: thus unicode
1101 // '/u000D' will be fully consumed
1102 // for (int lookAhead = 0;
1105 // if (currentPosition + lookAhead
1106 // == source.length)
1108 // if (source[currentPosition + lookAhead]
1111 // if (source[currentPosition + lookAhead]
1113 // currentPosition += lookAhead + 1;
1117 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1120 // if (getNextChar('\'')) {
1121 // // relocate if finding another quote fairly close: thus unicode
1122 // '/u000D' will be fully consumed
1123 // for (int lookAhead = 0;
1126 // if (currentPosition + lookAhead
1127 // == source.length)
1129 // if (source[currentPosition + lookAhead]
1132 // if (source[currentPosition + lookAhead]
1134 // currentPosition += lookAhead + 1;
1138 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1140 // if (getNextChar('\\'))
1141 // scanEscapeCharacter();
1142 // else { // consume next character
1143 // unicodeAsBackSlash = false;
1144 // if (((currentCharacter = source[currentPosition++])
1146 // && (source[currentPosition] == 'u')) {
1147 // getNextUnicodeChar();
1149 // if (withoutUnicodePtr != 0) {
1150 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1151 // currentCharacter;
1155 // // if (getNextChar('\''))
1156 // // return TokenNameCharacterLiteral;
1157 // // relocate if finding another quote fairly close: thus unicode
1158 // '/u000D' will be fully consumed
1159 // for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1160 // if (currentPosition + lookAhead == source.length)
1162 // if (source[currentPosition + lookAhead] == '\n')
1164 // if (source[currentPosition + lookAhead] == '\'') {
1165 // currentPosition += lookAhead + 1;
1169 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1172 // consume next character
1173 unicodeAsBackSlash = false;
1174 currentCharacter = source[currentPosition++];
1175 // if (((currentCharacter = source[currentPosition++]) == '\\')
1176 // && (source[currentPosition] == 'u')) {
1177 // getNextUnicodeChar();
1179 // if (withoutUnicodePtr != 0) {
1180 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1181 // currentCharacter;
1184 while (currentCharacter != '\'') {
1185 /** ** in PHP \r and \n are valid in string literals *** */
1186 // if ((currentCharacter == '\n')
1187 // || (currentCharacter == '\r')) {
1188 // // relocate if finding another quote fairly close: thus
1189 // unicode '/u000D' will be fully consumed
1190 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1191 // if (currentPosition + lookAhead == source.length)
1193 // if (source[currentPosition + lookAhead] == '\n')
1195 // if (source[currentPosition + lookAhead] == '\"') {
1196 // currentPosition += lookAhead + 1;
1200 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1202 if (currentCharacter == '\\') {
1203 int escapeSize = currentPosition;
1204 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1205 //scanSingleQuotedEscapeCharacter has a side effect on this value, and
1206 // we need the previous value a few lines further down
1207 scanSingleQuotedEscapeCharacter();
1208 escapeSize = currentPosition - escapeSize;
1209 if (withoutUnicodePtr == 0) {
1210 //buffer all the entries that have been left aside....
1211 withoutUnicodePtr = currentPosition - escapeSize - 1
1213 System.arraycopy(source, startPosition,
1214 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1215 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1216 } else { //overwrite the / in the buffer
1217 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1218 if (backSlashAsUnicodeInString) { //there are TWO \ in
1220 // only one is correct
1221 withoutUnicodePtr--;
1225 // consume next character
1226 unicodeAsBackSlash = false;
1227 currentCharacter = source[currentPosition++];
1228 // if (((currentCharacter = source[currentPosition++]) ==
1230 // && (source[currentPosition] == 'u')) {
1231 // getNextUnicodeChar();
1233 if (withoutUnicodePtr != 0) {
1234 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1238 } catch (IndexOutOfBoundsException e) {
1239 throw new InvalidInputException(UNTERMINATED_STRING);
1240 } catch (InvalidInputException e) {
1241 if (e.getMessage().equals(INVALID_ESCAPE)) {
1242 // relocate if finding another quote fairly close: thus
1243 // unicode '/u000D' will be fully consumed
1244 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1245 if (currentPosition + lookAhead == source.length)
1247 if (source[currentPosition + lookAhead] == '\n')
1249 if (source[currentPosition + lookAhead] == '\'') {
1250 currentPosition += lookAhead + 1;
1257 if (checkNonExternalizedStringLiterals) { // check for presence
1259 // //$NON-NLS-?$ where
1261 if (currentLine == null) {
1262 currentLine = new NLSLine();
1263 lines.add(currentLine);
1265 currentLine.add(new StringLiteral(
1266 getCurrentTokenSourceString(), startPosition,
1267 currentPosition - 1));
1269 return TokenNameStringConstant;
1272 // consume next character
1273 unicodeAsBackSlash = false;
1274 currentCharacter = source[currentPosition++];
1275 // if (((currentCharacter = source[currentPosition++]) == '\\')
1276 // && (source[currentPosition] == 'u')) {
1277 // getNextUnicodeChar();
1279 // if (withoutUnicodePtr != 0) {
1280 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1281 // currentCharacter;
1284 while (currentCharacter != '"') {
1285 /** ** in PHP \r and \n are valid in string literals *** */
1286 // if ((currentCharacter == '\n')
1287 // || (currentCharacter == '\r')) {
1288 // // relocate if finding another quote fairly close: thus
1289 // unicode '/u000D' will be fully consumed
1290 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1291 // if (currentPosition + lookAhead == source.length)
1293 // if (source[currentPosition + lookAhead] == '\n')
1295 // if (source[currentPosition + lookAhead] == '\"') {
1296 // currentPosition += lookAhead + 1;
1300 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1302 if (currentCharacter == '\\') {
1303 int escapeSize = currentPosition;
1304 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1305 //scanDoubleQuotedEscapeCharacter has a side effect on this value, and
1306 // we need the previous value a few lines further down
1307 scanDoubleQuotedEscapeCharacter();
1308 escapeSize = currentPosition - escapeSize;
1309 if (withoutUnicodePtr == 0) {
1310 //buffer all the entries that have been left aside....
1311 withoutUnicodePtr = currentPosition - escapeSize - 1
1313 System.arraycopy(source, startPosition,
1314 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1315 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1316 } else { //overwrite the / in the buffer
1317 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1318 if (backSlashAsUnicodeInString) { //there are TWO \ in
1320 // only one is correct
1321 withoutUnicodePtr--;
1325 // consume next character
1326 unicodeAsBackSlash = false;
1327 currentCharacter = source[currentPosition++];
1328 // if (((currentCharacter = source[currentPosition++]) ==
1330 // && (source[currentPosition] == 'u')) {
1331 // getNextUnicodeChar();
1333 if (withoutUnicodePtr != 0) {
1334 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1338 } catch (IndexOutOfBoundsException e) {
1339 throw new InvalidInputException(UNTERMINATED_STRING);
1340 } catch (InvalidInputException e) {
1341 if (e.getMessage().equals(INVALID_ESCAPE)) {
1342 // relocate if finding another quote fairly close: thus
1343 // unicode '/u000D' will be fully consumed
1344 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1345 if (currentPosition + lookAhead == source.length)
1347 if (source[currentPosition + lookAhead] == '\n')
1349 if (source[currentPosition + lookAhead] == '\"') {
1350 currentPosition += lookAhead + 1;
1357 if (checkNonExternalizedStringLiterals) { // check for presence
1359 // //$NON-NLS-?$ where
1361 if (currentLine == null) {
1362 currentLine = new NLSLine();
1363 lines.add(currentLine);
1365 currentLine.add(new StringLiteral(
1366 getCurrentTokenSourceString(), startPosition,
1367 currentPosition - 1));
1369 return TokenNameStringLiteral;
1372 // consume next character
1373 unicodeAsBackSlash = false;
1374 currentCharacter = source[currentPosition++];
1375 // if (((currentCharacter = source[currentPosition++]) == '\\')
1376 // && (source[currentPosition] == 'u')) {
1377 // getNextUnicodeChar();
1379 // if (withoutUnicodePtr != 0) {
1380 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1381 // currentCharacter;
1384 while (currentCharacter != '`') {
1385 /** ** in PHP \r and \n are valid in string literals *** */
1386 // if ((currentCharacter == '\n')
1387 // || (currentCharacter == '\r')) {
1388 // // relocate if finding another quote fairly close: thus
1389 // unicode '/u000D' will be fully consumed
1390 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1391 // if (currentPosition + lookAhead == source.length)
1393 // if (source[currentPosition + lookAhead] == '\n')
1395 // if (source[currentPosition + lookAhead] == '\"') {
1396 // currentPosition += lookAhead + 1;
1400 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1402 if (currentCharacter == '\\') {
1403 int escapeSize = currentPosition;
1404 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1405 //scanDoubleQuotedEscapeCharacter has a side effect on this value, and
1406 // we need the previous value a few lines further down
1407 scanDoubleQuotedEscapeCharacter();
1408 escapeSize = currentPosition - escapeSize;
1409 if (withoutUnicodePtr == 0) {
1410 //buffer all the entries that have been left aside....
1411 withoutUnicodePtr = currentPosition - escapeSize - 1
1413 System.arraycopy(source, startPosition,
1414 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1415 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1416 } else { //overwrite the / in the buffer
1417 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1418 if (backSlashAsUnicodeInString) { //there are TWO \ in
1420 // only one is correct
1421 withoutUnicodePtr--;
1425 // consume next character
1426 unicodeAsBackSlash = false;
1427 currentCharacter = source[currentPosition++];
1428 // if (((currentCharacter = source[currentPosition++]) ==
1430 // && (source[currentPosition] == 'u')) {
1431 // getNextUnicodeChar();
1433 if (withoutUnicodePtr != 0) {
1434 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1438 } catch (IndexOutOfBoundsException e) {
1439 throw new InvalidInputException(UNTERMINATED_STRING);
1440 } catch (InvalidInputException e) {
1441 if (e.getMessage().equals(INVALID_ESCAPE)) {
1442 // relocate if finding another quote fairly close: thus
1443 // unicode '/u000D' will be fully consumed
1444 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1445 if (currentPosition + lookAhead == source.length)
1447 if (source[currentPosition + lookAhead] == '\n')
1449 if (source[currentPosition + lookAhead] == '`') {
1450 currentPosition += lookAhead + 1;
1457 if (checkNonExternalizedStringLiterals) { // check for presence
1459 // //$NON-NLS-?$ where
1461 if (currentLine == null) {
1462 currentLine = new NLSLine();
1463 lines.add(currentLine);
1465 currentLine.add(new StringLiteral(
1466 getCurrentTokenSourceString(), startPosition,
1467 currentPosition - 1));
1469 return TokenNameStringInterpolated;
1473 char startChar = currentCharacter;
1474 if (getNextChar('=')) {
1475 return TokenNameDIVIDE_EQUAL;
1478 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1480 int endPositionForLineComment = 0;
1481 try { //get the next char
1482 currentCharacter = source[currentPosition++];
1483 // if (((currentCharacter = source[currentPosition++])
1485 // && (source[currentPosition] == 'u')) {
1486 // //-------------unicode traitement ------------
1487 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1488 // currentPosition++;
1489 // while (source[currentPosition] == 'u') {
1490 // currentPosition++;
1493 // Character.getNumericValue(source[currentPosition++]))
1497 // Character.getNumericValue(source[currentPosition++]))
1501 // Character.getNumericValue(source[currentPosition++]))
1505 // Character.getNumericValue(source[currentPosition++]))
1508 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1510 // currentCharacter =
1511 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1514 //handle the \\u case manually into comment
1515 // if (currentCharacter == '\\') {
1516 // if (source[currentPosition] == '\\')
1517 // currentPosition++;
1518 // } //jump over the \\
1519 boolean isUnicode = false;
1520 while (currentCharacter != '\r' && currentCharacter != '\n') {
1521 if (currentCharacter == '?') {
1522 if (getNextChar('>')) {
1523 startPosition = currentPosition - 2;
1525 return TokenNameINLINE_HTML;
1530 currentCharacter = source[currentPosition++];
1531 // if (((currentCharacter = source[currentPosition++])
1533 // && (source[currentPosition] == 'u')) {
1534 // isUnicode = true;
1535 // //-------------unicode traitement ------------
1536 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1537 // currentPosition++;
1538 // while (source[currentPosition] == 'u') {
1539 // currentPosition++;
1542 // Character.getNumericValue(source[currentPosition++]))
1546 // Character.getNumericValue(
1547 // source[currentPosition++]))
1551 // Character.getNumericValue(
1552 // source[currentPosition++]))
1556 // Character.getNumericValue(
1557 // source[currentPosition++]))
1561 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1563 // currentCharacter =
1564 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1567 //handle the \\u case manually into comment
1568 // if (currentCharacter == '\\') {
1569 // if (source[currentPosition] == '\\')
1570 // currentPosition++;
1571 // } //jump over the \\
1574 endPositionForLineComment = currentPosition - 6;
1576 endPositionForLineComment = currentPosition - 1;
1578 recordComment(false);
1579 if ((currentCharacter == '\r')
1580 || (currentCharacter == '\n')) {
1581 checkNonExternalizeString();
1582 if (recordLineSeparator) {
1584 pushUnicodeLineSeparator();
1586 pushLineSeparator();
1592 if (tokenizeComments) {
1594 currentPosition = endPositionForLineComment;
1595 // reset one character behind
1597 return TokenNameCOMMENT_LINE;
1599 } catch (IndexOutOfBoundsException e) { //an eof will them
1601 if (tokenizeComments) {
1603 // reset one character behind
1604 return TokenNameCOMMENT_LINE;
1610 //traditional and annotation comment
1611 boolean isJavadoc = false, star = false;
1612 // consume next character
1613 unicodeAsBackSlash = false;
1614 currentCharacter = source[currentPosition++];
1615 // if (((currentCharacter = source[currentPosition++]) ==
1617 // && (source[currentPosition] == 'u')) {
1618 // getNextUnicodeChar();
1620 // if (withoutUnicodePtr != 0) {
1621 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1622 // currentCharacter;
1625 if (currentCharacter == '*') {
1629 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1630 checkNonExternalizeString();
1631 if (recordLineSeparator) {
1632 pushLineSeparator();
1637 try { //get the next char
1638 currentCharacter = source[currentPosition++];
1639 // if (((currentCharacter = source[currentPosition++])
1641 // && (source[currentPosition] == 'u')) {
1642 // //-------------unicode traitement ------------
1643 // getNextUnicodeChar();
1645 //handle the \\u case manually into comment
1646 // if (currentCharacter == '\\') {
1647 // if (source[currentPosition] == '\\')
1648 // currentPosition++;
1649 // //jump over the \\
1651 // empty comment is not a javadoc /**/
1652 if (currentCharacter == '/') {
1655 //loop until end of comment */
1656 while ((currentCharacter != '/') || (!star)) {
1657 if ((currentCharacter == '\r')
1658 || (currentCharacter == '\n')) {
1659 checkNonExternalizeString();
1660 if (recordLineSeparator) {
1661 pushLineSeparator();
1666 star = currentCharacter == '*';
1668 currentCharacter = source[currentPosition++];
1669 // if (((currentCharacter = source[currentPosition++])
1671 // && (source[currentPosition] == 'u')) {
1672 // //-------------unicode traitement ------------
1673 // getNextUnicodeChar();
1675 //handle the \\u case manually into comment
1676 // if (currentCharacter == '\\') {
1677 // if (source[currentPosition] == '\\')
1678 // currentPosition++;
1679 // } //jump over the \\
1681 recordComment(isJavadoc);
1682 if (tokenizeComments) {
1684 return TokenNameCOMMENT_PHPDOC;
1685 return TokenNameCOMMENT_BLOCK;
1687 } catch (IndexOutOfBoundsException e) {
1688 throw new InvalidInputException(UNTERMINATED_COMMENT);
1692 return TokenNameDIVIDE;
1696 return TokenNameEOF;
1697 //the atEnd may not be <currentPosition == source.length> if
1698 // source is only some part of a real (external) stream
1699 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1701 if (currentCharacter == '$') {
1702 int oldPosition = currentPosition;
1704 currentCharacter = source[currentPosition++];
1706 if (isPHPIdentifierStart(currentCharacter)) {
1707 return scanIdentifierOrKeyword(true);
1709 currentPosition = oldPosition;
1710 return TokenNameDOLLAR;
1712 } catch (IndexOutOfBoundsException e) {
1713 currentPosition = oldPosition;
1714 return TokenNameDOLLAR;
1717 if (isPHPIdentifierStart(currentCharacter))
1718 return scanIdentifierOrKeyword(false);
1719 if (Character.isDigit(currentCharacter))
1720 return scanNumber(false);
1721 return TokenNameERROR;
1724 } //-----------------end switch while try--------------------
1725 catch (IndexOutOfBoundsException e) {
1728 return TokenNameEOF;
1730 // public final void getNextUnicodeChar()
1731 // throws IndexOutOfBoundsException, InvalidInputException {
1733 // //handle the case of unicode.
1734 // //when a unicode appears then we must use a buffer that holds char
1736 // //At the end of this method currentCharacter holds the new visited char
1737 // //and currentPosition points right next after it
1739 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1741 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1742 // currentPosition++;
1743 // while (source[currentPosition] == 'u') {
1744 // currentPosition++;
1748 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1750 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1752 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1754 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1756 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1758 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1759 // //need the unicode buffer
1760 // if (withoutUnicodePtr == 0) {
1761 // //buffer all the entries that have been left aside....
1762 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1763 // System.arraycopy(
1766 // withoutUnicodeBuffer,
1768 // withoutUnicodePtr);
1770 // //fill the buffer with the char
1771 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1773 // unicodeAsBackSlash = currentCharacter == '\\';
1776 * Tokenize a method body, assuming that curly brackets are properly balanced.
// Skips over a method body without returning tokens (the method is void). Each
// pass of the outer loop discards leading whitespace and then consumes one
// lexical element; the visible branches handle quoted strings, line comments,
// block comments, identifiers/keywords and numbers.
// NOTE(review): this listing omits lines of the original (the case labels of the
// switch, presumably the brace tracking, and most closing braces); the comments
// below describe only the statements that are visible.
1779 public final void jumpOverMethodBody() {
1780 this.wasAcr = false;
1783 while (true) { //loop for jumping over comments
1784 // ---------Consume white space and handles startPosition---------
1785 boolean isWhiteSpace;
// each iteration marks a candidate token start, reads one character and, when
// recordLineSeparator is set, records '\r' / '\n'; it repeats while the
// character read is whitespace
1787 startPosition = currentPosition;
1788 currentCharacter = source[currentPosition++];
1789 // if (((currentCharacter = source[currentPosition++]) == '\\')
1790 // && (source[currentPosition] == 'u')) {
1791 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1793 if (recordLineSeparator
1794 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1795 pushLineSeparator();
1796 isWhiteSpace = Character.isWhitespace(currentCharacter);
1798 } while (isWhiteSpace);
1799 // -------consume token until } is found---------
1800 switch (currentCharacter) {
// NOTE(review): the case label for this branch is not shown; the escape handling
// below suggests a quoted literal - confirm against the full file
1812 test = getNextChar('\\');
1815 scanDoubleQuotedEscapeCharacter();
1816 } catch (InvalidInputException ex) {
1819 // try { // consume next character
1820 unicodeAsBackSlash = false;
1821 currentCharacter = source[currentPosition++];
1822 // if (((currentCharacter = source[currentPosition++]) == '\\')
1823 // && (source[currentPosition] == 'u')) {
1824 // getNextUnicodeChar();
// keep the unicode-free copy of the token in step once it is in use
1826 if (withoutUnicodePtr != 0) {
1827 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1830 // } catch (InvalidInputException ex) {
1838 // try { // consume next character
1839 unicodeAsBackSlash = false;
1840 currentCharacter = source[currentPosition++];
1841 // if (((currentCharacter = source[currentPosition++]) == '\\')
1842 // && (source[currentPosition] == 'u')) {
1843 // getNextUnicodeChar();
1845 if (withoutUnicodePtr != 0) {
1846 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1849 // } catch (InvalidInputException ex) {
// scan forward until the closing double quote
1851 while (currentCharacter != '"') {
1852 if (currentCharacter == '\r') {
1853 if (source[currentPosition] == '\n')
1856 // the string cannot go further than the line
1858 if (currentCharacter == '\n') {
1860 // the string cannot go further than the line
// a backslash starts an escape sequence, handled by the dedicated helper
1862 if (currentCharacter == '\\') {
1864 scanDoubleQuotedEscapeCharacter();
1865 } catch (InvalidInputException ex) {
1868 // try { // consume next character
1869 unicodeAsBackSlash = false;
1870 currentCharacter = source[currentPosition++];
1871 // if (((currentCharacter = source[currentPosition++]) == '\\')
1872 // && (source[currentPosition] == 'u')) {
1873 // getNextUnicodeChar();
1875 if (withoutUnicodePtr != 0) {
1876 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1879 // } catch (InvalidInputException ex) {
1882 } catch (IndexOutOfBoundsException e) {
// a result of 0 is followed by the end-of-line scan below, i.e. it is treated
// as a line comment (presumably 0 means the first candidate, '/', matched)
1889 if ((test = getNextChar('/', '*')) == 0) {
1893 currentCharacter = source[currentPosition++];
1894 // if (((currentCharacter = source[currentPosition++]) ==
1896 // && (source[currentPosition] == 'u')) {
1897 // //-------------unicode handling ------------
1898 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1899 // currentPosition++;
1900 // while (source[currentPosition] == 'u') {
1901 // currentPosition++;
1904 // Character.getNumericValue(source[currentPosition++]))
1908 // Character.getNumericValue(source[currentPosition++]))
1912 // Character.getNumericValue(source[currentPosition++]))
1916 // Character.getNumericValue(source[currentPosition++]))
1919 // //error don't care of the value
1920 // currentCharacter = 'A';
1921 // } //something different from \n and \r
1923 // currentCharacter =
1924 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// consume characters up to the end of the line
1927 while (currentCharacter != '\r' && currentCharacter != '\n') {
1929 currentCharacter = source[currentPosition++];
1930 // if (((currentCharacter = source[currentPosition++])
1932 // && (source[currentPosition] == 'u')) {
1933 // //-------------unicode handling ------------
1934 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1935 // currentPosition++;
1936 // while (source[currentPosition] == 'u') {
1937 // currentPosition++;
1940 // Character.getNumericValue(source[currentPosition++]))
1944 // Character.getNumericValue(source[currentPosition++]))
1948 // Character.getNumericValue(source[currentPosition++]))
1952 // Character.getNumericValue(source[currentPosition++]))
1955 // //error don't care of the value
1956 // currentCharacter = 'A';
1957 // } //something different from \n and \r
1959 // currentCharacter =
1960 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1964 if (recordLineSeparator
1965 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1966 pushLineSeparator();
1967 } catch (IndexOutOfBoundsException e) {
1968 } //an eof will then be generated
1972 //traditional and annotation comment
// 'star' remembers whether the previously read character was '*', so that the
// loop below can recognise the closing "*/"
1973 boolean star = false;
1974 // try { // consume next character
1975 unicodeAsBackSlash = false;
1976 currentCharacter = source[currentPosition++];
1977 // if (((currentCharacter = source[currentPosition++]) == '\\')
1978 // && (source[currentPosition] == 'u')) {
1979 // getNextUnicodeChar();
1981 if (withoutUnicodePtr != 0) {
1982 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1985 // } catch (InvalidInputException ex) {
1987 if (currentCharacter == '*') {
1990 if (recordLineSeparator
1991 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1992 pushLineSeparator();
1993 try { //get the next char
1994 currentCharacter = source[currentPosition++];
1995 // if (((currentCharacter = source[currentPosition++]) ==
1997 // && (source[currentPosition] == 'u')) {
1998 // //-------------unicode handling ------------
1999 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2000 // currentPosition++;
2001 // while (source[currentPosition] == 'u') {
2002 // currentPosition++;
2005 // Character.getNumericValue(source[currentPosition++]))
2009 // Character.getNumericValue(source[currentPosition++]))
2013 // Character.getNumericValue(source[currentPosition++]))
2017 // Character.getNumericValue(source[currentPosition++]))
2020 // //error don't care of the value
2021 // currentCharacter = 'A';
2022 // } //something different from * and /
2024 // currentCharacter =
2025 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2028 //loop until end of comment */
2029 while ((currentCharacter != '/') || (!star)) {
2030 if (recordLineSeparator
2031 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2032 pushLineSeparator();
2033 star = currentCharacter == '*';
2035 currentCharacter = source[currentPosition++];
2036 // if (((currentCharacter = source[currentPosition++])
2038 // && (source[currentPosition] == 'u')) {
2039 // //-------------unicode handling ------------
2040 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2041 // currentPosition++;
2042 // while (source[currentPosition] == 'u') {
2043 // currentPosition++;
2046 // Character.getNumericValue(source[currentPosition++]))
2050 // Character.getNumericValue(source[currentPosition++]))
2054 // Character.getNumericValue(source[currentPosition++]))
2058 // Character.getNumericValue(source[currentPosition++]))
2061 // //error don't care of the value
2062 // currentCharacter = 'A';
2063 // } //something different from * and /
2065 // currentCharacter =
2066 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2070 } catch (IndexOutOfBoundsException e) {
// identifiers, keywords and '$'-prefixed names are consumed by the shared helper;
// its argument is true exactly when the token starts with '$'
2078 if (isPHPIdentifierStart(currentCharacter)
2079 || currentCharacter == '$') {
2081 scanIdentifierOrKeyword((currentCharacter == '$'));
2082 } catch (InvalidInputException ex) {
2086 if (Character.isDigit(currentCharacter)) {
2089 } catch (InvalidInputException ex) {
2095 //-----------------end switch while try--------------------
// NOTE(review): both handlers below show no statements in this listing;
// presumably reaching the end of the source or hitting invalid input simply
// ends the skip - confirm against the full file
2096 } catch (IndexOutOfBoundsException e) {
2097 } catch (InvalidInputException e) {
2101 // public final boolean jumpOverUnicodeWhiteSpace()
2102 // throws InvalidInputException {
2104 // //handle the case of unicode. Jump over the next whiteSpace
2105 // //making startPosition pointing on the next available char
2106 // //On false, the currentCharacter is filled up with a potential
2110 // this.wasAcr = false;
2111 // int c1, c2, c3, c4;
2112 // int unicodeSize = 6;
2113 // currentPosition++;
2114 // while (source[currentPosition] == 'u') {
2115 // currentPosition++;
2119 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2121 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2123 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2125 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2127 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2130 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2131 // if (recordLineSeparator
2132 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2133 // pushLineSeparator();
2134 // if (Character.isWhitespace(currentCharacter))
2137 // //buffer the new char which is not a white space
2138 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2139 // //withoutUnicodePtr == 1 is true here
2141 // } catch (IndexOutOfBoundsException e) {
2142 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
/**
 * Builds a copy of the recorded line-end positions, bounded to the entries in
 * use: a new array of length {@code linePtr + 1} is filled from the start of
 * {@code lineEnds}.
 * NOTE(review): the declaration of {@code copy} and the return statement are
 * not visible in this listing; presumably the copy is what gets returned.
 */
2145 public final int[] getLineEnds() {
2146 //return a bounded copy of this.lineEnds
// the destination array is allocated inline, sized to the linePtr + 1 used entries
2148 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
/**
 * Accessor returning a {@code char[]}.
 * NOTE(review): the body is not visible in this listing; presumably it hands
 * back the scanner's {@code source} array itself rather than a copy - confirm
 * before mutating the result.
 */
2151 public char[] getSource() {
/**
 * Single-character variant of the token-source lookup: reads the character at
 * {@code startPosition}.
 * NOTE(review): original lines 2158-2211 are absent from this listing;
 * presumably they return pre-built shared arrays for common characters, with
 * the allocation on the last line as the fallback - confirm against the full file.
 */
2154 final char[] optimizedCurrentTokenSource1() {
2155 //always return the same char[], built only once
2156 //optimization at no speed cost for 99.5 % of the single-char identifiers
2157 char charOne = source[startPosition];
// allocate a fresh one-element array holding the token's character
2212 return new char[]{charOne};
/**
 * Two-character variant of the token-source lookup: tries to reuse a cached
 * {@code char[]} from {@code charArray_length[0]} whose two characters equal
 * those at {@code startPosition}, and otherwise stores a newly built array in
 * the selected bucket.
 * NOTE(review): the declarations of {@code c0}, {@code c1}, {@code i} and
 * {@code r}, the end of the hash expression, the statements guarded by the
 * {@code if}s and the final return are not visible in this listing.
 */
2215 final char[] optimizedCurrentTokenSource2() {
2216 //try to return the same char[], built only once
// the hash combines both characters (the first shifted left by 6 bits); the
// expression continues on a line that is not shown
2218 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
// bucket of candidate arrays for this hash
2220 char[][] table = charArray_length[0][hash];
// first scan: advance i up to the end of the bucket (i's starting value is set
// on a line not shown), comparing each candidate character by character
2222 while (++i < InternalTableSize) {
2223 char[] charArray = table[i];
2224 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2227 //---------other side---------
// second scan: candidates up to and including index newEntry2
2229 int max = newEntry2;
2230 while (++i <= max) {
2231 char[] charArray = table[i];
2232 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2235 //--------add the entry-------
// no match: move to the next slot (the action taken when the slot index
// reaches InternalTableSize is not shown) and store the new array there
2236 if (++max >= InternalTableSize)
2239 table[max] = (r = new char[]{c0, c1});
/**
 * Three-character variant of the token-source lookup: tries to reuse a cached
 * {@code char[]} from {@code charArray_length[1]} whose three characters equal
 * those at {@code startPosition}, and otherwise stores a newly built array in
 * the selected bucket.
 * NOTE(review): the declarations of {@code c0}..{@code c2}, {@code i} and
 * {@code r}, the end of the hash expression, the statements guarded by the
 * {@code if}s and the final return are not visible in this listing.
 */
2243 final char[] optimizedCurrentTokenSource3() {
2244 //try to return the same char[], built only once
// the hash combines the three characters with 12- and 6-bit shifts; the
// expression continues on a line that is not shown
2246 int hash = (((c0 = source[startPosition]) << 12)
2247 + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
// bucket of candidate arrays for this hash
2249 char[][] table = charArray_length[1][hash];
// first scan: advance i up to the end of the bucket (i's starting value is set
// on a line not shown), comparing each candidate character by character
2251 while (++i < InternalTableSize) {
2252 char[] charArray = table[i];
2253 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2256 //---------other side---------
// second scan: candidates up to and including index newEntry3
2258 int max = newEntry3;
2259 while (++i <= max) {
2260 char[] charArray = table[i];
2261 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2264 //--------add the entry-------
// no match: move to the next slot (the action taken when the slot index
// reaches InternalTableSize is not shown) and store the new array there
2265 if (++max >= InternalTableSize)
2268 table[max] = (r = new char[]{c0, c1, c2});
/**
 * Four-character variant of the token-source lookup: tries to reuse a cached
 * {@code char[]} from {@code charArray_length[2]} whose four characters equal
 * those at {@code startPosition}, and otherwise stores a newly built array in
 * the selected bucket.
 * NOTE(review): the declarations of {@code i} and {@code r}, the end of the
 * hash expression, the statements guarded by the {@code if}s and the final
 * return are not visible in this listing.
 */
2272 final char[] optimizedCurrentTokenSource4() {
2273 //try to return the same char[], built only once
2274 char c0, c1, c2, c3;
// the hash is accumulated as a long (the first term is widened before its
// 18-bit shift); the expression continues on a line that is not shown
2275 long hash = ((((long) (c0 = source[startPosition])) << 18)
2276 + ((c1 = source[startPosition + 1]) << 12)
2277 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
// bucket of candidate arrays; the hash is narrowed to int to index the table
2279 char[][] table = charArray_length[2][(int) hash];
// first scan: advance i up to the end of the bucket (i's starting value is set
// on a line not shown), comparing each candidate character by character
2281 while (++i < InternalTableSize) {
2282 char[] charArray = table[i];
2283 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2284 && (c3 == charArray[3]))
2287 //---------other side---------
// second scan: candidates up to and including index newEntry4
2289 int max = newEntry4;
2290 while (++i <= max) {
2291 char[] charArray = table[i];
2292 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2293 && (c3 == charArray[3]))
2296 //--------add the entry-------
// no match: move to the next slot (the action taken when the slot index
// reaches InternalTableSize is not shown) and store the new array there
2297 if (++max >= InternalTableSize)
2300 table[max] = (r = new char[]{c0, c1, c2, c3});
/**
 * Five-character variant of the token-source lookup: tries to reuse a cached
 * {@code char[]} from {@code charArray_length[3]} whose five characters equal
 * those at {@code startPosition}, and otherwise stores a newly built array in
 * the selected bucket.
 * NOTE(review): the declarations of {@code i} and {@code r}, the end of the
 * hash expression, the statements guarded by the {@code if}s and the final
 * return are not visible in this listing.
 */
2304 final char[] optimizedCurrentTokenSource5() {
2305 //try to return the same char[], built only once
2306 char c0, c1, c2, c3, c4;
// the hash is accumulated as a long (the first two terms are widened before
// their 24- and 18-bit shifts); the expression continues on a line not shown
2307 long hash = ((((long) (c0 = source[startPosition])) << 24)
2308 + (((long) (c1 = source[startPosition + 1])) << 18)
2309 + ((c2 = source[startPosition + 2]) << 12)
2310 + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
// bucket of candidate arrays; the hash is narrowed to int to index the table
2312 char[][] table = charArray_length[3][(int) hash];
// first scan: advance i up to the end of the bucket (i's starting value is set
// on a line not shown), comparing each candidate character by character
2314 while (++i < InternalTableSize) {
2315 char[] charArray = table[i];
2316 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2317 && (c3 == charArray[3]) && (c4 == charArray[4]))
2320 //---------other side---------
// second scan: candidates up to and including index newEntry5
2322 int max = newEntry5;
2323 while (++i <= max) {
2324 char[] charArray = table[i];
2325 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2326 && (c3 == charArray[3]) && (c4 == charArray[4]))
2329 //--------add the entry-------
// no match: move to the next slot (the action taken when the slot index
// reaches InternalTableSize is not shown) and store the new array there
2330 if (++max >= InternalTableSize)
2333 table[max] = (r = new char[]{c0, c1, c2, c3, c4});
/**
 * Six-character variant of the token-source lookup: tries to reuse a cached
 * {@code char[]} from {@code charArray_length[4]} whose six characters equal
 * those at {@code startPosition}, and otherwise stores a newly built array in
 * the selected bucket.
 * NOTE(review): the declarations of {@code i} and {@code r}, the end of the
 * hash expression, the statements guarded by the {@code if}s and the final
 * return are not visible in this listing.
 */
2337 final char[] optimizedCurrentTokenSource6() {
2338 //try to return the same char[], built only once
2339 char c0, c1, c2, c3, c4, c5;
// the shift amounts (32, 24, 18, 12, 6) are not evenly spaced; the value only
// selects a bucket and candidates are still compared character by character
// below, so this influences how entries spread over buckets, not which array
// is accepted as a match
2340 long hash = ((((long) (c0 = source[startPosition])) << 32)
2341 + (((long) (c1 = source[startPosition + 1])) << 24)
2342 + (((long) (c2 = source[startPosition + 2])) << 18)
2343 + ((c3 = source[startPosition + 3]) << 12)
2344 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
// bucket of candidate arrays; the hash is narrowed to int to index the table
2346 char[][] table = charArray_length[4][(int) hash];
// first scan: advance i up to the end of the bucket (i's starting value is set
// on a line not shown), comparing each candidate character by character
2348 while (++i < InternalTableSize) {
2349 char[] charArray = table[i];
2350 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2351 && (c3 == charArray[3]) && (c4 == charArray[4])
2352 && (c5 == charArray[5]))
2355 //---------other side---------
// second scan: candidates up to and including index newEntry6
2357 int max = newEntry6;
2358 while (++i <= max) {
2359 char[] charArray = table[i];
2360 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2361 && (c3 == charArray[3]) && (c4 == charArray[4])
2362 && (c5 == charArray[5]))
2365 //--------add the entry-------
// no match: move to the next slot (the action taken when the slot index
// reaches InternalTableSize is not shown) and store the new array there
2366 if (++max >= InternalTableSize)
2369 table[max] = (r = new char[]{c0, c1, c2, c3, c4, c5});
/**
 * Records the line separator held in {@code currentCharacter} (located at
 * {@code currentPosition - 1}) in {@code lineEnds}, growing that array by
 * {@code INCREMENT} slots when it is full. A '\r' directly followed by '\n' is
 * stored as a single line end positioned on the '\n'.
 * NOTE(review): several lines of the original (the {@code try} openers, the
 * statements guarded by the one-line {@code if}s, closing braces) are missing
 * from this listing; the comments describe only the visible statements.
 *
 * @throws InvalidInputException declared by the signature; no throw site is
 *         visible in this listing
 */
2373 public final void pushLineSeparator() throws InvalidInputException {
2374 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// number of slots added each time lineEnds runs out of room
2375 final int INCREMENT = 250;
2376 if (this.checkNonExternalizedStringLiterals) {
2377 // reinitialize the current line for non externalize strings purpose
2380 //currentCharacter is at position currentPosition-1
2382 if (currentCharacter == '\r') {
2383 int separatorPos = currentPosition - 1;
// the last recorded line end is at or beyond this separator (the guarded
// statement is not shown; presumably the separator is not recorded again)
2384 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2386 //System.out.println("CR-" + separatorPos);
2388 lineEnds[++linePtr] = separatorPos;
2389 } catch (IndexOutOfBoundsException e) {
// the array was full: the index expression ++linePtr had already been evaluated
// when the store failed, so linePtr names the slot to fill after growing
2390 //linePtr value is correct
2391 int oldLength = lineEnds.length;
2392 int[] old = lineEnds;
2393 lineEnds = new int[oldLength + INCREMENT];
2394 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2395 lineEnds[linePtr] = separatorPos;
2397 // look-ahead for merged cr+lf
2399 if (source[currentPosition] == '\n') {
2400 //System.out.println("look-ahead LF-" + currentPosition);
// move the line end just recorded for '\r' onto the following '\n'
2401 lineEnds[linePtr] = currentPosition;
2407 } catch (IndexOutOfBoundsException e) {
2412 if (currentCharacter == '\n') {
2413 //must merge eventual cr followed by lf
// the previous entry is the '\r' immediately before this '\n': overwrite it
// with the position of the '\n' instead of adding a second entry
2414 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2415 //System.out.println("merge LF-" + (currentPosition - 1));
2416 lineEnds[linePtr] = currentPosition - 1;
2418 int separatorPos = currentPosition - 1;
// same guard as in the '\r' branch (guarded statement not shown)
2419 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2421 // System.out.println("LF-" + separatorPos);
2423 lineEnds[++linePtr] = separatorPos;
2424 } catch (IndexOutOfBoundsException e) {
// same growth path as in the '\r' branch
2425 //linePtr value is correct
2426 int oldLength = lineEnds.length;
2427 int[] old = lineEnds;
2428 lineEnds = new int[oldLength + INCREMENT];
2429 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2430 lineEnds[linePtr] = separatorPos;
/**
 * Variant of pushLineSeparator() for a '\r' or '\n' that was read as a unicode character:
 * the recorded position is currentPosition - 6 instead of currentPosition - 1.
 * NOTE(review): presumably 6 is the source length of a unicode escape - confirm.
 */
2437 public final void pushUnicodeLineSeparator() {
2438 // isUnicode means that the \r or \n has been read as a unicode character
2439 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// Number of extra slots allocated whenever lineEnds is full.
2440 final int INCREMENT = 250;
2441 //currentCharacter is at position currentPosition-1
2442 if (this.checkNonExternalizedStringLiterals) {
2443 // reinitialize the current line for non externalize strings purpose
2447 if (currentCharacter == '\r') {
2448 int separatorPos = currentPosition - 6;
// A separator at or before the last recorded line end has already been accounted for.
// NOTE(review): presumably the method returns in that case - confirm.
2449 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2451 //System.out.println("CR-" + separatorPos);
2453 lineEnds[++linePtr] = separatorPos;
2454 } catch (IndexOutOfBoundsException e) {
2455 //linePtr value is correct
// The failed store already advanced linePtr; grow the array by INCREMENT and write again.
2456 int oldLength = lineEnds.length;
2457 int[] old = lineEnds;
2458 lineEnds = new int[oldLength + INCREMENT];
2459 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2460 lineEnds[linePtr] = separatorPos;
2462 // look-ahead for merged cr+lf
// NOTE(review): unlike pushLineSeparator(), no catch is apparent around this read of
// source[currentPosition] - confirm it cannot run past the end of the source.
2463 if (source[currentPosition] == '\n') {
2464 //System.out.println("look-ahead LF-" + currentPosition);
2465 lineEnds[linePtr] = currentPosition;
2473 if (currentCharacter == '\n') {
2474 //must merge eventual cr followed by lf
// If the previous entry is a '\r' recorded at currentPosition - 7, it is replaced by the
// position of this separator (currentPosition - 6) rather than adding a second entry.
2475 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2476 //System.out.println("merge LF-" + (currentPosition - 1));
2477 lineEnds[linePtr] = currentPosition - 6;
2479 int separatorPos = currentPosition - 6;
// Same "already recorded" guard as in the '\r' branch.
2480 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2482 // System.out.println("LF-" + separatorPos);
2484 lineEnds[++linePtr] = separatorPos;
2485 } catch (IndexOutOfBoundsException e) {
2486 //linePtr value is correct
// Grow lineEnds by INCREMENT and retry the store at the already-advanced linePtr.
2487 int oldLength = lineEnds.length;
2488 int[] old = lineEnds;
2489 lineEnds = new int[oldLength + INCREMENT];
2490 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2491 lineEnds[linePtr] = separatorPos;
/**
 * Records the comment that was just scanned: its end (currentPosition) is pushed onto
 * commentStops and its start (startPosition) is stored at the same index of commentStarts.
 *
 * @param isJavadoc selects the sign of the stored stop position: currentPosition when true,
 *        -currentPosition when false (see the assignment in the catch branch)
 */
2498 public final void recordComment(boolean isJavadoc) {
2499 // a new annotation comment is recorded
2501 commentStops[++commentPtr] = isJavadoc
2504 } catch (IndexOutOfBoundsException e) {
// commentStops is full: grow it by 30 slots and redo the store. commentPtr was already
// advanced by the failed store, so it is used unchanged.
2505 int oldStackLength = commentStops.length;
2506 int[] oldStack = commentStops;
2507 commentStops = new int[oldStackLength + 30];
2508 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2509 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2510 //grows the positions buffers too
// commentStarts is resized to the same new length so both arrays stay index-aligned.
2511 int[] old = commentStarts;
2512 commentStarts = new int[oldStackLength + 30];
2513 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2515 //the buffer is of a correct size here
2516 commentStarts[commentPtr] = startPosition;
/**
 * Repositions the scanner so that scanning can start again from {@code begin}.
 *
 * @param begin position assigned to initialPosition, startPosition and currentPosition
 * @param end last position to be scanned; eofPosition is set to the position after it
 */
2518 public void resetTo(int begin, int end) {
2519 //reset the scanner to a given position where it may rescan again
2521 initialPosition = startPosition = currentPosition = begin;
// end + 1 would overflow for Integer.MAX_VALUE, so that value is kept as is.
2522 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2523 commentPtr = -1; // reset comment stack
/**
 * Reads the next source character and replaces currentCharacter with the character the
 * escape stands for.
 * NOTE(review): presumably called right after a backslash inside a single-quoted string - confirm.
 *
 * @throws InvalidInputException TODO confirm which path raises it; none is apparent here
 */
2525 public final void scanSingleQuotedEscapeCharacter()
2526 throws InvalidInputException {
2527 // the string with "\\u" is a legal string of two chars \ and u
2528 //thus we use a direct access to the source (for regular cases).
2529 // if (unicodeAsBackSlash) {
2530 // // consume next character
2531 // unicodeAsBackSlash = false;
2532 // if (((currentCharacter = source[currentPosition++]) == '\\')
2533 // && (source[currentPosition] == 'u')) {
2534 // getNextUnicodeChar();
2536 // if (withoutUnicodePtr != 0) {
2537 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Direct read of the escaped character; no unicode-escape decoding is performed.
2541 currentCharacter = source[currentPosition++];
// The only results produced below are a single quote and a backslash.
// NOTE(review): presumably the last assignment is the fallback for any other character - confirm.
2542 switch (currentCharacter) {
2544 currentCharacter = '\'';
2547 currentCharacter = '\\';
2550 currentCharacter = '\\';
/**
 * Reads the next source character and replaces currentCharacter with the character the
 * escape stands for: tab, line feed, carriage return, double quote, single quote, backslash,
 * '$', or the value of an octal escape of up to three digits.
 * NOTE(review): presumably called right after a backslash inside a double-quoted string - confirm.
 *
 * @throws InvalidInputException with INVALID_ESCAPE for an octal escape that is rejected
 */
2554 public final void scanDoubleQuotedEscapeCharacter()
2555 throws InvalidInputException {
2556 // the string with "\\u" is a legal string of two chars \ and u
2557 //thus we use a direct access to the source (for regular cases).
2558 // if (unicodeAsBackSlash) {
2559 // // consume next character
2560 // unicodeAsBackSlash = false;
2561 // if (((currentCharacter = source[currentPosition++]) == '\\')
2562 // && (source[currentPosition] == 'u')) {
2563 // getNextUnicodeChar();
2565 // if (withoutUnicodePtr != 0) {
2566 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Direct read of the escaped character; no unicode-escape decoding is performed.
2570 currentCharacter = source[currentPosition++];
2571 switch (currentCharacter) {
// The '\b' and '\f' translations are commented out, so those escapes are not translated here.
2573 // currentCharacter = '\b';
2576 currentCharacter = '\t';
2579 currentCharacter = '\n';
2582 // currentCharacter = '\f';
2585 currentCharacter = '\r';
2588 currentCharacter = '\"';
2591 currentCharacter = '\'';
2594 currentCharacter = '\\';
2597 currentCharacter = '$';
2600 // -----------octal escape--------------
2602 // OctalDigit OctalDigit
2603 // ZeroToThree OctalDigit OctalDigit
// number accumulates the octal value; only digits 0..7 are accepted.
2604 int number = Character.getNumericValue(currentCharacter);
2605 if (number >= 0 && number <= 7) {
// A first digit above 3 rules out a third digit (see the zeroToThreeNot test below).
2606 boolean zeroToThreeNot = number > 3;
2607 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2608 int digit = Character.getNumericValue(currentCharacter);
2609 if (digit >= 0 && digit <= 7) {
2610 number = (number * 8) + digit;
2612 .isDigit(currentCharacter = source[currentPosition++])) {
2613 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2614 // Digit --> ignore last character
2617 digit = Character.getNumericValue(currentCharacter);
2618 if (digit >= 0 && digit <= 7) {
2619 // has read \ZeroToThree OctalDigit OctalDigit
2620 number = (number * 8) + digit;
2621 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2622 // --> ignore last character
2626 } else { // has read \OctalDigit NonDigit--> ignore last
2630 } else { // has read \OctalDigit NonOctalDigit--> ignore last
2634 } else { // has read \OctalDigit --> ignore last character
// NOTE(review): presumably guarded by a range check on number - confirm.
2638 throw new InvalidInputException(INVALID_ESCAPE);
// The accumulated octal value becomes the resulting character.
2639 currentCharacter = (char) number;
2642 // throw new InvalidInputException(INVALID_ESCAPE);
2645 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2646 // return scanIdentifierOrKeyword( false );
/**
 * Scans the rest of an identifier and classifies the token. The token text is copied into a
 * lower-cased scratch array (so keyword spellings are matched case-insensitively) and then
 * compared, character by character, against the PHP keyword spellings grouped by first letter.
 *
 * @param isVariable NOTE(review): presumably selects the early TokenNameVariable return - confirm
 * @return TokenNameVariable, the token constant of the matched keyword, or TokenNameIdentifier
 * @throws InvalidInputException TODO confirm; presumably propagated from
 *         getNextCharAsJavaIdentifierPart()
 */
2648 public int scanIdentifierOrKeyword(boolean isVariable)
2649 throws InvalidInputException {
2651 //first dispatch on the first char.
2652 //then the length. If there are several
2653 //keywords with the same length AND the same first char, then do another
2654 //dispatch on the second char :-)...cool....but fast !
2655 useAssertAsAnIndentifier = false;
// Keep consuming characters for as long as getNextCharAsJavaIdentifierPart() succeeds.
2656 while (getNextCharAsJavaIdentifierPart()) {
2659 // if (new String(getCurrentTokenSource()).equals("$this")) {
2660 // return TokenNamethis;
2662 return TokenNameVariable;
2667 // if (withoutUnicodePtr == 0)
2668 //quick test on length == 1 but not on length > 12 while most identifier
2669 //have a length which is <= 12...but there are lots of identifier with
// A one-character token can never be a keyword.
2672 if ((length = currentPosition - startPosition) == 1)
2673 return TokenNameIdentifier;
// Lower-cased copy of the token; every comparison below is made against this copy.
2675 data = new char[length];
2676 index = startPosition;
2677 for (int i = 0; i < length; i++) {
2678 data[i] = Character.toLowerCase(source[index + i]);
2682 // if ((length = withoutUnicodePtr) == 1)
2683 // return TokenNameIdentifier;
2684 // // data = withoutUnicodeBuffer;
2685 // data = new char[withoutUnicodeBuffer.length];
2686 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2687 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
// NOTE(review): index was set to startPosition above; it has to be 0 again at this point for
// data[index] to be the first letter - confirm it is reset.
2691 firstLetter = data[index];
2692 switch (firstLetter) {
// Magic constants: __file__, __line__, __class__, __method__, __function__ (lower-cased forms).
2697 if ((data[++index] == '_') && (data[++index] == 'f')
2698 && (data[++index] == 'i') && (data[++index] == 'l')
2699 && (data[++index] == 'e') && (data[++index] == '_')
2700 && (data[++index] == '_'))
2701 return TokenNameFILE;
// index is rewound before the second spelling of the same length is tried.
2702 index = 0; //__LINE__
2703 if ((data[++index] == '_') && (data[++index] == 'l')
2704 && (data[++index] == 'i') && (data[++index] == 'n')
2705 && (data[++index] == 'e') && (data[++index] == '_')
2706 && (data[++index] == '_'))
2707 return TokenNameLINE;
2711 if ((data[++index] == '_') && (data[++index] == 'c')
2712 && (data[++index] == 'l') && (data[++index] == 'a')
2713 && (data[++index] == 's') && (data[++index] == 's')
2714 && (data[++index] == '_') && (data[++index] == '_'))
2715 return TokenNameCLASS_C;
2719 if ((data[++index] == '_') && (data[++index] == 'm')
2720 && (data[++index] == 'e') && (data[++index] == 't')
2721 && (data[++index] == 'h') && (data[++index] == 'o')
2722 && (data[++index] == 'd') && (data[++index] == '_')
2723 && (data[++index] == '_'))
2724 return TokenNameMETHOD_C;
2728 if ((data[++index] == '_') && (data[++index] == 'f')
2729 && (data[++index] == 'u') && (data[++index] == 'n')
2730 && (data[++index] == 'c') && (data[++index] == 't')
2731 && (data[++index] == 'i') && (data[++index] == 'o')
2732 && (data[++index] == 'n') && (data[++index] == '_')
2733 && (data[++index] == '_'))
2734 return TokenNameFUNC_C;
2737 return TokenNameIdentifier;
2739 // as and array abstract
2743 if ((data[++index] == 's')) {
2746 return TokenNameIdentifier;
2750 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2751 return TokenNameand;
2753 return TokenNameIdentifier;
2757 if ((data[++index] == 'r') && (data[++index] == 'r')
2758 && (data[++index] == 'a') && (data[++index] == 'y'))
2759 return TokenNamearray;
2761 return TokenNameIdentifier;
2763 if ((data[++index] == 'b') && (data[++index] == 's')
2764 && (data[++index] == 't') && (data[++index] == 'r')
2765 && (data[++index] == 'a') && (data[++index] == 'c')
2766 && (data[++index] == 't'))
2767 return TokenNameabstract;
2769 return TokenNameIdentifier;
2771 return TokenNameIdentifier;
2777 if ((data[++index] == 'r') && (data[++index] == 'e')
2778 && (data[++index] == 'a') && (data[++index] == 'k'))
2779 return TokenNamebreak;
2781 return TokenNameIdentifier;
2783 return TokenNameIdentifier;
2786 //case catch class clone const continue
2789 if ((data[++index] == 'a') && (data[++index] == 's')
2790 && (data[++index] == 'e'))
2791 return TokenNamecase;
2793 return TokenNameIdentifier;
2795 if ((data[++index] == 'a') && (data[++index] == 't')
2796 && (data[++index] == 'c') && (data[++index] == 'h'))
2797 return TokenNamecatch;
2799 if ((data[++index] == 'l') && (data[++index] == 'a')
2800 && (data[++index] == 's') && (data[++index] == 's'))
2801 return TokenNameclass;
2803 if ((data[++index] == 'l') && (data[++index] == 'o')
2804 && (data[++index] == 'n') && (data[++index] == 'e'))
2805 return TokenNameclone;
2807 if ((data[++index] == 'o') && (data[++index] == 'n')
2808 && (data[++index] == 's') && (data[++index] == 't'))
2809 return TokenNameconst;
2811 return TokenNameIdentifier;
2813 if ((data[++index] == 'o') && (data[++index] == 'n')
2814 && (data[++index] == 't') && (data[++index] == 'i')
2815 && (data[++index] == 'n') && (data[++index] == 'u')
2816 && (data[++index] == 'e'))
2817 return TokenNamecontinue;
2819 return TokenNameIdentifier;
2821 return TokenNameIdentifier;
2824 // declare default do die
2825 // TODO delete define ==> no keyword !
2828 if ((data[++index] == 'o'))
2831 return TokenNameIdentifier;
2833 // if ((data[++index] == 'e')
2834 // && (data[++index] == 'f')
2835 // && (data[++index] == 'i')
2836 // && (data[++index] == 'n')
2837 // && (data[++index] == 'e'))
2838 // return TokenNamedefine;
2840 // return TokenNameIdentifier;
2842 if ((data[++index] == 'e') && (data[++index] == 'c')
2843 && (data[++index] == 'l') && (data[++index] == 'a')
2844 && (data[++index] == 'r') && (data[++index] == 'e'))
2845 return TokenNamedeclare;
2847 if ((data[++index] == 'e') && (data[++index] == 'f')
2848 && (data[++index] == 'a') && (data[++index] == 'u')
2849 && (data[++index] == 'l') && (data[++index] == 't'))
2850 return TokenNamedefault;
2852 return TokenNameIdentifier;
2854 return TokenNameIdentifier;
2857 //echo else exit elseif extends eval
2860 if ((data[++index] == 'c') && (data[++index] == 'h')
2861 && (data[++index] == 'o'))
2862 return TokenNameecho;
// NOTE(review): the else-if branches below re-test data[index] and are only aligned when the
// first test failed on its first character (index == 1). If it failed later (e.g. "ecls"),
// index has already advanced and the following ++index reads can run past the end of data -
// confirm and consider rewinding index before each alternative.
2863 else if ((data[index] == 'l') && (data[++index] == 's')
2864 && (data[++index] == 'e'))
2865 return TokenNameelse;
2866 else if ((data[index] == 'x') && (data[++index] == 'i')
2867 && (data[++index] == 't'))
2868 return TokenNameexit;
2869 else if ((data[index] == 'v') && (data[++index] == 'a')
2870 && (data[++index] == 'l'))
2871 return TokenNameeval;
2873 return TokenNameIdentifier;
2876 if ((data[++index] == 'n') && (data[++index] == 'd')
2877 && (data[++index] == 'i') && (data[++index] == 'f'))
2878 return TokenNameendif;
// NOTE(review): same index-reuse caveat as for else/exit/eval above.
2879 if ((data[index] == 'm') && (data[++index] == 'p')
2880 && (data[++index] == 't') && (data[++index] == 'y'))
2881 return TokenNameempty;
2883 return TokenNameIdentifier;
2886 if ((data[++index] == 'n') && (data[++index] == 'd')
2887 && (data[++index] == 'f') && (data[++index] == 'o')
2888 && (data[++index] == 'r'))
2889 return TokenNameendfor;
// NOTE(review): same index-reuse caveat as for else/exit/eval above.
2890 else if ((data[index] == 'l') && (data[++index] == 's')
2891 && (data[++index] == 'e') && (data[++index] == 'i')
2892 && (data[++index] == 'f'))
2893 return TokenNameelseif;
2895 return TokenNameIdentifier;
2897 if ((data[++index] == 'x') && (data[++index] == 't')
2898 && (data[++index] == 'e') && (data[++index] == 'n')
2899 && (data[++index] == 'd') && (data[++index] == 's'))
2900 return TokenNameextends;
2902 return TokenNameIdentifier;
2905 if ((data[++index] == 'n') && (data[++index] == 'd')
2906 && (data[++index] == 'w') && (data[++index] == 'h')
2907 && (data[++index] == 'i') && (data[++index] == 'l')
2908 && (data[++index] == 'e'))
2909 return TokenNameendwhile;
2911 return TokenNameIdentifier;
2914 if ((data[++index] == 'n') && (data[++index] == 'd')
2915 && (data[++index] == 's') && (data[++index] == 'w')
2916 && (data[++index] == 'i') && (data[++index] == 't')
2917 && (data[++index] == 'c') && (data[++index] == 'h'))
2918 return TokenNameendswitch;
2920 return TokenNameIdentifier;
// NOTE(review): the characters tested here spell "enddeclare", yet the token returned is
// TokenNameendforeach (the same one as the "endforeach" branch below) - confirm which token
// is intended.
2923 if ((data[++index] == 'n') && (data[++index] == 'd')
2924 && (data[++index] == 'd') && (data[++index] == 'e')
2925 && (data[++index] == 'c') && (data[++index] == 'l')
2926 && (data[++index] == 'a') && (data[++index] == 'r')
2927 && (data[++index] == 'e'))
2928 return TokenNameendforeach;
2930 if ((data[++index] == 'n') // endforeach
2931 && (data[++index] == 'd') && (data[++index] == 'f')
2932 && (data[++index] == 'o') && (data[++index] == 'r')
2933 && (data[++index] == 'e') && (data[++index] == 'a')
2934 && (data[++index] == 'c') && (data[++index] == 'h'))
2935 return TokenNameendforeach;
2937 return TokenNameIdentifier;
2939 return TokenNameIdentifier;
2942 //for false final function
2945 if ((data[++index] == 'o') && (data[++index] == 'r'))
2946 return TokenNamefor;
2948 return TokenNameIdentifier;
2950 // if ((data[++index] == 'a') && (data[++index] == 'l')
2951 // && (data[++index] == 's') && (data[++index] == 'e'))
2952 // return TokenNamefalse;
2953 if ((data[++index] == 'i') && (data[++index] == 'n')
2954 && (data[++index] == 'a') && (data[++index] == 'l'))
2955 return TokenNamefinal;
2957 return TokenNameIdentifier;
2960 if ((data[++index] == 'o') && (data[++index] == 'r')
2961 && (data[++index] == 'e') && (data[++index] == 'a')
2962 && (data[++index] == 'c') && (data[++index] == 'h'))
2963 return TokenNameforeach;
2965 return TokenNameIdentifier;
2968 if ((data[++index] == 'u') && (data[++index] == 'n')
2969 && (data[++index] == 'c') && (data[++index] == 't')
2970 && (data[++index] == 'i') && (data[++index] == 'o')
2971 && (data[++index] == 'n'))
2972 return TokenNamefunction;
2974 return TokenNameIdentifier;
2976 return TokenNameIdentifier;
2981 if ((data[++index] == 'l') && (data[++index] == 'o')
2982 && (data[++index] == 'b') && (data[++index] == 'a')
2983 && (data[++index] == 'l')) {
2984 return TokenNameglobal;
2987 return TokenNameIdentifier;
2989 //if int isset include include_once instanceof interface implements
2992 if (data[++index] == 'f')
2995 return TokenNameIdentifier;
2997 // if ((data[++index] == 'n') && (data[++index] == 't'))
2998 // return TokenNameint;
3000 // return TokenNameIdentifier;
3002 if ((data[++index] == 's') && (data[++index] == 's')
3003 && (data[++index] == 'e') && (data[++index] == 't'))
3004 return TokenNameisset;
3006 return TokenNameIdentifier;
3008 if ((data[++index] == 'n') && (data[++index] == 'c')
3009 && (data[++index] == 'l') && (data[++index] == 'u')
3010 && (data[++index] == 'd') && (data[++index] == 'e'))
3011 return TokenNameinclude;
3013 return TokenNameIdentifier;
3016 if ((data[++index] == 'n') && (data[++index] == 't')
3017 && (data[++index] == 'e') && (data[++index] == 'r')
3018 && (data[++index] == 'f') && (data[++index] == 'a')
3019 && (data[++index] == 'c') && (data[++index] == 'e'))
3020 return TokenNameinterface;
3022 return TokenNameIdentifier;
3025 if ((data[++index] == 'n') && (data[++index] == 's')
3026 && (data[++index] == 't') && (data[++index] == 'a')
3027 && (data[++index] == 'n') && (data[++index] == 'c')
3028 && (data[++index] == 'e') && (data[++index] == 'o')
3029 && (data[++index] == 'f'))
3030 return TokenNameinstanceof;
// NOTE(review): data[index] is re-tested without rewinding index; this is only aligned when
// the "instanceof" test failed on its first character - confirm.
3031 if ((data[index] == 'm') && (data[++index] == 'p')
3032 && (data[++index] == 'l') && (data[++index] == 'e')
3033 && (data[++index] == 'm') && (data[++index] == 'e')
3034 && (data[++index] == 'n') && (data[++index] == 't')
3035 && (data[++index] == 's'))
3036 return TokenNameimplements;
3038 return TokenNameIdentifier;
3040 if ((data[++index] == 'n') && (data[++index] == 'c')
3041 && (data[++index] == 'l') && (data[++index] == 'u')
3042 && (data[++index] == 'd') && (data[++index] == 'e')
3043 && (data[++index] == '_') && (data[++index] == 'o')
3044 && (data[++index] == 'n') && (data[++index] == 'c')
3045 && (data[++index] == 'e'))
3046 return TokenNameinclude_once;
3048 return TokenNameIdentifier;
3050 return TokenNameIdentifier;
3055 if ((data[++index] == 'i') && (data[++index] == 's')
3056 && (data[++index] == 't')) {
3057 return TokenNamelist;
3060 return TokenNameIdentifier;
3065 if ((data[++index] == 'e') && (data[++index] == 'w'))
3066 return TokenNamenew;
3068 return TokenNameIdentifier;
3070 // if ((data[++index] == 'u') && (data[++index] == 'l')
3071 // && (data[++index] == 'l'))
3072 // return TokenNamenull;
3074 // return TokenNameIdentifier;
3076 return TokenNameIdentifier;
3081 if (data[++index] == 'r') {
3085 // if (length == 12) {
3086 // if ((data[++index] == 'l')
3087 // && (data[++index] == 'd')
3088 // && (data[++index] == '_')
3089 // && (data[++index] == 'f')
3090 // && (data[++index] == 'u')
3091 // && (data[++index] == 'n')
3092 // && (data[++index] == 'c')
3093 // && (data[++index] == 't')
3094 // && (data[++index] == 'i')
3095 // && (data[++index] == 'o')
3096 // && (data[++index] == 'n')) {
3097 // return TokenNameold_function;
3100 return TokenNameIdentifier;
3102 // print public private protected
3105 if ((data[++index] == 'r') && (data[++index] == 'i')
3106 && (data[++index] == 'n') && (data[++index] == 't')) {
3107 return TokenNameprint;
3109 return TokenNameIdentifier;
3111 if ((data[++index] == 'u') && (data[++index] == 'b')
3112 && (data[++index] == 'l') && (data[++index] == 'i')
3113 && (data[++index] == 'c')) {
3114 return TokenNamepublic;
3116 return TokenNameIdentifier;
3118 if ((data[++index] == 'r') && (data[++index] == 'i')
3119 && (data[++index] == 'v') && (data[++index] == 'a')
3120 && (data[++index] == 't') && (data[++index] == 'e')) {
3121 return TokenNameprivate;
3123 return TokenNameIdentifier;
3125 if ((data[++index] == 'r') && (data[++index] == 'o')
3126 && (data[++index] == 't') && (data[++index] == 'e')
3127 && (data[++index] == 'c') && (data[++index] == 't')
3128 && (data[++index] == 'e') && (data[++index] == 'd')) {
3129 return TokenNameprotected;
3131 return TokenNameIdentifier;
3133 return TokenNameIdentifier;
3135 //return require require_once
3137 if ((data[++index] == 'e') && (data[++index] == 't')
3138 && (data[++index] == 'u') && (data[++index] == 'r')
3139 && (data[++index] == 'n')) {
3140 return TokenNamereturn;
3142 } else if (length == 7) {
3143 if ((data[++index] == 'e') && (data[++index] == 'q')
3144 && (data[++index] == 'u') && (data[++index] == 'i')
3145 && (data[++index] == 'r') && (data[++index] == 'e')) {
3146 return TokenNamerequire;
3148 } else if (length == 12) {
3149 if ((data[++index] == 'e') && (data[++index] == 'q')
3150 && (data[++index] == 'u') && (data[++index] == 'i')
3151 && (data[++index] == 'r') && (data[++index] == 'e')
3152 && (data[++index] == '_') && (data[++index] == 'o')
3153 && (data[++index] == 'n') && (data[++index] == 'c')
3154 && (data[++index] == 'e')) {
3155 return TokenNamerequire_once;
3158 return TokenNameIdentifier;
// "static" and "switch" are told apart by their second character.
3163 if (data[++index] == 't')
3164 if ((data[++index] == 'a') && (data[++index] == 't')
3165 && (data[++index] == 'i') && (data[++index] == 'c')) {
3166 return TokenNamestatic;
3168 return TokenNameIdentifier;
3169 else if ((data[index] == 'w') && (data[++index] == 'i')
3170 && (data[++index] == 't') && (data[++index] == 'c')
3171 && (data[++index] == 'h'))
3172 return TokenNameswitch;
3174 return TokenNameIdentifier;
3176 return TokenNameIdentifier;
3182 if ((data[++index] == 'r') && (data[++index] == 'y'))
3183 return TokenNametry;
3185 return TokenNameIdentifier;
3187 // if ((data[++index] == 'r') && (data[++index] == 'u')
3188 // && (data[++index] == 'e'))
3189 // return TokenNametrue;
3191 // return TokenNameIdentifier;
3193 if ((data[++index] == 'h') && (data[++index] == 'r')
3194 && (data[++index] == 'o') && (data[++index] == 'w'))
3195 return TokenNamethrow;
3197 return TokenNameIdentifier;
3199 return TokenNameIdentifier;
3205 if ((data[++index] == 's') && (data[++index] == 'e'))
3206 return TokenNameuse;
3208 return TokenNameIdentifier;
3210 if ((data[++index] == 'n') && (data[++index] == 's')
3211 && (data[++index] == 'e') && (data[++index] == 't'))
3212 return TokenNameunset;
3214 return TokenNameIdentifier;
3216 return TokenNameIdentifier;
3222 if ((data[++index] == 'a') && (data[++index] == 'r'))
3223 return TokenNamevar;
3225 return TokenNameIdentifier;
3227 return TokenNameIdentifier;
3233 if ((data[++index] == 'h') && (data[++index] == 'i')
3234 && (data[++index] == 'l') && (data[++index] == 'e'))
3235 return TokenNamewhile;
3237 return TokenNameIdentifier;
3238 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3239 // (data[++index]=='e') && (data[++index]=='f')&&
3240 // (data[++index]=='p'))
3241 //return TokenNamewidefp ;
3243 //return TokenNameIdentifier;
3245 return TokenNameIdentifier;
3251 if ((data[++index] == 'o') && (data[++index] == 'r'))
3252 return TokenNamexor;
3254 return TokenNameIdentifier;
3256 return TokenNameIdentifier;
3259 return TokenNameIdentifier;
/**
 * Scans a numeric literal whose first digit is currentCharacter and returns its token kind.
 * Handles a 0x/0X hexadecimal form, a leading-zero form that may turn out to be a floating
 * point number, and the plain decimal form with optional fraction, exponent and d/D suffix.
 *
 * @param dotPrefix when true the number is treated as floating from the start and neither the
 *        leading-zero handling nor the '.' fraction lookup is applied
 * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
 * @throws InvalidInputException with INVALID_HEXA when the character after 0x is not a hex
 *         digit, or INVALID_FLOAT when an exponent is not followed by a digit
 */
3262 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3263 //when entering this method the currentCharacter is the first
3264 //digit of the number , i.e. it may be preceded by a . when
3266 boolean floating = dotPrefix;
3267 if ((!dotPrefix) && (currentCharacter == '0')) {
3268 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3269 //force the first char of the hexa number do exist...
3270 // consume next character
3271 unicodeAsBackSlash = false;
3272 currentCharacter = source[currentPosition++];
3273 // if (((currentCharacter = source[currentPosition++]) == '\\')
3274 // && (source[currentPosition] == 'u')) {
3275 // getNextUnicodeChar();
3277 // if (withoutUnicodePtr != 0) {
3278 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// At least one hexadecimal digit is required after the 0x prefix.
3281 if (Character.digit(currentCharacter, 16) == -1)
3282 throw new InvalidInputException(INVALID_HEXA);
// Consume the remaining hexadecimal digits.
3284 while (getNextCharAsDigit(16)) {
3286 // if (getNextChar('l', 'L') >= 0)
3287 // return TokenNameLongLiteral;
3289 return TokenNameIntegerLiteral;
3291 //there is x or X in the number
3292 //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3293 // 00078.0 is true !!!!! crazy language
3294 if (getNextCharAsDigit()) {
3295 //-------------potential octal-----------------
3296 while (getNextCharAsDigit()) {
3298 // if (getNextChar('l', 'L') >= 0) {
3299 // return TokenNameLongLiteral;
3302 // if (getNextChar('f', 'F') >= 0) {
3303 // return TokenNameFloatingPointLiteral;
// A d/D suffix right after the digits makes the literal a double.
3305 if (getNextChar('d', 'D') >= 0) {
3306 return TokenNameDoubleLiteral;
3307 } else { //make the distinction between octal and float ....
3308 if (getNextChar('.')) { //bingo ! ....
// Fraction digits after the '.'.
3309 while (getNextCharAsDigit()) {
3311 if (getNextChar('e', 'E') >= 0) {
3312 // consume next character
3313 unicodeAsBackSlash = false;
3314 currentCharacter = source[currentPosition++];
3315 // if (((currentCharacter = source[currentPosition++]) == '\\')
3316 // && (source[currentPosition] == 'u')) {
3317 // getNextUnicodeChar();
3319 // if (withoutUnicodePtr != 0) {
3320 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Optional sign of the exponent.
3323 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3324 // consume next character
3325 unicodeAsBackSlash = false;
3326 currentCharacter = source[currentPosition++];
3327 // if (((currentCharacter = source[currentPosition++]) == '\\')
3328 // && (source[currentPosition] == 'u')) {
3329 // getNextUnicodeChar();
3331 // if (withoutUnicodePtr != 0) {
3332 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3333 // currentCharacter;
// The exponent needs at least one digit.
3337 if (!Character.isDigit(currentCharacter))
3338 throw new InvalidInputException(INVALID_FLOAT);
3339 while (getNextCharAsDigit()) {
3342 // if (getNextChar('f', 'F') >= 0)
3343 // return TokenNameFloatingPointLiteral;
3344 getNextChar('d', 'D'); //jump over potential d or D
3345 return TokenNameDoubleLiteral;
3347 return TokenNameIntegerLiteral;
// Plain decimal form: digits, then an optional fraction, exponent and d/D suffix.
3354 while (getNextCharAsDigit()) {
3356 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3357 // return TokenNameLongLiteral;
3358 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3359 while (getNextCharAsDigit()) {
3363 //if floating is true both exponent and suffix may be optional
3364 if (getNextChar('e', 'E') >= 0) {
3366 // consume next character
3367 unicodeAsBackSlash = false;
3368 currentCharacter = source[currentPosition++];
3369 // if (((currentCharacter = source[currentPosition++]) == '\\')
3370 // && (source[currentPosition] == 'u')) {
3371 // getNextUnicodeChar();
3373 // if (withoutUnicodePtr != 0) {
3374 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3377 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3380 unicodeAsBackSlash = false;
3381 currentCharacter = source[currentPosition++];
3382 // if (((currentCharacter = source[currentPosition++]) == '\\')
3383 // && (source[currentPosition] == 'u')) {
3384 // getNextUnicodeChar();
3386 // if (withoutUnicodePtr != 0) {
3387 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// The exponent needs at least one digit.
3391 if (!Character.isDigit(currentCharacter))
3392 throw new InvalidInputException(INVALID_FLOAT);
3393 while (getNextCharAsDigit()) {
3396 if (getNextChar('d', 'D') >= 0)
3397 return TokenNameDoubleLiteral;
3398 // if (getNextChar('f', 'F') >= 0)
3399 // return TokenNameFloatingPointLiteral;
3400 //the long flag has been tested before
// Without a suffix the kind depends on whether a fraction or exponent marked it as floating.
3401 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3404 * Search the line number corresponding to a specific position
3407 public final int getLineNumber(int position) {
// Looks up position among the recorded line ends (lineEnds[0 .. linePtr]).
// NOTE(review): presumably a fixed default is returned when no line ends were recorded - confirm.
3408 if (lineEnds == null)
// Number of recorded line ends.
3410 int length = linePtr + 1;
// g and d are the lower and upper bounds of the index range still being searched.
// NOTE(review): m is presumably the midpoint of [g, d] (binary search) - confirm.
3413 int g = 0, d = length - 1;
3417 if (position < lineEnds[m]) {
3419 } else if (position > lineEnds[m]) {
// Final adjustment depending on which side of lineEnds[m] the position falls.
3425 if (position < lineEnds[m]) {
// Setter for the scanner's PHP mode.
// NOTE(review): presumably just assigns the phpMode field - confirm.
3430 public void setPHPMode(boolean mode) {
/**
 * Installs the character buffer to scan and resets the scanning state that depends on it.
 *
 * @param source the characters to scan; {@code null} is replaced by an empty array
 */
3433 public final void setSource(char[] source) {
3434 //the source-buffer is set to sourceString
3435 if (source == null) {
3436 this.source = new char[0];
3438 this.source = source;
// Scanning restarts at the beginning of the new buffer.
3441 initialPosition = currentPosition = 0;
3442 containsAssertKeyword = false;
// Scratch buffer sized to the new source (this.source is never null at this point).
3443 withoutUnicodeBuffer = new char[this.source.length];
/**
 * Debug rendering of the scanner state: the source text with the current token
 * (startPosition up to currentPosition) framed by "Starts here -->" and "<-- Ends here" markers.
 */
3445 public String toString() {
// Nothing left to frame when the token start is at the end of the source.
3446 if (startPosition == source.length)
3447 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3448 if (currentPosition > source.length)
3449 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
// front: everything before the current token.
3450 char front[] = new char[startPosition];
3451 System.arraycopy(source, 0, front, 0, startPosition);
// middle: the current token; this expression reduces to currentPosition - startPosition.
3452 int middleLength = (currentPosition - 1) - startPosition + 1;
3454 if (middleLength > -1) {
3455 middle = new char[middleLength];
3456 System.arraycopy(source, startPosition, middle, 0, middleLength);
3458 middle = new char[0];
// end: everything from currentPosition onwards.
// NOTE(review): the array is allocated one element longer than the number of characters
// copied into it, so its last element keeps the default '\u0000' - confirm this is intended.
3460 char end[] = new char[source.length - (currentPosition - 1)];
3461 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length
3462 - (currentPosition - 1) - 1);
3463 return new String(front)
3464 + "\n===============================\nStarts here -->" //$NON-NLS-1$
3465 + new String(middle)
3466 + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable description of the token code <code>act</code>.
 * Reserved words, operators and punctuation are returned as fixed strings;
 * tokens whose text varies (identifiers, variables, literals, comments,
 * whitespace, HTML) are returned as <code>Kind(text)</code>, where the text
 * comes from getCurrentTokenSource().
 *
 * NOTE(review): the <code>switch</code> header, several <code>case</code>
 * labels (for example the ones belonging to "AND", "as", "do", "for", "if",
 * "new", "OR", "var", "XOR", "!", "^", "&amp;", "|", "." and "EOF") and a few
 * <code>return</code> statements are not visible in this part of the file;
 * confirm them against the complete source before editing.
 *
 * @param act the token code to describe
 * @return the description of the token code
 */
3469 public final String toStringAction(int act) {
// Error, inline HTML, identifier and variable tokens.
3471 case TokenNameERROR :
3472 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3474 case TokenNameINLINE_HTML :
3475 return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3476 case TokenNameIdentifier :
3477 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3478 case TokenNameVariable :
3479 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// Reserved words: returned as a fixed spelling ("AND", "OR" and "XOR" are
// returned in upper case).
3480 case TokenNameabstract :
3481 return "abstract"; //$NON-NLS-1$
3483 return "AND"; //$NON-NLS-1$
3484 case TokenNamearray :
3485 return "array"; //$NON-NLS-1$
3487 return "as"; //$NON-NLS-1$
3488 case TokenNamebreak :
3489 return "break"; //$NON-NLS-1$
3490 case TokenNamecase :
3491 return "case"; //$NON-NLS-1$
3492 case TokenNameclass :
3493 return "class"; //$NON-NLS-1$
3494 case TokenNameclone :
3497 case TokenNameconst :
3500 case TokenNamecontinue :
3501 return "continue"; //$NON-NLS-1$
3502 case TokenNamedefault :
3503 return "default"; //$NON-NLS-1$
3504 // case TokenNamedefine :
3505 // return "define"; //$NON-NLS-1$
3507 return "do"; //$NON-NLS-1$
3508 case TokenNameecho :
3509 return "echo"; //$NON-NLS-1$
3510 case TokenNameelse :
3511 return "else"; //$NON-NLS-1$
3512 case TokenNameelseif :
3513 return "elseif"; //$NON-NLS-1$
3514 case TokenNameendfor :
3515 return "endfor"; //$NON-NLS-1$
3516 case TokenNameendforeach :
3517 return "endforeach"; //$NON-NLS-1$
3518 case TokenNameendif :
3519 return "endif"; //$NON-NLS-1$
3520 case TokenNameendswitch :
3521 return "endswitch"; //$NON-NLS-1$
3522 case TokenNameendwhile :
3523 return "endwhile"; //$NON-NLS-1$
3524 case TokenNameextends :
3525 return "extends"; //$NON-NLS-1$
3526 // case TokenNamefalse :
3527 // return "false"; //$NON-NLS-1$
3528 case TokenNamefinal :
3529 return "final"; //$NON-NLS-1$
3531 return "for"; //$NON-NLS-1$
3532 case TokenNameforeach :
3533 return "foreach"; //$NON-NLS-1$
3534 case TokenNamefunction :
3535 return "function"; //$NON-NLS-1$
3536 case TokenNameglobal :
3537 return "global"; //$NON-NLS-1$
3539 return "if"; //$NON-NLS-1$
3540 case TokenNameimplements :
3541 return "implements"; //$NON-NLS-1$
3542 case TokenNameinclude :
3543 return "include"; //$NON-NLS-1$
3544 case TokenNameinclude_once :
3545 return "include_once"; //$NON-NLS-1$
3546 case TokenNameinterface :
3547 return "interface"; //$NON-NLS-1$
3548 case TokenNameisset :
3549 return "isset"; //$NON-NLS-1$
3550 case TokenNamelist :
3551 return "list"; //$NON-NLS-1$
3553 return "new"; //$NON-NLS-1$
3554 // case TokenNamenull :
3555 // return "null"; //$NON-NLS-1$
3557 return "OR"; //$NON-NLS-1$
3558 case TokenNameprint :
3559 return "print"; //$NON-NLS-1$
3560 case TokenNameprivate :
3561 return "private"; //$NON-NLS-1$
3562 case TokenNameprotected :
3563 return "protected"; //$NON-NLS-1$
3564 case TokenNamepublic :
3565 return "public"; //$NON-NLS-1$
3566 case TokenNamerequire :
3567 return "require"; //$NON-NLS-1$
3568 case TokenNamerequire_once :
3569 return "require_once"; //$NON-NLS-1$
3570 case TokenNamereturn :
3571 return "return"; //$NON-NLS-1$
3572 case TokenNamestatic :
3573 return "static"; //$NON-NLS-1$
3574 case TokenNameswitch :
3575 return "switch"; //$NON-NLS-1$
3576 // case TokenNametrue :
3577 // return "true"; //$NON-NLS-1$
3578 case TokenNameunset :
3579 return "unset"; //$NON-NLS-1$
3581 return "var"; //$NON-NLS-1$
3582 case TokenNamewhile :
3583 return "while"; //$NON-NLS-1$
3585 return "XOR"; //$NON-NLS-1$
3586 // case TokenNamethis :
3587 // return "$this"; //$NON-NLS-1$
// Numeric and string tokens: rendered as Kind(<current token source>).
3588 case TokenNameIntegerLiteral :
3589 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3590 case TokenNameDoubleLiteral :
3591 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3592 case TokenNameStringLiteral :
3593 return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3594 case TokenNameStringConstant :
3595 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3596 case TokenNameStringInterpolated :
3597 return "StringInterpolated(" + new String(getCurrentTokenSource())
3598 + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3599 case TokenNameHEREDOC :
3600 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// Multi-character operators.
3601 case TokenNamePLUS_PLUS :
3602 return "++"; //$NON-NLS-1$
3603 case TokenNameMINUS_MINUS :
3604 return "--"; //$NON-NLS-1$
3605 case TokenNameEQUAL_EQUAL :
3606 return "=="; //$NON-NLS-1$
3607 case TokenNameEQUAL_EQUAL_EQUAL :
3608 return "==="; //$NON-NLS-1$
3609 case TokenNameEQUAL_GREATER :
3610 return "=>"; //$NON-NLS-1$
3611 case TokenNameLESS_EQUAL :
3612 return "<="; //$NON-NLS-1$
3613 case TokenNameGREATER_EQUAL :
3614 return ">="; //$NON-NLS-1$
3615 case TokenNameNOT_EQUAL :
3616 return "!="; //$NON-NLS-1$
3617 case TokenNameNOT_EQUAL_EQUAL :
3618 return "!=="; //$NON-NLS-1$
3619 case TokenNameLEFT_SHIFT :
3620 return "<<"; //$NON-NLS-1$
3621 case TokenNameRIGHT_SHIFT :
3622 return ">>"; //$NON-NLS-1$
3623 case TokenNamePLUS_EQUAL :
3624 return "+="; //$NON-NLS-1$
3625 case TokenNameMINUS_EQUAL :
3626 return "-="; //$NON-NLS-1$
3627 case TokenNameMULTIPLY_EQUAL :
3628 return "*="; //$NON-NLS-1$
3629 case TokenNameDIVIDE_EQUAL :
3630 return "/="; //$NON-NLS-1$
3631 case TokenNameAND_EQUAL :
3632 return "&="; //$NON-NLS-1$
3633 case TokenNameOR_EQUAL :
3634 return "|="; //$NON-NLS-1$
3635 case TokenNameXOR_EQUAL :
3636 return "^="; //$NON-NLS-1$
3637 case TokenNameREMAINDER_EQUAL :
3638 return "%="; //$NON-NLS-1$
3639 case TokenNameDOT_EQUAL :
3640 return ".="; //$NON-NLS-1$
3641 case TokenNameLEFT_SHIFT_EQUAL :
3642 return "<<="; //$NON-NLS-1$
3643 case TokenNameRIGHT_SHIFT_EQUAL :
3644 return ">>="; //$NON-NLS-1$
3645 case TokenNameOR_OR :
3646 return "||"; //$NON-NLS-1$
3647 case TokenNameAND_AND :
3648 return "&&"; //$NON-NLS-1$
// Remaining operators and punctuation.
3649 case TokenNamePLUS :
3650 return "+"; //$NON-NLS-1$
3651 case TokenNameMINUS :
3652 return "-"; //$NON-NLS-1$
3653 case TokenNameMINUS_GREATER :
3656 return "!"; //$NON-NLS-1$
3657 case TokenNameREMAINDER :
3658 return "%"; //$NON-NLS-1$
3660 return "^"; //$NON-NLS-1$
3662 return "&"; //$NON-NLS-1$
3663 case TokenNameMULTIPLY :
3664 return "*"; //$NON-NLS-1$
3666 return "|"; //$NON-NLS-1$
3667 case TokenNameTWIDDLE :
3668 return "~"; //$NON-NLS-1$
3669 case TokenNameTWIDDLE_EQUAL :
3670 return "~="; //$NON-NLS-1$
3671 case TokenNameDIVIDE :
3672 return "/"; //$NON-NLS-1$
3673 case TokenNameGREATER :
3674 return ">"; //$NON-NLS-1$
3675 case TokenNameLESS :
3676 return "<"; //$NON-NLS-1$
3677 case TokenNameLPAREN :
3678 return "("; //$NON-NLS-1$
3679 case TokenNameRPAREN :
3680 return ")"; //$NON-NLS-1$
3681 case TokenNameLBRACE :
3682 return "{"; //$NON-NLS-1$
3683 case TokenNameRBRACE :
3684 return "}"; //$NON-NLS-1$
3685 case TokenNameLBRACKET :
3686 return "["; //$NON-NLS-1$
3687 case TokenNameRBRACKET :
3688 return "]"; //$NON-NLS-1$
3689 case TokenNameSEMICOLON :
3690 return ";"; //$NON-NLS-1$
3691 case TokenNameQUESTION :
3692 return "?"; //$NON-NLS-1$
3693 case TokenNameCOLON :
3694 return ":"; //$NON-NLS-1$
3695 case TokenNameCOMMA :
3696 return ","; //$NON-NLS-1$
3698 return "."; //$NON-NLS-1$
3699 case TokenNameEQUAL :
3700 return "="; //$NON-NLS-1$
3703 case TokenNameDOLLAR :
3705 // case TokenNameDOLLAR_LBRACE :
3708 return "EOF"; //$NON-NLS-1$
// Whitespace, comment and HTML tokens: rendered as Kind(<current token source>).
3709 case TokenNameWHITESPACE :
3710 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3711 case TokenNameCOMMENT_LINE :
3712 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3713 case TokenNameCOMMENT_BLOCK :
3714 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3715 case TokenNameCOMMENT_PHPDOC :
3716 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3717 case TokenNameHTML :
3718 return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// Predefined __XXX__ constants.
3719 case TokenNameFILE :
3720 return "__FILE__"; //$NON-NLS-1$
3721 case TokenNameLINE :
3722 return "__LINE__"; //$NON-NLS-1$
3723 case TokenNameCLASS_C :
3724 return "__CLASS__"; //$NON-NLS-1$
3725 case TokenNameMETHOD_C :
3726 return "__METHOD__"; //$NON-NLS-1$
3727 case TokenNameFUNC_C :
3728 return "__FUNCTION__"; //$NON-NLS-1$
// Cast operators.
3729 case TokenNameboolCAST :
3730 return "( bool )"; //$NON-NLS-1$
3731 case TokenNameintCAST :
3732 return "( int )"; //$NON-NLS-1$
3733 case TokenNamedoubleCAST :
3734 return "( double )"; //$NON-NLS-1$
3735 case TokenNameobjectCAST :
3736 return "( object )"; //$NON-NLS-1$
3737 case TokenNamestringCAST :
3738 return "( string )"; //$NON-NLS-1$
// Text for a code that none of the cases above handled; presumably the
// default branch (its label is not visible here -- confirm).
// NOTE(review): the Integer wrapper is created only for string concatenation;
// concatenating act directly would yield the same text.
3740 return "not-a-token(" + (new Integer(act)) + ") "
3741 + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner with assert mode switched off: the three flags are
 * forwarded to the four-argument constructor, which receives
 * <code>false</code> as its last argument.
 *
 * @param tokenizeComments forwarded unchanged
 * @param tokenizeWhiteSpace forwarded unchanged
 * @param checkNonExternalizedStringLiterals forwarded unchanged
 */
3744 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3745 boolean checkNonExternalizedStringLiterals) {
3746 this(tokenizeComments, tokenizeWhiteSpace,
3747 checkNonExternalizedStringLiterals, false);
/**
 * Creates a scanner and stores each argument in the field of the same name.
 *
 * @param tokenizeComments value stored in the tokenizeComments field
 * @param tokenizeWhiteSpace value stored in the tokenizeWhiteSpace field
 * @param checkNonExternalizedStringLiterals value stored in the
 *          checkNonExternalizedStringLiterals field
 * @param assertMode value stored in the assertMode field
 */
3749 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3750 boolean checkNonExternalizedStringLiterals, boolean assertMode) {
// Start with the largest possible end-of-file position; no source has been
// attached by this constructor.
3751 this.eofPosition = Integer.MAX_VALUE;
3752 this.tokenizeComments = tokenizeComments;
3753 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3754 this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3755 this.assertMode = assertMode;
/**
 * Passes the current NLS line (<code>currentLine</code>) to
 * parseTags(NLSLine).
 *
 * @throws InvalidInputException declared because parseTags declares it
 */
3757 private void checkNonExternalizeString() throws InvalidInputException {
// NOTE(review): the statement guarded by this null test is not visible here;
// presumably an early return so that parseTags never receives null -- confirm.
3758 if (currentLine == null)
3760 parseTags(currentLine);
/**
 * Looks for TAG_PREFIX ... TAG_POSTFIX tags in the current token source,
 * checks the one-based number found in each tag against <code>line</code>,
 * and then collects the non-null string literals of <code>line</code> into
 * <code>nonNLSStrings</code>.
 *
 * NOTE(review): the loop header around the tag search, the statement executed
 * when <code>line.exists(i)</code> is true, and the end of the
 * <code>nonNLSCounter == 0</code> branch are not visible in this part of the
 * file; confirm them against the complete source.
 *
 * @param line the string literals recorded for the line being checked
 * @throws InvalidInputException declared by the method; no throw statement is
 *           visible in the lines shown here
 */
3762 private void parseTags(NLSLine line) throws InvalidInputException {
3763 String s = new String(getCurrentTokenSource());
3764 int pos = s.indexOf(TAG_PREFIX);
3765 int lineLength = line.size();
// The tag number is the text between the end of the prefix and the next postfix.
3767 int start = pos + TAG_PREFIX_LENGTH;
// NOTE(review): indexOf returns -1 when no TAG_POSTFIX follows, and
// substring(start, -1) then throws StringIndexOutOfBoundsException.
3768 int end = s.indexOf(TAG_POSTFIX, start);
3769 String index = s.substring(start, end);
3772 i = Integer.parseInt(index) - 1;
3773 // Tags are one based not zero based.
3774 } catch (NumberFormatException e) {
3775 i = -1; // we don't want to consider this as a valid NLS tag
3777 if (line.exists(i)) {
// Continue searching after the tag that was just handled.
3780 pos = s.indexOf(TAG_PREFIX, start);
// Gather the literals that are still non-null in the line.
3782 this.nonNLSStrings = new StringLiteral[lineLength];
3783 int nonNLSCounter = 0;
3784 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3785 StringLiteral literal = (StringLiteral) iterator.next();
3786 if (literal != null) {
3787 this.nonNLSStrings[nonNLSCounter++] = literal;
// No literal left: nothing to report.
3790 if (nonNLSCounter == 0) {
3791 this.nonNLSStrings = null;
3795 this.wasNonExternalizedStringLiteral = true;
// Shrink the array to the number of literals actually stored.
3796 if (nonNLSCounter != lineLength) {
3797 System.arraycopy(this.nonNLSStrings, 0,
3798 (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
/**
 * Reads the character(s) of an escape sequence from <code>source</code> at
 * <code>currentPosition</code> and stores the resulting character in
 * <code>currentCharacter</code>. Single-character escapes are mapped to
 * backspace, tab, newline, form feed, carriage return, double quote, single
 * quote and backslash; octal escapes of up to three digits are converted to
 * their numeric value.
 *
 * NOTE(review): the <code>case</code> labels, <code>break</code> statements
 * and several branch bodies are not visible in this part of the file, so the
 * exact character that selects each assignment must be confirmed against the
 * complete source.
 *
 * @throws InvalidInputException with INVALID_ESCAPE when the escape sequence
 *           is rejected
 */
3803 public final void scanEscapeCharacter() throws InvalidInputException {
3804 // the string with "\\u" is a legal string of two chars \ and u
3805 //thus we use a direct access to the source (for regular cases).
3806 if (unicodeAsBackSlash) {
3807 // consume next character
3808 unicodeAsBackSlash = false;
3809 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
3810 // (source[currentPosition] == 'u')) {
3811 // getNextUnicodeChar();
// Append the current character to the unicode-free buffer when that buffer
// is in use (pointer non-zero).
3813 if (withoutUnicodePtr != 0) {
3814 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Fetch the character to be interpreted as the escape and advance.
3818 currentCharacter = source[currentPosition++];
3819 switch (currentCharacter) {
3821 currentCharacter = '\b';
3824 currentCharacter = '\t';
3827 currentCharacter = '\n';
3830 currentCharacter = '\f';
3833 currentCharacter = '\r';
3836 currentCharacter = '\"';
3839 currentCharacter = '\'';
3842 currentCharacter = '\\';
3845 // -----------octal escape--------------
3847 // OctalDigit OctalDigit
3848 // ZeroToThree OctalDigit OctalDigit
3849 int number = Character.getNumericValue(currentCharacter);
3850 if (number >= 0 && number <= 7) {
// A first digit above 3 rules out a third octal digit.
3851 boolean zeroToThreeNot = number > 3;
3852 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3853 int digit = Character.getNumericValue(currentCharacter);
3854 if (digit >= 0 && digit <= 7) {
// Accumulate the second octal digit.
3855 number = (number * 8) + digit;
3857 .isDigit(currentCharacter = source[currentPosition++])) {
3858 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3859 // Digit --> ignore last character
3862 digit = Character.getNumericValue(currentCharacter);
3863 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
3864 // OctalDigit OctalDigit
3865 number = (number * 8) + digit;
3866 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3867 // --> ignore last character
3871 } else { // has read \OctalDigit NonDigit--> ignore last
3875 } else { // has read \OctalDigit NonOctalDigit--> ignore last
3879 } else { // has read \OctalDigit --> ignore last character
// NOTE(review): the condition guarding this throw is not visible here.
3883 throw new InvalidInputException(INVALID_ESCAPE);
// The accumulated octal value becomes the escaped character.
3884 currentCharacter = (char) number;
3886 throw new InvalidInputException(INVALID_ESCAPE);
3889 // check presence of task: tags
/**
 * Searches the source between <code>commentStart</code> and
 * <code>commentEnd</code> (never beyond <code>eofPosition</code>) for the
 * configured <code>taskTags</code>. Each occurrence is recorded in the
 * parallel arrays foundTaskTags, foundTaskPriorities, foundTaskPositions and
 * foundTaskMessages; a second pass then determines the message text that
 * follows each newly recorded tag.
 *
 * NOTE(review): several statements (for example the body of the first guard,
 * the <code>continue</code> targets and the loop bodies of the trimming
 * loops) are not visible in this part of the file; confirm them against the
 * complete source.
 *
 * @param commentStart index in <code>source</code> where the search starts
 * @param commentEnd index in <code>source</code> where the search stops
 *          (exclusive in the scanning loop)
 */
3890 public void checkTaskTag(int commentStart, int commentEnd) {
3891 // only look for newer task: tags
3892 if (this.foundTaskCount > 0
3893 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
// Remember where the tasks found by this call begin.
3896 int foundTaskIndex = this.foundTaskCount;
3897 nextChar : for (int i = commentStart; i < commentEnd
3898 && i < this.eofPosition; i++) {
3900 char[] priority = null;
3901 // check for tag occurrence
3902 nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
3903 tag = this.taskTags[itag];
// Use the priority at the same index as the tag when one is configured.
3904 priority = this.taskPriorities != null
3905 && itag < this.taskPriorities.length
3906 ? this.taskPriorities[itag]
3908 int tagLength = tag.length;
// Compare the tag character by character with the source at position i.
// NOTE(review): i + t is not checked against source.length in the visible code.
3909 for (int t = 0; t < tagLength; t++) {
3910 if (this.source[i + t] != tag[t])
// Allocate the result arrays on first use (5 entries), or double them when full.
3913 if (this.foundTaskTags == null) {
3914 this.foundTaskTags = new char[5][];
3915 this.foundTaskMessages = new char[5][];
3916 this.foundTaskPriorities = new char[5][];
3917 this.foundTaskPositions = new int[5][];
3918 } else if (this.foundTaskCount == this.foundTaskTags.length) {
3919 System.arraycopy(this.foundTaskTags, 0,
3920 this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
3921 this.foundTaskCount);
3922 System.arraycopy(this.foundTaskMessages, 0,
3923 this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
3924 this.foundTaskCount);
3925 System.arraycopy(this.foundTaskPriorities, 0,
3926 this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
3927 0, this.foundTaskCount);
3928 System.arraycopy(this.foundTaskPositions, 0,
3929 this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
3930 this.foundTaskCount);
// Record the match; the message starts out empty and is filled in below.
3932 this.foundTaskTags[this.foundTaskCount] = tag;
3933 this.foundTaskPriorities[this.foundTaskCount] = priority;
3934 this.foundTaskPositions[this.foundTaskCount] = new int[]{i,
3936 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
3937 this.foundTaskCount++;
3938 i += tagLength - 1; // will be incremented when looping
// Second pass: compute the message range for every task recorded by this call.
3941 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
3942 // retrieve message start and end positions
3943 int msgStart = this.foundTaskPositions[i][0]
3944 + this.foundTaskTags[i].length;
3945 int max_value = i + 1 < this.foundTaskCount
3946 ? this.foundTaskPositions[i + 1][0] - 1
3948 // at most beginning of next task
3949 if (max_value < msgStart)
3950 max_value = msgStart; // would only occur if tag is before EOF.
// Look forward for a line break between the tag and max_value.
3953 for (int j = msgStart; j < max_value; j++) {
3954 if ((c = this.source[j]) == '\n' || c == '\r') {
// Look backward from max_value for a '*' character.
3960 for (int j = max_value; j > msgStart; j--) {
3961 if ((c = this.source[j]) == '*') {
3969 if (msgStart == end)
// Skip whitespace at both ends of the message range.
// NOTE(review): both loops read the source array before testing
// msgStart <= end, so the index is used before the bound is checked.
3972 while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
3974 while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
3976 // update the end position of the task
3977 this.foundTaskPositions[i][1] = end;
3978 // get the message source
3979 final int messageLength = end - msgStart + 1;
3980 char[] message = new char[messageLength];
3981 System.arraycopy(source, msgStart, message, 0, messageLength);
3982 this.foundTaskMessages[i] = message;