1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12 import java.util.ArrayList;
13 import java.util.Iterator;
14 import java.util.List;
15 import net.sourceforge.phpdt.core.compiler.CharOperation;
16 import net.sourceforge.phpdt.core.compiler.IScanner;
17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
19 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
20 public class Scanner implements IScanner, ITerminalSymbols {
/*
 * The APIs are:
 * - getNextToken(), which returns the type of the current token
 *   (this value is not memorized by the scanner);
 * - getCurrentTokenSource(), which provides the token's "REAL" source
 *   (i.e. all unicode escapes have been transformed into the correct char);
 * - sourceStart, which gives the start position in the stream;
 * - currentPosition - 1, which gives the sourceEnd position in the stream.
 */
29 private boolean assertMode;
30 public boolean useAssertAsAnIndentifier = false;
31 //flag indicating if processed source contains occurrences of keyword assert
32 public boolean containsAssertKeyword = false;
33 public boolean recordLineSeparator;
34 public boolean phpMode = false;
35 public char currentCharacter;
36 public int startPosition;
37 public int currentPosition;
38 public int initialPosition, eofPosition;
// after this position, EOF is generated instead of real tokens from the source
41 public boolean tokenizeComments;
42 public boolean tokenizeWhiteSpace;
//source should be viewed as a window (i.e. a part)
//of an entire, very large stream
47 public char[] withoutUnicodeBuffer;
48 public int withoutUnicodePtr;
49 //when == 0 ==> no unicode in the current token
50 public boolean unicodeAsBackSlash = false;
51 public boolean scanningFloatLiteral = false;
52 //support for /** comments
53 //public char[][] comments = new char[10][];
54 public int[] commentStops = new int[10];
55 public int[] commentStarts = new int[10];
56 public int commentPtr = -1; // no comment test with commentPtr value -1
57 //diet parsing support - jump over some method body when requested
58 public boolean diet = false;
59 //support for the poor-line-debuggers ....
60 //remember the position of the cr/lf
61 public int[] lineEnds = new int[250];
62 public int linePtr = -1;
63 public boolean wasAcr = false;
64 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
65 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
66 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
67 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
68 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
69 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
70 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
71 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
72 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
73 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
74 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
75 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
//----------------optimized identifier management------------------
77 static final char[] charArray_a = new char[]{'a'},
78 charArray_b = new char[]{'b'}, charArray_c = new char[]{'c'},
79 charArray_d = new char[]{'d'}, charArray_e = new char[]{'e'},
80 charArray_f = new char[]{'f'}, charArray_g = new char[]{'g'},
81 charArray_h = new char[]{'h'}, charArray_i = new char[]{'i'},
82 charArray_j = new char[]{'j'}, charArray_k = new char[]{'k'},
83 charArray_l = new char[]{'l'}, charArray_m = new char[]{'m'},
84 charArray_n = new char[]{'n'}, charArray_o = new char[]{'o'},
85 charArray_p = new char[]{'p'}, charArray_q = new char[]{'q'},
86 charArray_r = new char[]{'r'}, charArray_s = new char[]{'s'},
87 charArray_t = new char[]{'t'}, charArray_u = new char[]{'u'},
88 charArray_v = new char[]{'v'}, charArray_w = new char[]{'w'},
89 charArray_x = new char[]{'x'}, charArray_y = new char[]{'y'},
90 charArray_z = new char[]{'z'};
91 static final char[] initCharArray = new char[]{'\u0000', '\u0000', '\u0000',
92 '\u0000', '\u0000', '\u0000'};
93 static final int TableSize = 30, InternalTableSize = 6;
95 public static final int OptimizedLength = 6;
97 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
98 // support for detecting non-externalized string literals
99 int currentLineNr = -1;
100 int previousLineNr = -1;
101 NLSLine currentLine = null;
102 List lines = new ArrayList();
103 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
104 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
105 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
106 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
107 public StringLiteral[] nonNLSStrings = null;
108 public boolean checkNonExternalizedStringLiterals = true;
109 public boolean wasNonExternalizedStringLiteral = false;
111 for (int i = 0; i < 6; i++) {
112 for (int j = 0; j < TableSize; j++) {
113 for (int k = 0; k < InternalTableSize; k++) {
114 charArray_length[i][j][k] = initCharArray;
119 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
121 public static final int RoundBracket = 0;
122 public static final int SquareBracket = 1;
123 public static final int CurlyBracket = 2;
124 public static final int BracketKinds = 3;
126 public char[][] foundTaskTags = null;
127 public char[][] foundTaskMessages;
128 public char[][] foundTaskPriorities = null;
129 public int[][] foundTaskPositions;
130 public int foundTaskCount = 0;
131 public char[][] taskTags = null;
132 public char[][] taskPriorities = null;
133 public static final boolean DEBUG = false;
134 public static final boolean TRACE = false;
/**
 * Creates a scanner by delegating to the three-argument constructor, passing
 * {@code false} for its third argument.
 *
 * @param tokenizeComments   forwarded unchanged to the three-argument constructor
 * @param tokenizeWhiteSpace forwarded unchanged to the three-argument constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  // NOTE(review): the meaning of the third flag is defined by the
  // three-argument constructor, which is declared elsewhere in this class.
  this(tokenizeComments, tokenizeWhiteSpace, false);
}
142 * Determines if the specified character is permissible as the first
143 * character in a PHP identifier
145 public static boolean isPHPIdentifierStart(char ch) {
146 return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
149 * Determines if the specified character may be part of a PHP identifier as
150 * other than the first character
152 public static boolean isPHPIdentifierPart(char ch) {
153 return Character.isLetterOrDigit(ch) || (ch == '_')
154 || (0x7F <= ch && ch <= 0xFF);
156 public final boolean atEnd() {
157 // This code is not relevant if source is
158 // Only a part of the real stream input
159 return source.length == currentPosition;
161 public char[] getCurrentIdentifierSource() {
162 //return the token REAL source (aka unicodes are precomputed)
164 // if (withoutUnicodePtr != 0)
165 // //0 is used as a fast test flag so the real first char is in position 1
167 // withoutUnicodeBuffer,
169 // result = new char[withoutUnicodePtr],
171 // withoutUnicodePtr);
173 int length = currentPosition - startPosition;
174 switch (length) { // see OptimizedLength
176 return optimizedCurrentTokenSource1();
178 return optimizedCurrentTokenSource2();
180 return optimizedCurrentTokenSource3();
182 return optimizedCurrentTokenSource4();
184 return optimizedCurrentTokenSource5();
186 return optimizedCurrentTokenSource6();
189 System.arraycopy(source, startPosition, result = new char[length], 0,
194 public int getCurrentTokenEndPosition() {
195 return this.currentPosition - 1;
197 public final char[] getCurrentTokenSource() {
198 // Return the token REAL source (aka unicodes are precomputed)
200 // if (withoutUnicodePtr != 0)
201 // // 0 is used as a fast test flag so the real first char is in position 1
203 // withoutUnicodeBuffer,
205 // result = new char[withoutUnicodePtr],
207 // withoutUnicodePtr);
210 System.arraycopy(source, startPosition,
211 result = new char[length = currentPosition - startPosition], 0, length);
215 public final char[] getCurrentTokenSource(int startPos) {
216 // Return the token REAL source (aka unicodes are precomputed)
218 // if (withoutUnicodePtr != 0)
219 // // 0 is used as a fast test flag so the real first char is in position 1
221 // withoutUnicodeBuffer,
223 // result = new char[withoutUnicodePtr],
225 // withoutUnicodePtr);
228 System.arraycopy(source, startPos,
229 result = new char[length = currentPosition - startPos], 0, length);
233 public final char[] getCurrentTokenSourceString() {
234 //return the token REAL source (aka unicodes are precomputed).
235 //REMOVE the two " that are at the beginning and the end.
237 if (withoutUnicodePtr != 0)
238 //0 is used as a fast test flag so the real first char is in position 1
239 System.arraycopy(withoutUnicodeBuffer, 2,
240 //2 is 1 (real start) + 1 (to jump over the ")
241 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
244 System.arraycopy(source, startPosition + 1,
245 result = new char[length = currentPosition - startPosition - 2], 0,
250 public int getCurrentTokenStartPosition() {
251 return this.startPosition;
253 public final char[] getCurrentStringLiteralSource() {
254 // Return the token REAL source (aka unicodes are precomputed)
257 System.arraycopy(source, startPosition + 1,
258 result = new char[length = currentPosition - startPosition - 2], 0,
264 * Search the source position corresponding to the end of a given line number
266 * Line numbers are 1-based, and relative to the scanner initialPosition.
267 * Character positions are 0-based.
269 * In case the given line number is inconsistent, answers -1.
271 public final int getLineEnd(int lineNumber) {
272 if (lineEnds == null)
274 if (lineNumber >= lineEnds.length)
278 if (lineNumber == lineEnds.length - 1)
280 return lineEnds[lineNumber - 1];
281 // next line start one character behind the lineEnd of the previous line
284 * Search the source position corresponding to the beginning of a given line
287 * Line numbers are 1-based, and relative to the scanner initialPosition.
288 * Character positions are 0-based.
290 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
292 * In case the given line number is inconsistent, answers -1.
294 public final int getLineStart(int lineNumber) {
295 if (lineEnds == null)
297 if (lineNumber >= lineEnds.length)
302 return initialPosition;
303 return lineEnds[lineNumber - 2] + 1;
304 // next line start one character behind the lineEnd of the previous line
306 public final boolean getNextChar(char testedChar) {
308 //handle the case of unicode.
309 //when a unicode appears then we must use a buffer that holds char
311 //At the end of this method currentCharacter holds the new visited char
312 //and currentPosition points right next after it
313 //Both previous lines are true if the currentCharacter is == to the
315 //On false, no side effect has occured.
316 //ALL getNextChar.... ARE OPTIMIZED COPIES
317 int temp = currentPosition;
319 currentCharacter = source[currentPosition++];
320 // if (((currentCharacter = source[currentPosition++]) == '\\')
321 // && (source[currentPosition] == 'u')) {
322 // //-------------unicode traitement ------------
323 // int c1, c2, c3, c4;
324 // int unicodeSize = 6;
325 // currentPosition++;
326 // while (source[currentPosition] == 'u') {
327 // currentPosition++;
331 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
333 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
335 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
337 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
339 // currentPosition = temp;
343 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
344 // if (currentCharacter != testedChar) {
345 // currentPosition = temp;
348 // unicodeAsBackSlash = currentCharacter == '\\';
350 // //need the unicode buffer
351 // if (withoutUnicodePtr == 0) {
352 // //buffer all the entries that have been left aside....
353 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
357 // withoutUnicodeBuffer,
359 // withoutUnicodePtr);
361 // //fill the buffer with the char
362 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
365 // } //-------------end unicode traitement--------------
367 if (currentCharacter != testedChar) {
368 currentPosition = temp;
371 unicodeAsBackSlash = false;
372 // if (withoutUnicodePtr != 0)
373 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
376 } catch (IndexOutOfBoundsException e) {
377 unicodeAsBackSlash = false;
378 currentPosition = temp;
382 public final int getNextChar(char testedChar1, char testedChar2) {
383 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
384 //test can be done with (x==0) for the first and (x>0) for the second
385 //handle the case of unicode.
386 //when a unicode appears then we must use a buffer that holds char
388 //At the end of this method currentCharacter holds the new visited char
389 //and currentPosition points right next after it
390 //Both previous lines are true if the currentCharacter is == to the
392 //On false, no side effect has occured.
393 //ALL getNextChar.... ARE OPTIMIZED COPIES
394 int temp = currentPosition;
397 currentCharacter = source[currentPosition++];
398 // if (((currentCharacter = source[currentPosition++]) == '\\')
399 // && (source[currentPosition] == 'u')) {
400 // //-------------unicode traitement ------------
401 // int c1, c2, c3, c4;
402 // int unicodeSize = 6;
403 // currentPosition++;
404 // while (source[currentPosition] == 'u') {
405 // currentPosition++;
409 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
411 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
413 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
415 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
417 // currentPosition = temp;
421 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
422 // if (currentCharacter == testedChar1)
424 // else if (currentCharacter == testedChar2)
427 // currentPosition = temp;
431 // //need the unicode buffer
432 // if (withoutUnicodePtr == 0) {
433 // //buffer all the entries that have been left aside....
434 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
438 // withoutUnicodeBuffer,
440 // withoutUnicodePtr);
442 // //fill the buffer with the char
443 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
445 // } //-------------end unicode traitement--------------
447 if (currentCharacter == testedChar1)
449 else if (currentCharacter == testedChar2)
452 currentPosition = temp;
455 // if (withoutUnicodePtr != 0)
456 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
459 } catch (IndexOutOfBoundsException e) {
460 currentPosition = temp;
464 public final boolean getNextCharAsDigit() {
466 //handle the case of unicode.
467 //when a unicode appears then we must use a buffer that holds char
469 //At the end of this method currentCharacter holds the new visited char
470 //and currentPosition points right next after it
471 //Both previous lines are true if the currentCharacter is a digit
472 //On false, no side effect has occured.
473 //ALL getNextChar.... ARE OPTIMIZED COPIES
474 int temp = currentPosition;
476 currentCharacter = source[currentPosition++];
477 // if (((currentCharacter = source[currentPosition++]) == '\\')
478 // && (source[currentPosition] == 'u')) {
479 // //-------------unicode traitement ------------
480 // int c1, c2, c3, c4;
481 // int unicodeSize = 6;
482 // currentPosition++;
483 // while (source[currentPosition] == 'u') {
484 // currentPosition++;
488 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
490 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
492 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
494 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
496 // currentPosition = temp;
500 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
501 // if (!Character.isDigit(currentCharacter)) {
502 // currentPosition = temp;
506 // //need the unicode buffer
507 // if (withoutUnicodePtr == 0) {
508 // //buffer all the entries that have been left aside....
509 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
513 // withoutUnicodeBuffer,
515 // withoutUnicodePtr);
517 // //fill the buffer with the char
518 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
520 // } //-------------end unicode traitement--------------
522 if (!Character.isDigit(currentCharacter)) {
523 currentPosition = temp;
526 // if (withoutUnicodePtr != 0)
527 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
530 } catch (IndexOutOfBoundsException e) {
531 currentPosition = temp;
535 public final boolean getNextCharAsDigit(int radix) {
537 //handle the case of unicode.
538 //when a unicode appears then we must use a buffer that holds char
540 //At the end of this method currentCharacter holds the new visited char
541 //and currentPosition points right next after it
542 //Both previous lines are true if the currentCharacter is a digit base on
544 //On false, no side effect has occured.
545 //ALL getNextChar.... ARE OPTIMIZED COPIES
546 int temp = currentPosition;
548 currentCharacter = source[currentPosition++];
549 // if (((currentCharacter = source[currentPosition++]) == '\\')
550 // && (source[currentPosition] == 'u')) {
551 // //-------------unicode traitement ------------
552 // int c1, c2, c3, c4;
553 // int unicodeSize = 6;
554 // currentPosition++;
555 // while (source[currentPosition] == 'u') {
556 // currentPosition++;
560 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
562 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
564 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
566 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
568 // currentPosition = temp;
572 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
573 // if (Character.digit(currentCharacter, radix) == -1) {
574 // currentPosition = temp;
578 // //need the unicode buffer
579 // if (withoutUnicodePtr == 0) {
580 // //buffer all the entries that have been left aside....
581 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
585 // withoutUnicodeBuffer,
587 // withoutUnicodePtr);
589 // //fill the buffer with the char
590 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
592 // } //-------------end unicode traitement--------------
594 if (Character.digit(currentCharacter, radix) == -1) {
595 currentPosition = temp;
598 // if (withoutUnicodePtr != 0)
599 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
602 } catch (IndexOutOfBoundsException e) {
603 currentPosition = temp;
607 public boolean getNextCharAsJavaIdentifierPart() {
609 //handle the case of unicode.
610 //when a unicode appears then we must use a buffer that holds char
612 //At the end of this method currentCharacter holds the new visited char
613 //and currentPosition points right next after it
614 //Both previous lines are true if the currentCharacter is a
615 // JavaIdentifierPart
616 //On false, no side effect has occured.
617 //ALL getNextChar.... ARE OPTIMIZED COPIES
618 int temp = currentPosition;
620 currentCharacter = source[currentPosition++];
621 // if (((currentCharacter = source[currentPosition++]) == '\\')
622 // && (source[currentPosition] == 'u')) {
623 // //-------------unicode traitement ------------
624 // int c1, c2, c3, c4;
625 // int unicodeSize = 6;
626 // currentPosition++;
627 // while (source[currentPosition] == 'u') {
628 // currentPosition++;
632 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
634 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
636 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
638 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
640 // currentPosition = temp;
644 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
645 // if (!isPHPIdentifierPart(currentCharacter)) {
646 // currentPosition = temp;
650 // //need the unicode buffer
651 // if (withoutUnicodePtr == 0) {
652 // //buffer all the entries that have been left aside....
653 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
657 // withoutUnicodeBuffer,
659 // withoutUnicodePtr);
661 // //fill the buffer with the char
662 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
664 // } //-------------end unicode traitement--------------
666 if (!isPHPIdentifierPart(currentCharacter)) {
667 currentPosition = temp;
670 // if (withoutUnicodePtr != 0)
671 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
674 } catch (IndexOutOfBoundsException e) {
675 currentPosition = temp;
679 public int getCastOrParen() {
680 int tempPosition = currentPosition;
681 char tempCharacter = currentCharacter;
682 int tempToken = TokenNameLPAREN;
683 boolean found = false;
684 StringBuffer buf = new StringBuffer();
687 currentCharacter = source[currentPosition++];
688 } while (currentCharacter == ' ' || currentCharacter == '\t');
689 while ((currentCharacter >= 'a' && currentCharacter <= 'z')
690 || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
691 buf.append(currentCharacter);
692 currentCharacter = source[currentPosition++];
694 if (buf.length() >= 3 && buf.length() <= 7) {
695 char[] data = buf.toString().toCharArray();
697 switch (data.length) {
700 if ((data[index] == 'i') && (data[++index] == 'n')
701 && (data[++index] == 't')) {
703 tempToken = TokenNameintCAST;
708 if ((data[index] == 'b') && (data[++index] == 'o')
709 && (data[++index] == 'o') && (data[++index] == 'l')) {
711 tempToken = TokenNameboolCAST;
714 if ((data[index] == 'r') && (data[++index] == 'e')
715 && (data[++index] == 'a') && (data[++index] == 'l')) {
717 tempToken = TokenNamedoubleCAST;
723 if ((data[index] == 'a') && (data[++index] == 'r')
724 && (data[++index] == 'r') && (data[++index] == 'a')
725 && (data[++index] == 'y')) {
727 tempToken = TokenNamearrayCAST;
730 if ((data[index] == 'u') && (data[++index] == 'n')
731 && (data[++index] == 's') && (data[++index] == 'e')
732 && (data[++index] == 't')) {
734 tempToken = TokenNameunsetCAST;
737 if ((data[index] == 'f') && (data[++index] == 'l')
738 && (data[++index] == 'o') && (data[++index] == 'a')
739 && (data[++index] == 't')) {
741 tempToken = TokenNamedoubleCAST;
747 // object string double
748 if ((data[index] == 'o') && (data[++index] == 'b')
749 && (data[++index] == 'j') && (data[++index] == 'e')
750 && (data[++index] == 'c') && (data[++index] == 't')) {
752 tempToken = TokenNameobjectCAST;
755 if ((data[index] == 's') && (data[++index] == 't')
756 && (data[++index] == 'r') && (data[++index] == 'i')
757 && (data[++index] == 'n') && (data[++index] == 'g')) {
759 tempToken = TokenNamestringCAST;
762 if ((data[index] == 'd') && (data[++index] == 'o')
763 && (data[++index] == 'u') && (data[++index] == 'b')
764 && (data[++index] == 'l') && (data[++index] == 'e')) {
766 tempToken = TokenNamedoubleCAST;
773 if ((data[index] == 'b') && (data[++index] == 'o')
774 && (data[++index] == 'o') && (data[++index] == 'l')
775 && (data[++index] == 'e') && (data[++index] == 'a')
776 && (data[++index] == 'n')) {
778 tempToken = TokenNameboolCAST;
781 if ((data[index] == 'i') && (data[++index] == 'n')
782 && (data[++index] == 't') && (data[++index] == 'e')
783 && (data[++index] == 'g') && (data[++index] == 'e')
784 && (data[++index] == 'r')) {
786 tempToken = TokenNameintCAST;
792 while (currentCharacter == ' ' || currentCharacter == '\t') {
793 currentCharacter = source[currentPosition++];
795 if (currentCharacter == ')') {
800 } catch (IndexOutOfBoundsException e) {
802 currentCharacter = tempCharacter;
803 currentPosition = tempPosition;
804 return TokenNameLPAREN;
806 public int getNextToken() throws InvalidInputException {
808 return getInlinedHTML(currentPosition);
813 jumpOverMethodBody();
815 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
818 while (true) { //loop for jumping over comments
819 withoutUnicodePtr = 0;
820 //start with a new token (even comment written with unicode )
821 // ---------Consume white space and handles startPosition---------
822 int whiteStart = currentPosition;
823 boolean isWhiteSpace;
825 startPosition = currentPosition;
826 currentCharacter = source[currentPosition++];
827 // if (((currentCharacter = source[currentPosition++]) == '\\')
828 // && (source[currentPosition] == 'u')) {
829 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
831 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
832 checkNonExternalizeString();
833 if (recordLineSeparator) {
839 isWhiteSpace = (currentCharacter == ' ')
840 || Character.isWhitespace(currentCharacter);
842 } while (isWhiteSpace);
843 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
844 // reposition scanner in case we are interested by spaces as tokens
846 startPosition = whiteStart;
847 return TokenNameWHITESPACE;
849 //little trick to get out in the middle of a source compuation
850 if (currentPosition > eofPosition)
852 // ---------Identify the next token-------------
853 switch (currentCharacter) {
855 return getCastOrParen();
857 return TokenNameRPAREN;
859 return TokenNameLBRACE;
861 return TokenNameRBRACE;
863 return TokenNameLBRACKET;
865 return TokenNameRBRACKET;
867 return TokenNameSEMICOLON;
869 return TokenNameCOMMA;
871 if (getNextChar('='))
872 return TokenNameDOT_EQUAL;
873 if (getNextCharAsDigit())
874 return scanNumber(true);
879 if ((test = getNextChar('+', '=')) == 0)
880 return TokenNamePLUS_PLUS;
882 return TokenNamePLUS_EQUAL;
883 return TokenNamePLUS;
888 if ((test = getNextChar('-', '=')) == 0)
889 return TokenNameMINUS_MINUS;
891 return TokenNameMINUS_EQUAL;
892 if (getNextChar('>'))
893 return TokenNameMINUS_GREATER;
894 return TokenNameMINUS;
897 if (getNextChar('='))
898 return TokenNameTWIDDLE_EQUAL;
899 return TokenNameTWIDDLE;
901 if (getNextChar('=')) {
902 if (getNextChar('=')) {
903 return TokenNameNOT_EQUAL_EQUAL;
905 return TokenNameNOT_EQUAL;
909 if (getNextChar('='))
910 return TokenNameMULTIPLY_EQUAL;
911 return TokenNameMULTIPLY;
913 if (getNextChar('='))
914 return TokenNameREMAINDER_EQUAL;
915 return TokenNameREMAINDER;
918 int oldPosition = currentPosition;
920 currentCharacter = source[currentPosition++];
921 } catch (IndexOutOfBoundsException e) {
922 currentPosition = oldPosition;
923 return TokenNameLESS;
925 switch (currentCharacter) {
927 return TokenNameLESS_EQUAL;
929 return TokenNameNOT_EQUAL;
931 if (getNextChar('='))
932 return TokenNameLEFT_SHIFT_EQUAL;
933 if (getNextChar('<')) {
934 int heredocStart = currentPosition;
935 int heredocLength = 0;
936 currentCharacter = source[currentPosition++];
937 if (isPHPIdentifierStart(currentCharacter)) {
938 currentCharacter = source[currentPosition++];
940 return TokenNameERROR;
942 while (isPHPIdentifierPart(currentCharacter)) {
943 currentCharacter = source[currentPosition++];
945 heredocLength = currentPosition - heredocStart - 1;
946 // heredoc end-tag determination
947 boolean endTag = true;
950 ch = source[currentPosition++];
951 if (ch == '\r' || ch == '\n') {
952 if (recordLineSeparator) {
957 for (int i = 0; i < heredocLength; i++) {
958 if (source[currentPosition + i] != source[heredocStart
965 currentPosition += heredocLength - 1;
966 currentCharacter = source[currentPosition++];
967 break; // do...while loop
973 return TokenNameHEREDOC;
975 return TokenNameLEFT_SHIFT;
977 currentPosition = oldPosition;
978 return TokenNameLESS;
983 if ((test = getNextChar('=', '>')) == 0)
984 return TokenNameGREATER_EQUAL;
986 if ((test = getNextChar('=', '>')) == 0)
987 return TokenNameRIGHT_SHIFT_EQUAL;
988 return TokenNameRIGHT_SHIFT;
990 return TokenNameGREATER;
993 if (getNextChar('=')) {
994 if (getNextChar('=')) {
995 return TokenNameEQUAL_EQUAL_EQUAL;
997 return TokenNameEQUAL_EQUAL;
999 if (getNextChar('>'))
1000 return TokenNameEQUAL_GREATER;
1001 return TokenNameEQUAL;
1005 if ((test = getNextChar('&', '=')) == 0)
1006 return TokenNameAND_AND;
1008 return TokenNameAND_EQUAL;
1009 return TokenNameAND;
1014 if ((test = getNextChar('|', '=')) == 0)
1015 return TokenNameOR_OR;
1017 return TokenNameOR_EQUAL;
1021 if (getNextChar('='))
1022 return TokenNameXOR_EQUAL;
1023 return TokenNameXOR;
1025 if (getNextChar('>')) {
1027 if (currentPosition==source.length) {
1029 return TokenNameINLINE_HTML;
1031 return getInlinedHTML(currentPosition - 2);
1033 return TokenNameQUESTION;
1035 if (getNextChar(':'))
1036 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1037 return TokenNameCOLON;
1043 // if ((test = getNextChar('\n', '\r')) == 0) {
1044 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1047 // // relocate if finding another quote fairly close: thus unicode
1048 // '/u000D' will be fully consumed
1049 // for (int lookAhead = 0;
1052 // if (currentPosition + lookAhead
1053 // == source.length)
1055 // if (source[currentPosition + lookAhead]
1058 // if (source[currentPosition + lookAhead]
1060 // currentPosition += lookAhead + 1;
1064 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1067 // if (getNextChar('\'')) {
1068 // // relocate if finding another quote fairly close: thus unicode
1069 // '/u000D' will be fully consumed
1070 // for (int lookAhead = 0;
1073 // if (currentPosition + lookAhead
1074 // == source.length)
1076 // if (source[currentPosition + lookAhead]
1079 // if (source[currentPosition + lookAhead]
1081 // currentPosition += lookAhead + 1;
1085 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1087 // if (getNextChar('\\'))
1088 // scanEscapeCharacter();
1089 // else { // consume next character
1090 // unicodeAsBackSlash = false;
1091 // if (((currentCharacter = source[currentPosition++])
1093 // && (source[currentPosition] == 'u')) {
1094 // getNextUnicodeChar();
1096 // if (withoutUnicodePtr != 0) {
1097 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1098 // currentCharacter;
1102 // // if (getNextChar('\''))
1103 // // return TokenNameCharacterLiteral;
1104 // // relocate if finding another quote fairly close: thus unicode
1105 // '/u000D' will be fully consumed
1106 // for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1107 // if (currentPosition + lookAhead == source.length)
1109 // if (source[currentPosition + lookAhead] == '\n')
1111 // if (source[currentPosition + lookAhead] == '\'') {
1112 // currentPosition += lookAhead + 1;
1116 // throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1119 // consume next character
1120 unicodeAsBackSlash = false;
1121 currentCharacter = source[currentPosition++];
1122 // if (((currentCharacter = source[currentPosition++]) == '\\')
1123 // && (source[currentPosition] == 'u')) {
1124 // getNextUnicodeChar();
1126 // if (withoutUnicodePtr != 0) {
1127 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1128 // currentCharacter;
1131 while (currentCharacter != '\'') {
1132 /** ** in PHP \r and \n are valid in string literals *** */
1133 // if ((currentCharacter == '\n')
1134 // || (currentCharacter == '\r')) {
1135 // // relocate if finding another quote fairly close: thus
1136 // unicode '/u000D' will be fully consumed
1137 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1138 // if (currentPosition + lookAhead == source.length)
1140 // if (source[currentPosition + lookAhead] == '\n')
1142 // if (source[currentPosition + lookAhead] == '\"') {
1143 // currentPosition += lookAhead + 1;
1147 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1149 if (currentCharacter == '\\') {
1150 int escapeSize = currentPosition;
1151 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1152 //scanEscapeCharacter make a side effect on this value and
1153 // we need the previous value few lines down this one
1154 scanSingleQuotedEscapeCharacter();
1155 escapeSize = currentPosition - escapeSize;
1156 if (withoutUnicodePtr == 0) {
1157 //buffer all the entries that have been left aside....
1158 withoutUnicodePtr = currentPosition - escapeSize - 1
1160 System.arraycopy(source, startPosition,
1161 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1162 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1163 } else { //overwrite the / in the buffer
1164 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1165 if (backSlashAsUnicodeInString) { //there are TWO \ in
1167 // only one is correct
1168 withoutUnicodePtr--;
1172 // consume next character
1173 unicodeAsBackSlash = false;
1174 currentCharacter = source[currentPosition++];
1175 // if (((currentCharacter = source[currentPosition++]) ==
1177 // && (source[currentPosition] == 'u')) {
1178 // getNextUnicodeChar();
1180 if (withoutUnicodePtr != 0) {
1181 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1185 } catch (IndexOutOfBoundsException e) {
1186 throw new InvalidInputException(UNTERMINATED_STRING);
1187 } catch (InvalidInputException e) {
1188 if (e.getMessage().equals(INVALID_ESCAPE)) {
1189 // relocate if finding another quote fairly close: thus
1190 // unicode '/u000D' will be fully consumed
1191 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1192 if (currentPosition + lookAhead == source.length)
1194 if (source[currentPosition + lookAhead] == '\n')
1196 if (source[currentPosition + lookAhead] == '\'') {
1197 currentPosition += lookAhead + 1;
1204 if (checkNonExternalizedStringLiterals) { // check for presence
1206 // //$NON-NLS-?$ where
1208 if (currentLine == null) {
1209 currentLine = new NLSLine();
1210 lines.add(currentLine);
1212 currentLine.add(new StringLiteral(
1213 getCurrentTokenSourceString(), startPosition,
1214 currentPosition - 1));
1216 return TokenNameStringConstant;
1219 // consume next character
1220 unicodeAsBackSlash = false;
1221 currentCharacter = source[currentPosition++];
1222 // if (((currentCharacter = source[currentPosition++]) == '\\')
1223 // && (source[currentPosition] == 'u')) {
1224 // getNextUnicodeChar();
1226 // if (withoutUnicodePtr != 0) {
1227 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1228 // currentCharacter;
1231 while (currentCharacter != '"') {
1232 /** ** in PHP \r and \n are valid in string literals *** */
1233 // if ((currentCharacter == '\n')
1234 // || (currentCharacter == '\r')) {
1235 // // relocate if finding another quote fairly close: thus
1236 // unicode '/u000D' will be fully consumed
1237 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1238 // if (currentPosition + lookAhead == source.length)
1240 // if (source[currentPosition + lookAhead] == '\n')
1242 // if (source[currentPosition + lookAhead] == '\"') {
1243 // currentPosition += lookAhead + 1;
1247 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1249 if (currentCharacter == '\\') {
1250 int escapeSize = currentPosition;
1251 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1252 //scanEscapeCharacter make a side effect on this value and
1253 // we need the previous value few lines down this one
1254 scanDoubleQuotedEscapeCharacter();
1255 escapeSize = currentPosition - escapeSize;
1256 if (withoutUnicodePtr == 0) {
1257 //buffer all the entries that have been left aside....
1258 withoutUnicodePtr = currentPosition - escapeSize - 1
1260 System.arraycopy(source, startPosition,
1261 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1262 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1263 } else { //overwrite the / in the buffer
1264 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1265 if (backSlashAsUnicodeInString) { //there are TWO \ in
1267 // only one is correct
1268 withoutUnicodePtr--;
1272 // consume next character
1273 unicodeAsBackSlash = false;
1274 currentCharacter = source[currentPosition++];
1275 // if (((currentCharacter = source[currentPosition++]) ==
1277 // && (source[currentPosition] == 'u')) {
1278 // getNextUnicodeChar();
1280 if (withoutUnicodePtr != 0) {
1281 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1285 } catch (IndexOutOfBoundsException e) {
1286 throw new InvalidInputException(UNTERMINATED_STRING);
1287 } catch (InvalidInputException e) {
1288 if (e.getMessage().equals(INVALID_ESCAPE)) {
1289 // relocate if finding another quote fairly close: thus
1290 // unicode '/u000D' will be fully consumed
1291 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1292 if (currentPosition + lookAhead == source.length)
1294 if (source[currentPosition + lookAhead] == '\n')
1296 if (source[currentPosition + lookAhead] == '\"') {
1297 currentPosition += lookAhead + 1;
1304 if (checkNonExternalizedStringLiterals) { // check for presence
1306 // //$NON-NLS-?$ where
1308 if (currentLine == null) {
1309 currentLine = new NLSLine();
1310 lines.add(currentLine);
1312 currentLine.add(new StringLiteral(
1313 getCurrentTokenSourceString(), startPosition,
1314 currentPosition - 1));
1316 return TokenNameStringLiteral;
1319 // consume next character
1320 unicodeAsBackSlash = false;
1321 currentCharacter = source[currentPosition++];
1322 // if (((currentCharacter = source[currentPosition++]) == '\\')
1323 // && (source[currentPosition] == 'u')) {
1324 // getNextUnicodeChar();
1326 // if (withoutUnicodePtr != 0) {
1327 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1328 // currentCharacter;
1331 while (currentCharacter != '`') {
1332 /** ** in PHP \r and \n are valid in string literals *** */
1333 // if ((currentCharacter == '\n')
1334 // || (currentCharacter == '\r')) {
1335 // // relocate if finding another quote fairly close: thus
1336 // unicode '/u000D' will be fully consumed
1337 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1338 // if (currentPosition + lookAhead == source.length)
1340 // if (source[currentPosition + lookAhead] == '\n')
1342 // if (source[currentPosition + lookAhead] == '\"') {
1343 // currentPosition += lookAhead + 1;
1347 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1349 if (currentCharacter == '\\') {
1350 int escapeSize = currentPosition;
1351 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1352 //scanEscapeCharacter make a side effect on this value and
1353 // we need the previous value few lines down this one
1354 scanDoubleQuotedEscapeCharacter();
1355 escapeSize = currentPosition - escapeSize;
1356 if (withoutUnicodePtr == 0) {
1357 //buffer all the entries that have been left aside....
1358 withoutUnicodePtr = currentPosition - escapeSize - 1
1360 System.arraycopy(source, startPosition,
1361 withoutUnicodeBuffer, 1, withoutUnicodePtr);
1362 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1363 } else { //overwrite the / in the buffer
1364 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1365 if (backSlashAsUnicodeInString) { //there are TWO \ in
1367 // only one is correct
1368 withoutUnicodePtr--;
1372 // consume next character
1373 unicodeAsBackSlash = false;
1374 currentCharacter = source[currentPosition++];
1375 // if (((currentCharacter = source[currentPosition++]) ==
1377 // && (source[currentPosition] == 'u')) {
1378 // getNextUnicodeChar();
1380 if (withoutUnicodePtr != 0) {
1381 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1385 } catch (IndexOutOfBoundsException e) {
1386 throw new InvalidInputException(UNTERMINATED_STRING);
1387 } catch (InvalidInputException e) {
1388 if (e.getMessage().equals(INVALID_ESCAPE)) {
1389 // relocate if finding another quote fairly close: thus
1390 // unicode '/u000D' will be fully consumed
1391 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1392 if (currentPosition + lookAhead == source.length)
1394 if (source[currentPosition + lookAhead] == '\n')
1396 if (source[currentPosition + lookAhead] == '`') {
1397 currentPosition += lookAhead + 1;
1404 if (checkNonExternalizedStringLiterals) { // check for presence
1406 // //$NON-NLS-?$ where
1408 if (currentLine == null) {
1409 currentLine = new NLSLine();
1410 lines.add(currentLine);
1412 currentLine.add(new StringLiteral(
1413 getCurrentTokenSourceString(), startPosition,
1414 currentPosition - 1));
1416 return TokenNameStringInterpolated;
1420 char startChar = currentCharacter;
1421 if (getNextChar('=')) {
1422 return TokenNameDIVIDE_EQUAL;
1425 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1427 int endPositionForLineComment = 0;
1428 try { //get the next char
1429 currentCharacter = source[currentPosition++];
1430 // if (((currentCharacter = source[currentPosition++])
1432 // && (source[currentPosition] == 'u')) {
1433 // //-------------unicode traitement ------------
1434 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1435 // currentPosition++;
1436 // while (source[currentPosition] == 'u') {
1437 // currentPosition++;
1440 // Character.getNumericValue(source[currentPosition++]))
1444 // Character.getNumericValue(source[currentPosition++]))
1448 // Character.getNumericValue(source[currentPosition++]))
1452 // Character.getNumericValue(source[currentPosition++]))
1455 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1457 // currentCharacter =
1458 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1461 //handle the \\u case manually into comment
1462 // if (currentCharacter == '\\') {
1463 // if (source[currentPosition] == '\\')
1464 // currentPosition++;
1465 // } //jump over the \\
1466 boolean isUnicode = false;
1467 while (currentCharacter != '\r' && currentCharacter != '\n') {
1468 if (currentCharacter == '?') {
1469 if (getNextChar('>')) {
1470 startPosition = currentPosition - 2;
1472 return TokenNameINLINE_HTML;
1477 currentCharacter = source[currentPosition++];
1478 // if (((currentCharacter = source[currentPosition++])
1480 // && (source[currentPosition] == 'u')) {
1481 // isUnicode = true;
1482 // //-------------unicode traitement ------------
1483 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1484 // currentPosition++;
1485 // while (source[currentPosition] == 'u') {
1486 // currentPosition++;
1489 // Character.getNumericValue(source[currentPosition++]))
1493 // Character.getNumericValue(
1494 // source[currentPosition++]))
1498 // Character.getNumericValue(
1499 // source[currentPosition++]))
1503 // Character.getNumericValue(
1504 // source[currentPosition++]))
1508 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1510 // currentCharacter =
1511 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1514 //handle the \\u case manually into comment
1515 // if (currentCharacter == '\\') {
1516 // if (source[currentPosition] == '\\')
1517 // currentPosition++;
1518 // } //jump over the \\
1521 endPositionForLineComment = currentPosition - 6;
1523 endPositionForLineComment = currentPosition - 1;
1525 recordComment(false);
1526 if ((currentCharacter == '\r')
1527 || (currentCharacter == '\n')) {
1528 checkNonExternalizeString();
1529 if (recordLineSeparator) {
1531 pushUnicodeLineSeparator();
1533 pushLineSeparator();
1539 if (tokenizeComments) {
1541 currentPosition = endPositionForLineComment;
1542 // reset one character behind
1544 return TokenNameCOMMENT_LINE;
1546 } catch (IndexOutOfBoundsException e) { //an eof will them
1548 if (tokenizeComments) {
1550 // reset one character behind
1551 return TokenNameCOMMENT_LINE;
1557 //traditional and annotation comment
1558 boolean isJavadoc = false, star = false;
1559 // consume next character
1560 unicodeAsBackSlash = false;
1561 currentCharacter = source[currentPosition++];
1562 // if (((currentCharacter = source[currentPosition++]) ==
1564 // && (source[currentPosition] == 'u')) {
1565 // getNextUnicodeChar();
1567 // if (withoutUnicodePtr != 0) {
1568 // withoutUnicodeBuffer[++withoutUnicodePtr] =
1569 // currentCharacter;
1572 if (currentCharacter == '*') {
1576 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1577 checkNonExternalizeString();
1578 if (recordLineSeparator) {
1579 pushLineSeparator();
1584 try { //get the next char
1585 currentCharacter = source[currentPosition++];
1586 // if (((currentCharacter = source[currentPosition++])
1588 // && (source[currentPosition] == 'u')) {
1589 // //-------------unicode traitement ------------
1590 // getNextUnicodeChar();
1592 //handle the \\u case manually into comment
1593 // if (currentCharacter == '\\') {
1594 // if (source[currentPosition] == '\\')
1595 // currentPosition++;
1596 // //jump over the \\
1598 // empty comment is not a javadoc /**/
1599 if (currentCharacter == '/') {
1602 //loop until end of comment */
1603 while ((currentCharacter != '/') || (!star)) {
1604 if ((currentCharacter == '\r')
1605 || (currentCharacter == '\n')) {
1606 checkNonExternalizeString();
1607 if (recordLineSeparator) {
1608 pushLineSeparator();
1613 star = currentCharacter == '*';
1615 currentCharacter = source[currentPosition++];
1616 // if (((currentCharacter = source[currentPosition++])
1618 // && (source[currentPosition] == 'u')) {
1619 // //-------------unicode traitement ------------
1620 // getNextUnicodeChar();
1622 //handle the \\u case manually into comment
1623 // if (currentCharacter == '\\') {
1624 // if (source[currentPosition] == '\\')
1625 // currentPosition++;
1626 // } //jump over the \\
1628 recordComment(isJavadoc);
1629 if (tokenizeComments) {
1631 return TokenNameCOMMENT_PHPDOC;
1632 return TokenNameCOMMENT_BLOCK;
1634 } catch (IndexOutOfBoundsException e) {
1635 throw new InvalidInputException(UNTERMINATED_COMMENT);
1639 return TokenNameDIVIDE;
1643 return TokenNameEOF;
1644 //the atEnd may not be <currentPosition == source.length> if
1645 // source is only some part of a real (external) stream
1646 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1648 if (currentCharacter == '$') {
1649 int oldPosition = currentPosition;
1651 currentCharacter = source[currentPosition++];
1652 if (isPHPIdentifierStart(currentCharacter)) {
1653 return scanIdentifierOrKeyword(true);
1655 currentPosition = oldPosition;
1656 return TokenNameDOLLAR;
1658 } catch (IndexOutOfBoundsException e) {
1659 currentPosition = oldPosition;
1660 return TokenNameDOLLAR;
1663 if (isPHPIdentifierStart(currentCharacter))
1664 return scanIdentifierOrKeyword(false);
1665 if (Character.isDigit(currentCharacter))
1666 return scanNumber(false);
1667 return TokenNameERROR;
1670 } //-----------------end switch while try--------------------
1671 catch (IndexOutOfBoundsException e) {
1674 return TokenNameEOF;
1678 * InvalidInputException
1680 private int getInlinedHTML(int start) throws InvalidInputException {
// Scans inline HTML beginning at offset 'start' and reports it as
// TokenNameINLINE_HTML, watching for a '<?' opener followed by whitespace
// or by 'php' in either letter case.
// NOTE(review): several statements between the lines shown here are not
// visible in this listing, so what each branch does before returning
// cannot be confirmed from this view.
1681 // int htmlPosition = start;
// Already past the end of the buffer: clamp the position and report EOF.
1682 if (currentPosition>source.length) {
1683 currentPosition = source.length;
1684 return TokenNameEOF;
// The HTML token begins at the caller-supplied offset.
1686 startPosition = start;
1689 currentCharacter = source[currentPosition++];
// A '<' immediately followed by '?' may open a PHP section.
1690 if (currentCharacter == '<') {
1691 if (getNextChar('?')) {
1692 currentCharacter = source[currentPosition++];
// Short form: '<?' followed by a blank or any other whitespace character.
1693 if ((currentCharacter == ' ')
1694 || Character.isWhitespace(currentCharacter)) {
1697 return TokenNameINLINE_HTML;
// Long form: match 'php' one character at a time, accepting either case.
1699 boolean phpStart = (currentCharacter == 'P')
1700 || (currentCharacter == 'p');
1702 int test = getNextChar('H', 'h');
1704 test = getNextChar('P', 'p');
1708 return TokenNameINLINE_HTML;
// Keep the line-end table up to date while walking over HTML text.
1715 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1716 if (recordLineSeparator) {
1717 pushLineSeparator();
1722 } //-----------------while--------------------
1724 return TokenNameINLINE_HTML;
1725 } //-----------------try--------------------
// Reading past the end of 'source' lands here; the token start is reset to 'start'.
1726 catch (IndexOutOfBoundsException e) {
1727 startPosition = start;
1731 return TokenNameINLINE_HTML;
1733 // public final void getNextUnicodeChar()
1734 // throws IndexOutOfBoundsException, InvalidInputException {
1736 // //handle the case of unicode.
1737 // //when a unicode appears then we must use a buffer that holds char
1739 // //At the end of this method currentCharacter holds the new visited char
1740 // //and currentPosition points right next after it
1742 // //ALL getNextChar.... ARE OPTIMIZED COPIES
1744 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1745 // currentPosition++;
1746 // while (source[currentPosition] == 'u') {
1747 // currentPosition++;
1751 // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1753 // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1755 // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1757 // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1759 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1761 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1762 // //need the unicode buffer
1763 // if (withoutUnicodePtr == 0) {
1764 // //buffer all the entries that have been left aside....
1765 // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1766 // System.arraycopy(
1769 // withoutUnicodeBuffer,
1771 // withoutUnicodePtr);
1773 // //fill the buffer with the char
1774 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1776 // unicodeAsBackSlash = currentCharacter == '\\';
1779 * Tokenize a method body, assuming that curly brackets are properly
1782 public final void jumpOverMethodBody() {
// Walks over the characters of a method body without returning tokens:
// whitespace, string literals, comments, identifiers and numbers are
// consumed and discarded.
// NOTE(review): the switch cases that track '{' / '}' and the bodies of
// most catch clauses are not visible in this listing — confirm the exit
// conditions against the full file.
1783 this.wasAcr = false;
1786 while (true) { //loop for jumping over comments
1787 // ---------Consume white space and handles startPosition---------
1788 boolean isWhiteSpace;
1790 startPosition = currentPosition;
1791 currentCharacter = source[currentPosition++];
1792 // if (((currentCharacter = source[currentPosition++]) == '\\')
1793 // && (source[currentPosition] == 'u')) {
1794 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
// Line ends are still recorded while skipping so later position lookups stay valid.
1796 if (recordLineSeparator
1797 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1798 pushLineSeparator();
1799 isWhiteSpace = Character.isWhitespace(currentCharacter);
1801 } while (isWhiteSpace);
1802 // -------consume token until } is found---------
1803 switch (currentCharacter) {
1815 test = getNextChar('\\');
1818 scanDoubleQuotedEscapeCharacter();
1819 } catch (InvalidInputException ex) {
1822 // try { // consume next character
1823 unicodeAsBackSlash = false;
1824 currentCharacter = source[currentPosition++];
1825 // if (((currentCharacter = source[currentPosition++]) == '\\')
1826 // && (source[currentPosition] == 'u')) {
1827 // getNextUnicodeChar();
1829 if (withoutUnicodePtr != 0) {
1830 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1833 // } catch (InvalidInputException ex) {
1841 // try { // consume next character
1842 unicodeAsBackSlash = false;
1843 currentCharacter = source[currentPosition++];
1844 // if (((currentCharacter = source[currentPosition++]) == '\\')
1845 // && (source[currentPosition] == 'u')) {
1846 // getNextUnicodeChar();
1848 if (withoutUnicodePtr != 0) {
1849 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1852 // } catch (InvalidInputException ex) {
// Double-quoted string: advance to the closing quote, handing backslash
// escapes to scanDoubleQuotedEscapeCharacter().
1854 while (currentCharacter != '"') {
1855 if (currentCharacter == '\r') {
1856 if (source[currentPosition] == '\n')
1859 // the string cannot go further that the line
1861 if (currentCharacter == '\n') {
1863 // the string cannot go further that the line
1865 if (currentCharacter == '\\') {
1867 scanDoubleQuotedEscapeCharacter();
1868 } catch (InvalidInputException ex) {
1871 // try { // consume next character
1872 unicodeAsBackSlash = false;
1873 currentCharacter = source[currentPosition++];
1874 // if (((currentCharacter = source[currentPosition++]) == '\\')
1875 // && (source[currentPosition] == 'u')) {
1876 // getNextUnicodeChar();
1878 if (withoutUnicodePtr != 0) {
1879 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1882 // } catch (InvalidInputException ex) {
1885 } catch (IndexOutOfBoundsException e) {
// Comment handling: a result of 0 is treated as a line comment and skipped
// to the end of the line (presumably 0 means the first alternative, '/',
// matched — confirm against getNextChar(char, char)).
1892 if ((test = getNextChar('/', '*')) == 0) {
1896 currentCharacter = source[currentPosition++];
1897 // if (((currentCharacter = source[currentPosition++]) ==
1899 // && (source[currentPosition] == 'u')) {
1900 // //-------------unicode processing ------------
1901 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1902 // currentPosition++;
1903 // while (source[currentPosition] == 'u') {
1904 // currentPosition++;
1907 // Character.getNumericValue(source[currentPosition++]))
1911 // Character.getNumericValue(source[currentPosition++]))
1915 // Character.getNumericValue(source[currentPosition++]))
1919 // Character.getNumericValue(source[currentPosition++]))
1922 // //error don't care of the value
1923 // currentCharacter = 'A';
1924 // } //something different from \n and \r
1926 // currentCharacter =
1927 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1930 while (currentCharacter != '\r' && currentCharacter != '\n') {
1932 currentCharacter = source[currentPosition++];
1933 // if (((currentCharacter = source[currentPosition++])
1935 // && (source[currentPosition] == 'u')) {
1936 // //-------------unicode processing ------------
1937 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1938 // currentPosition++;
1939 // while (source[currentPosition] == 'u') {
1940 // currentPosition++;
1943 // Character.getNumericValue(source[currentPosition++]))
1947 // Character.getNumericValue(source[currentPosition++]))
1951 // Character.getNumericValue(source[currentPosition++]))
1955 // Character.getNumericValue(source[currentPosition++]))
1958 // //error don't care of the value
1959 // currentCharacter = 'A';
1960 // } //something different from \n and \r
1962 // currentCharacter =
1963 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1967 if (recordLineSeparator
1968 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1969 pushLineSeparator();
1970 } catch (IndexOutOfBoundsException e) {
1971 } //an eof will then be generated
1975 //traditional and annotation comment
// 'star' remembers whether the previously read character was '*', so the
// loop below can recognise the closing "*/" pair.
1976 boolean star = false;
1977 // try { // consume next character
1978 unicodeAsBackSlash = false;
1979 currentCharacter = source[currentPosition++];
1980 // if (((currentCharacter = source[currentPosition++]) == '\\')
1981 // && (source[currentPosition] == 'u')) {
1982 // getNextUnicodeChar();
1984 if (withoutUnicodePtr != 0) {
1985 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1988 // } catch (InvalidInputException ex) {
1990 if (currentCharacter == '*') {
1993 if (recordLineSeparator
1994 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1995 pushLineSeparator();
1996 try { //get the next char
1997 currentCharacter = source[currentPosition++];
1998 // if (((currentCharacter = source[currentPosition++]) ==
2000 // && (source[currentPosition] == 'u')) {
2001 // //-------------unicode processing ------------
2002 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2003 // currentPosition++;
2004 // while (source[currentPosition] == 'u') {
2005 // currentPosition++;
2008 // Character.getNumericValue(source[currentPosition++]))
2012 // Character.getNumericValue(source[currentPosition++]))
2016 // Character.getNumericValue(source[currentPosition++]))
2020 // Character.getNumericValue(source[currentPosition++]))
2023 // //error don't care of the value
2024 // currentCharacter = 'A';
2025 // } //something different from * and /
2027 // currentCharacter =
2028 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2031 //loop until end of comment */
2032 while ((currentCharacter != '/') || (!star)) {
2033 if (recordLineSeparator
2034 && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2035 pushLineSeparator();
2036 star = currentCharacter == '*';
2038 currentCharacter = source[currentPosition++];
2039 // if (((currentCharacter = source[currentPosition++])
2041 // && (source[currentPosition] == 'u')) {
2042 // //-------------unicode processing ------------
2043 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2044 // currentPosition++;
2045 // while (source[currentPosition] == 'u') {
2046 // currentPosition++;
2049 // Character.getNumericValue(source[currentPosition++]))
2053 // Character.getNumericValue(source[currentPosition++]))
2057 // Character.getNumericValue(source[currentPosition++]))
2061 // Character.getNumericValue(source[currentPosition++]))
2064 // //error don't care of the value
2065 // currentCharacter = 'A';
2066 // } //something different from * and /
2068 // currentCharacter =
2069 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2073 } catch (IndexOutOfBoundsException e) {
// Identifier, keyword or '$'-prefixed name: scanned only to move past it;
// the boolean argument is true when the token starts with '$'.
2081 if (isPHPIdentifierStart(currentCharacter)
2082 || currentCharacter == '$') {
2084 scanIdentifierOrKeyword((currentCharacter == '$'));
2085 } catch (InvalidInputException ex) {
// Token starting with a digit.
2089 if (Character.isDigit(currentCharacter)) {
2092 } catch (InvalidInputException ex) {
2098 //-----------------end switch while try--------------------
// NOTE(review): the bodies of these two handlers are not visible in this listing.
2099 } catch (IndexOutOfBoundsException e) {
2100 } catch (InvalidInputException e) {
2104 // public final boolean jumpOverUnicodeWhiteSpace()
2105 // throws InvalidInputException {
2107 // //handle the case of unicode. Jump over the next whiteSpace
2108 // //making startPosition pointing on the next available char
2109 // //On false, the currentCharacter is filled up with a potential
2113 // this.wasAcr = false;
2114 // int c1, c2, c3, c4;
2115 // int unicodeSize = 6;
2116 // currentPosition++;
2117 // while (source[currentPosition] == 'u') {
2118 // currentPosition++;
2122 // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2124 // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2126 // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2128 // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2130 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2133 // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2134 // if (recordLineSeparator
2135 // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2136 // pushLineSeparator();
2137 // if (Character.isWhitespace(currentCharacter))
2140 // //buffer the new char which is not a white space
2141 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2142 // //withoutUnicodePtr == 1 is true here
2144 // } catch (IndexOutOfBoundsException e) {
2145 // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2148 public final int[] getLineEnds() {
// Hands out the line-end offsets recorded so far as a fresh array: only the
// first linePtr + 1 entries of lineEnds are copied, so the unused tail of
// the backing array is not included and callers never see the internal array.
2149 //return a bounded copy of this.lineEnds
2151 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2154 public char[] getSource() {
// NOTE(review): the body is not visible in this listing; presumably this
// returns the char[] currently being scanned — confirm against the full file.
2157 final char[] optimizedCurrentTokenSource1() {
// Source text of a one-character token, taken from source[startPosition].
// NOTE(review): the statements between reading the character and the final
// return are not visible in this listing; going by the comments below they
// presumably hand back shared arrays for common characters — confirm.
2158 //return always the same char[] build only once
2159 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2160 char charOne = source[startPosition];
// Last statement shown: a newly allocated one-element array.
2215 return new char[]{charOne};
2218 final char[] optimizedCurrentTokenSource2() {
// Source text of a two-character token, looked up in the cache
// charArray_length[0] so that equal tokens can share one char[].
// NOTE(review): the return statements and the loop-index initialisation are
// not visible in this listing.
2219 //try to return the same char[] build only once
// Combine both characters into a hash that selects the cache bucket.
2221 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2223 char[][] table = charArray_length[0][hash];
// First pass: compare against bucket slots below InternalTableSize.
2225 while (++i < InternalTableSize) {
2226 char[] charArray = table[i];
2227 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2230 //---------other side---------
// Second pass: compare against slots up to and including newEntry2.
2232 int max = newEntry2;
2233 while (++i <= max) {
2234 char[] charArray = table[i];
2235 if ((c0 == charArray[0]) && (c1 == charArray[1]))
2238 //--------add the entry-------
// No match: store a new array in the slot after newEntry2 (the handling of
// the InternalTableSize overflow case is not visible here).
2239 if (++max >= InternalTableSize)
2242 table[max] = (r = new char[]{c0, c1});
2246 final char[] optimizedCurrentTokenSource3() {
// Source text of a three-character token, looked up in the cache
// charArray_length[1] so that equal tokens can share one char[].
// NOTE(review): the return statements and the loop-index initialisation are
// not visible in this listing.
2247 //try to return the same char[] build only once
// Combine the three characters (6 bits apart) into a hash that selects the bucket.
2249 int hash = (((c0 = source[startPosition]) << 12)
2250 + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2252 char[][] table = charArray_length[1][hash];
// First pass: compare against bucket slots below InternalTableSize.
2254 while (++i < InternalTableSize) {
2255 char[] charArray = table[i];
2256 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2259 //---------other side---------
// Second pass: compare against slots up to and including newEntry3.
2261 int max = newEntry3;
2262 while (++i <= max) {
2263 char[] charArray = table[i];
2264 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2267 //--------add the entry-------
// No match: store a new array in the slot after newEntry3 (the handling of
// the InternalTableSize overflow case is not visible here).
2268 if (++max >= InternalTableSize)
2271 table[max] = (r = new char[]{c0, c1, c2});
2275 final char[] optimizedCurrentTokenSource4() {
// Source text of a four-character token, looked up in the cache
// charArray_length[2] so that equal tokens can share one char[].
// NOTE(review): the return statements and the loop-index initialisation are
// not visible in this listing.
2276 //try to return the same char[] build only once
2277 char c0, c1, c2, c3;
// The first character is widened to long before shifting: a 16-bit char
// shifted left by 18 no longer fits in an int.
2278 long hash = ((((long) (c0 = source[startPosition])) << 18)
2279 + ((c1 = source[startPosition + 1]) << 12)
2280 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2282 char[][] table = charArray_length[2][(int) hash];
// First pass: compare against bucket slots below InternalTableSize.
2284 while (++i < InternalTableSize) {
2285 char[] charArray = table[i];
2286 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2287 && (c3 == charArray[3]))
2290 //---------other side---------
// Second pass: compare against slots up to and including newEntry4.
2292 int max = newEntry4;
2293 while (++i <= max) {
2294 char[] charArray = table[i];
2295 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2296 && (c3 == charArray[3]))
2299 //--------add the entry-------
// No match: store a new array in the slot after newEntry4 (the handling of
// the InternalTableSize overflow case is not visible here).
2300 if (++max >= InternalTableSize)
2303 table[max] = (r = new char[]{c0, c1, c2, c3});
2307 final char[] optimizedCurrentTokenSource5() {
// Source text of a five-character token, looked up in the cache
// charArray_length[3] so that equal tokens can share one char[].
// NOTE(review): the return statements and the loop-index initialisation are
// not visible in this listing.
2308 //try to return the same char[] build only once
2309 char c0, c1, c2, c3, c4;
// The two leading characters are widened to long before shifting, since
// their shifted values would not fit in an int.
2310 long hash = ((((long) (c0 = source[startPosition])) << 24)
2311 + (((long) (c1 = source[startPosition + 1])) << 18)
2312 + ((c2 = source[startPosition + 2]) << 12)
2313 + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2315 char[][] table = charArray_length[3][(int) hash];
// First pass: compare against bucket slots below InternalTableSize.
2317 while (++i < InternalTableSize) {
2318 char[] charArray = table[i];
2319 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2320 && (c3 == charArray[3]) && (c4 == charArray[4]))
2323 //---------other side---------
// Second pass: compare against slots up to and including newEntry5.
2325 int max = newEntry5;
2326 while (++i <= max) {
2327 char[] charArray = table[i];
2328 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2329 && (c3 == charArray[3]) && (c4 == charArray[4]))
2332 //--------add the entry-------
// No match: store a new array in the slot after newEntry5 (the handling of
// the InternalTableSize overflow case is not visible here).
2333 if (++max >= InternalTableSize)
2336 table[max] = (r = new char[]{c0, c1, c2, c3, c4});
2340 final char[] optimizedCurrentTokenSource6() {
// Source text of a six-character token, looked up in the cache
// charArray_length[4] so that equal tokens can share one char[].
// NOTE(review): the return statements and the loop-index initialisation are
// not visible in this listing.
2341 //try to return the same char[] build only once
2342 char c0, c1, c2, c3, c4, c5;
// The three leading characters are widened to long before shifting, since
// their shifted values would not fit in an int.
2343 long hash = ((((long) (c0 = source[startPosition])) << 32)
2344 + (((long) (c1 = source[startPosition + 1])) << 24)
2345 + (((long) (c2 = source[startPosition + 2])) << 18)
2346 + ((c3 = source[startPosition + 3]) << 12)
2347 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2349 char[][] table = charArray_length[4][(int) hash];
// First pass: compare against bucket slots below InternalTableSize.
2351 while (++i < InternalTableSize) {
2352 char[] charArray = table[i];
2353 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2354 && (c3 == charArray[3]) && (c4 == charArray[4])
2355 && (c5 == charArray[5]))
2358 //---------other side---------
// Second pass: compare against slots up to and including newEntry6.
2360 int max = newEntry6;
2361 while (++i <= max) {
2362 char[] charArray = table[i];
2363 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2364 && (c3 == charArray[3]) && (c4 == charArray[4])
2365 && (c5 == charArray[5]))
2368 //--------add the entry-------
// No match: store a new array in the slot after newEntry6 (the handling of
// the InternalTableSize overflow case is not visible here).
2369 if (++max >= InternalTableSize)
2372 table[max] = (r = new char[]{c0, c1, c2, c3, c4, c5});
2376 public final void pushLineSeparator() throws InvalidInputException {
// Records the offset of the line terminator held in currentCharacter in
// lineEnds, enlarging that array by INCREMENT entries when it is full. A CR
// directly followed by LF ends up as a single entry located at the LF.
// NOTE(review): some statements (early exits, the wasAcr updates) are not
// visible in this listing.
2377 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2378 final int INCREMENT = 250;
2379 if (this.checkNonExternalizedStringLiterals) {
2380 // reinitialize the current line for non externalize strings purpose
2383 //currentCharacter is at position currentPosition-1
2385 if (currentCharacter == '\r') {
2386 int separatorPos = currentPosition - 1;
// Check for a separator at or before the last recorded line end (the action
// taken on a match is not visible here).
2387 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2389 //System.out.println("CR-" + separatorPos);
// ++linePtr is evaluated before the bounds-checked store, so linePtr has
// already advanced if the store overflows; the handler below relies on that.
2391 lineEnds[++linePtr] = separatorPos;
2392 } catch (IndexOutOfBoundsException e) {
2393 //linePtr value is correct
2394 int oldLength = lineEnds.length;
2395 int[] old = lineEnds;
2396 lineEnds = new int[oldLength + INCREMENT];
2397 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2398 lineEnds[linePtr] = separatorPos;
2400 // look-ahead for merged cr+lf
2402 if (source[currentPosition] == '\n') {
2403 //System.out.println("look-ahead LF-" + currentPosition);
2404 lineEnds[linePtr] = currentPosition;
2410 } catch (IndexOutOfBoundsException e) {
2415 if (currentCharacter == '\n') {
2416 //must merge eventual cr followed by lf
// LF right after a CR whose line end was just recorded: move that entry onto
// the LF instead of adding a second line end.
2417 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2418 //System.out.println("merge LF-" + (currentPosition - 1));
2419 lineEnds[linePtr] = currentPosition - 1;
2421 int separatorPos = currentPosition - 1;
// Same check as in the CR branch (action on a match not visible here).
2422 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2424 // System.out.println("LF-" + separatorPos);
// As above: linePtr is advanced before the store, and the array is grown on overflow.
2426 lineEnds[++linePtr] = separatorPos;
2427 } catch (IndexOutOfBoundsException e) {
2428 //linePtr value is correct
2429 int oldLength = lineEnds.length;
2430 int[] old = lineEnds;
2431 lineEnds = new int[oldLength + INCREMENT];
2432 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2433 lineEnds[linePtr] = separatorPos;
// Variant of pushLineSeparator() for a line separator that was written in the
// source as a unicode escape. The recorded position is currentPosition - 6
// instead of currentPosition - 1 (presumably because such an escape occupies
// six source characters -- confirm). Growth of lineEnds works the same way:
// an overflowing store is caught and the array is extended by INCREMENT slots.
2440 public final void pushUnicodeLineSeparator() {
2441 // isUnicode means that the \r or \n has been read as a unicode character
2442 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// Number of slots added to lineEnds each time it has to grow.
2443 final int INCREMENT = 250;
2444 //currentCharacter is at position currentPosition-1
2445 if (this.checkNonExternalizedStringLiterals) {
2446 // reinitialize the current line for non externalize strings purpose
// ---- carriage return ----
2450 if (currentCharacter == '\r') {
2451 int separatorPos = currentPosition - 6;
// True when the last recorded line end is already at or beyond this
// separator (the guarded statement is not visible).
2452 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2454 //System.out.println("CR-" + separatorPos);
2456 lineEnds[++linePtr] = separatorPos;
2457 } catch (IndexOutOfBoundsException e) {
2458 //linePtr value is correct
// linePtr was incremented before the failed store; regrow and retry at the
// same index.
2459 int oldLength = lineEnds.length;
2460 int[] old = lineEnds;
2461 lineEnds = new int[oldLength + INCREMENT];
2462 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2463 lineEnds[linePtr] = separatorPos;
2465 // look-ahead for merged cr+lf
// NOTE(review): unlike pushLineSeparator(), no catch for
// IndexOutOfBoundsException is visible around this read of
// source[currentPosition] -- confirm behaviour when the separator is the
// last thing in the buffer.
2466 if (source[currentPosition] == '\n') {
2467 //System.out.println("look-ahead LF-" + currentPosition);
2468 lineEnds[linePtr] = currentPosition;
// ---- line feed ----
2476 if (currentCharacter == '\n') {
2477 //must merge eventual cr followed by lf
// A '\r' entry recorded at currentPosition - 7 is overwritten rather than
// followed by a second entry.
2478 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2479 //System.out.println("merge LF-" + (currentPosition - 1));
2480 lineEnds[linePtr] = currentPosition - 6;
2482 int separatorPos = currentPosition - 6;
2483 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2485 // System.out.println("LF-" + separatorPos);
2487 lineEnds[++linePtr] = separatorPos;
2488 } catch (IndexOutOfBoundsException e) {
2489 //linePtr value is correct
2490 int oldLength = lineEnds.length;
2491 int[] old = lineEnds;
2492 lineEnds = new int[oldLength + INCREMENT];
2493 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2494 lineEnds[linePtr] = separatorPos;
// Pushes the comment that has just been scanned onto the parallel
// commentStops / commentStarts stacks. The start is startPosition; the stop
// is currentPosition, stored positive when isJavadoc is true and negated
// otherwise (see the assignment in the catch block). Both arrays grow by 30
// slots when the store overflows.
2501 public final void recordComment(boolean isJavadoc) {
2502 // a new annotation comment is recorded
// Optimistic store; the rest of this expression is not visible in this
// listing, but the catch block below repeats the assignment in full.
2504 commentStops[++commentPtr] = isJavadoc
2507 } catch (IndexOutOfBoundsException e) {
// commentPtr was already incremented before the failed store, so the same
// index is written again once the array has been enlarged.
2508 int oldStackLength = commentStops.length;
2509 int[] oldStack = commentStops;
2510 commentStops = new int[oldStackLength + 30];
2511 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2512 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2513 //grows the positions buffers too
// commentStarts is resized using commentStops' old length, i.e. the two
// arrays are assumed to have the same size.
2514 int[] old = commentStarts;
2515 commentStarts = new int[oldStackLength + 30];
2516 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2518 //the buffer is of a correct size here
2519 commentStarts[commentPtr] = startPosition;
// Repositions the scanner so that scanning restarts at begin and stops after
// end.
//   begin - becomes initialPosition, startPosition and currentPosition
//   end   - last position to scan (inclusive); eofPosition is set one past it
// NOTE(review): one statement between the comment and the position
// assignments is not visible in this listing.
2521 public void resetTo(int begin, int end) {
2522 //reset the scanner to a given position where it may rescan again
2524 initialPosition = startPosition = currentPosition = begin;
// end + 1 would overflow for Integer.MAX_VALUE, so that value is kept as is.
2525 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2526 commentPtr = -1; // reset comment stack
// Handles the character that follows a backslash inside a single-quoted
// string: the next source character is read into currentCharacter and, via
// the switch, replaced by a single quote or a backslash.
// NOTE(review): the case labels and break statements are not visible in this
// listing, so which input selects which replacement (and what the default
// branch does besides assigning a backslash) must be confirmed against the
// complete source. No throw of InvalidInputException is visible either.
2528 public final void scanSingleQuotedEscapeCharacter()
2529 throws InvalidInputException {
2530 // the string with "\\u" is a legal string of two chars \ and u
2531 //thus we use a direct access to the source (for regular cases).
// The unicode-aware path below is disabled; the source is read directly.
2532 // if (unicodeAsBackSlash) {
2533 // // consume next character
2534 // unicodeAsBackSlash = false;
2535 // if (((currentCharacter = source[currentPosition++]) == '\\')
2536 // && (source[currentPosition] == 'u')) {
2537 // getNextUnicodeChar();
2539 // if (withoutUnicodePtr != 0) {
2540 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Consume the character after the backslash.
2544 currentCharacter = source[currentPosition++];
2545 switch (currentCharacter) {
2547 currentCharacter = '\'';
2550 currentCharacter = '\\';
2553 currentCharacter = '\\';
// Handles the character that follows a backslash inside a double-quoted
// string. The next source character is read into currentCharacter and the
// switch replaces it with the character the escape denotes: tab, newline,
// carriage return, double quote, single quote, backslash or dollar sign.
// The backspace and form-feed replacements are commented out. Anything else
// is tried as an octal escape of up to three digits, whose value is stored
// in currentCharacter.
// Throws InvalidInputException(INVALID_ESCAPE) from the octal branch; the
// condition guarding that throw is not visible in this listing.
// NOTE(review): the case labels are not visible either, so the mapping from
// input character to replacement is inferred from the assigned values only.
2557 public final void scanDoubleQuotedEscapeCharacter()
2558 throws InvalidInputException {
2559 // the string with "\\u" is a legal string of two chars \ and u
2560 //thus we use a direct access to the source (for regular cases).
// The unicode-aware path below is disabled; the source is read directly.
2561 // if (unicodeAsBackSlash) {
2562 // // consume next character
2563 // unicodeAsBackSlash = false;
2564 // if (((currentCharacter = source[currentPosition++]) == '\\')
2565 // && (source[currentPosition] == 'u')) {
2566 // getNextUnicodeChar();
2568 // if (withoutUnicodePtr != 0) {
2569 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Consume the character after the backslash.
2573 currentCharacter = source[currentPosition++];
2574 switch (currentCharacter) {
2576 // currentCharacter = '\b';
2579 currentCharacter = '\t';
2582 currentCharacter = '\n';
2585 // currentCharacter = '\f';
2588 currentCharacter = '\r';
2591 currentCharacter = '\"';
2594 currentCharacter = '\'';
2597 currentCharacter = '\\';
2600 currentCharacter = '$';
2603 // -----------octal escape--------------
2605 // OctalDigit OctalDigit
2606 // ZeroToThree OctalDigit OctalDigit
// number accumulates the octal value, starting with the digit already read.
2607 int number = Character.getNumericValue(currentCharacter);
2608 if (number >= 0 && number <= 7) {
// A first digit above 3 cannot start a three-digit escape.
2609 boolean zeroToThreeNot = number > 3;
// Second digit (optional).
2610 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2611 int digit = Character.getNumericValue(currentCharacter);
2612 if (digit >= 0 && digit <= 7) {
2613 number = (number * 8) + digit;
// Third digit (optional, only meaningful when the first was 0..3).
2615 .isDigit(currentCharacter = source[currentPosition++])) {
2616 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2617 // Digit --> ignore last character
2620 digit = Character.getNumericValue(currentCharacter);
2621 if (digit >= 0 && digit <= 7) {
2622 // has read \ZeroToThree OctalDigit OctalDigit
2623 number = (number * 8) + digit;
2624 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2625 // --> ignore last character
2629 } else { // has read \OctalDigit NonDigit--> ignore last
2633 } else { // has read \OctalDigit NonOctalDigit--> ignore last
2637 } else { // has read \OctalDigit --> ignore last character
2641 throw new InvalidInputException(INVALID_ESCAPE);
// The accumulated octal value becomes the escaped character.
2642 currentCharacter = (char) number;
// For a non-octal character the exception below is disabled.
2645 // throw new InvalidInputException(INVALID_ESCAPE);
2648 // public int scanIdentifierOrKeyword() throws InvalidInputException {
2649 // return scanIdentifierOrKeyword( false );
// Consumes the rest of an identifier and classifies it.
// After all identifier-part characters have been read, the token text
// (source[startPosition .. currentPosition - 1]) is copied into a lower-cased
// array, so keyword recognition is case-insensitive. That array is then
// matched character by character against the PHP keywords and the magic
// constants __FILE__, __LINE__, __CLASS__, __METHOD__ and __FUNCTION__.
//   isVariable - presumably selects the early TokenNameVariable return; the
//                test itself is not visible in this listing
// Returns the matching TokenName... constant, or TokenNameIdentifier when no
// keyword matches (a one-character token is always an identifier).
// NOTE(review): the case labels, the switch on length and most else/brace
// lines are not visible here; the grouping of the tests below by first letter
// and length is inferred from the keyword spellings and the section comments.
2651 public int scanIdentifierOrKeyword(boolean isVariable)
2652 throws InvalidInputException {
2654 //first dispatch on the first char.
2655 //then the length. If there are several
2656 //keywords with the same length AND the same first char, then do another
2657 //dispatch on the second char :-)...cool....but fast !
2658 useAssertAsAnIndentifier = false;
// Advance currentPosition over every remaining identifier character.
2659 while (getNextCharAsJavaIdentifierPart()) {
2662 // if (new String(getCurrentTokenSource()).equals("$this")) {
2663 // return TokenNamethis;
2665 return TokenNameVariable;
2670 // if (withoutUnicodePtr == 0)
2671 //quick test on length == 1 but not on length > 12 while most identifier
2672 //have a length which is <= 12...but there are lots of identifier with
2675 if ((length = currentPosition - startPosition) == 1)
2676 return TokenNameIdentifier;
// Lower-cased private copy of the token text used for all comparisons below.
2678 data = new char[length];
2679 index = startPosition;
2680 for (int i = 0; i < length; i++) {
2681 data[i] = Character.toLowerCase(source[index + i]);
2685 // if ((length = withoutUnicodePtr) == 1)
2686 // return TokenNameIdentifier;
2687 // // data = withoutUnicodeBuffer;
2688 // data = new char[withoutUnicodeBuffer.length];
2689 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2690 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
// Each test below walks data with ++index; because && short-circuits, index
// stops advancing at the first character that does not match.
2694 firstLetter = data[index];
2695 switch (firstLetter) {
// ---- magic constants: __file__ __line__ __class__ __method__ __function__ ----
2700 if ((data[++index] == '_') && (data[++index] == 'f')
2701 && (data[++index] == 'i') && (data[++index] == 'l')
2702 && (data[++index] == 'e') && (data[++index] == '_')
2703 && (data[++index] == '_'))
2704 return TokenNameFILE;
2705 index = 0; //__LINE__
2706 if ((data[++index] == '_') && (data[++index] == 'l')
2707 && (data[++index] == 'i') && (data[++index] == 'n')
2708 && (data[++index] == 'e') && (data[++index] == '_')
2709 && (data[++index] == '_'))
2710 return TokenNameLINE;
2714 if ((data[++index] == '_') && (data[++index] == 'c')
2715 && (data[++index] == 'l') && (data[++index] == 'a')
2716 && (data[++index] == 's') && (data[++index] == 's')
2717 && (data[++index] == '_') && (data[++index] == '_'))
2718 return TokenNameCLASS_C;
2722 if ((data[++index] == '_') && (data[++index] == 'm')
2723 && (data[++index] == 'e') && (data[++index] == 't')
2724 && (data[++index] == 'h') && (data[++index] == 'o')
2725 && (data[++index] == 'd') && (data[++index] == '_')
2726 && (data[++index] == '_'))
2727 return TokenNameMETHOD_C;
2731 if ((data[++index] == '_') && (data[++index] == 'f')
2732 && (data[++index] == 'u') && (data[++index] == 'n')
2733 && (data[++index] == 'c') && (data[++index] == 't')
2734 && (data[++index] == 'i') && (data[++index] == 'o')
2735 && (data[++index] == 'n') && (data[++index] == '_')
2736 && (data[++index] == '_'))
2737 return TokenNameFUNC_C;
2740 return TokenNameIdentifier;
2742 // as and array abstract
2746 if ((data[++index] == 's')) {
2749 return TokenNameIdentifier;
2753 if ((data[++index] == 'n') && (data[++index] == 'd')) {
2754 return TokenNameand;
2756 return TokenNameIdentifier;
2760 if ((data[++index] == 'r') && (data[++index] == 'r')
2761 && (data[++index] == 'a') && (data[++index] == 'y'))
2762 return TokenNamearray;
2764 return TokenNameIdentifier;
2766 if ((data[++index] == 'b') && (data[++index] == 's')
2767 && (data[++index] == 't') && (data[++index] == 'r')
2768 && (data[++index] == 'a') && (data[++index] == 'c')
2769 && (data[++index] == 't'))
2770 return TokenNameabstract;
2772 return TokenNameIdentifier;
2774 return TokenNameIdentifier;
// ---- break ----
2780 if ((data[++index] == 'r') && (data[++index] == 'e')
2781 && (data[++index] == 'a') && (data[++index] == 'k'))
2782 return TokenNamebreak;
2784 return TokenNameIdentifier;
2786 return TokenNameIdentifier;
2789 //case catch class clone const continue
2792 if ((data[++index] == 'a') && (data[++index] == 's')
2793 && (data[++index] == 'e'))
2794 return TokenNamecase;
2796 return TokenNameIdentifier;
2798 if ((data[++index] == 'a') && (data[++index] == 't')
2799 && (data[++index] == 'c') && (data[++index] == 'h'))
2800 return TokenNamecatch;
2802 if ((data[++index] == 'l') && (data[++index] == 'a')
2803 && (data[++index] == 's') && (data[++index] == 's'))
2804 return TokenNameclass;
2806 if ((data[++index] == 'l') && (data[++index] == 'o')
2807 && (data[++index] == 'n') && (data[++index] == 'e'))
2808 return TokenNameclone;
2810 if ((data[++index] == 'o') && (data[++index] == 'n')
2811 && (data[++index] == 's') && (data[++index] == 't'))
2812 return TokenNameconst;
2814 return TokenNameIdentifier;
2816 if ((data[++index] == 'o') && (data[++index] == 'n')
2817 && (data[++index] == 't') && (data[++index] == 'i')
2818 && (data[++index] == 'n') && (data[++index] == 'u')
2819 && (data[++index] == 'e'))
2820 return TokenNamecontinue;
2822 return TokenNameIdentifier;
2824 return TokenNameIdentifier;
2827 // declare default do die
2828 // TODO delete define ==> no keyword !
2831 if ((data[++index] == 'o'))
2834 return TokenNameIdentifier;
2836 // if ((data[++index] == 'e')
2837 // && (data[++index] == 'f')
2838 // && (data[++index] == 'i')
2839 // && (data[++index] == 'n')
2840 // && (data[++index] == 'e'))
2841 // return TokenNamedefine;
2843 // return TokenNameIdentifier;
2845 if ((data[++index] == 'e') && (data[++index] == 'c')
2846 && (data[++index] == 'l') && (data[++index] == 'a')
2847 && (data[++index] == 'r') && (data[++index] == 'e'))
2848 return TokenNamedeclare;
2850 if ((data[++index] == 'e') && (data[++index] == 'f')
2851 && (data[++index] == 'a') && (data[++index] == 'u')
2852 && (data[++index] == 'l') && (data[++index] == 't'))
2853 return TokenNamedefault;
2855 return TokenNameIdentifier;
2857 return TokenNameIdentifier;
2860 //echo else exit elseif extends eval
// NOTE(review): the else-if tests below start with data[index], i.e. they
// re-read whatever position the preceding short-circuited && chain stopped
// at. If that chain matched its first character(s) before failing, index is
// already past 1, so the following ++index tests inspect later positions than
// intended and may index past the end of data -- confirm and guard.
2863 if ((data[++index] == 'c') && (data[++index] == 'h')
2864 && (data[++index] == 'o'))
2865 return TokenNameecho;
2866 else if ((data[index] == 'l') && (data[++index] == 's')
2867 && (data[++index] == 'e'))
2868 return TokenNameelse;
2869 else if ((data[index] == 'x') && (data[++index] == 'i')
2870 && (data[++index] == 't'))
2871 return TokenNameexit;
2872 else if ((data[index] == 'v') && (data[++index] == 'a')
2873 && (data[++index] == 'l'))
2874 return TokenNameeval;
2876 return TokenNameIdentifier;
2879 if ((data[++index] == 'n') && (data[++index] == 'd')
2880 && (data[++index] == 'i') && (data[++index] == 'f'))
2881 return TokenNameendif;
2882 if ((data[index] == 'm') && (data[++index] == 'p')
2883 && (data[++index] == 't') && (data[++index] == 'y'))
2884 return TokenNameempty;
2886 return TokenNameIdentifier;
2889 if ((data[++index] == 'n') && (data[++index] == 'd')
2890 && (data[++index] == 'f') && (data[++index] == 'o')
2891 && (data[++index] == 'r'))
2892 return TokenNameendfor;
2893 else if ((data[index] == 'l') && (data[++index] == 's')
2894 && (data[++index] == 'e') && (data[++index] == 'i')
2895 && (data[++index] == 'f'))
2896 return TokenNameelseif;
2898 return TokenNameIdentifier;
2900 if ((data[++index] == 'x') && (data[++index] == 't')
2901 && (data[++index] == 'e') && (data[++index] == 'n')
2902 && (data[++index] == 'd') && (data[++index] == 's'))
2903 return TokenNameextends;
2905 return TokenNameIdentifier;
2908 if ((data[++index] == 'n') && (data[++index] == 'd')
2909 && (data[++index] == 'w') && (data[++index] == 'h')
2910 && (data[++index] == 'i') && (data[++index] == 'l')
2911 && (data[++index] == 'e'))
2912 return TokenNameendwhile;
2914 return TokenNameIdentifier;
2917 if ((data[++index] == 'n') && (data[++index] == 'd')
2918 && (data[++index] == 's') && (data[++index] == 'w')
2919 && (data[++index] == 'i') && (data[++index] == 't')
2920 && (data[++index] == 'c') && (data[++index] == 'h'))
2921 return TokenNameendswitch;
2923 return TokenNameIdentifier;
// NOTE(review): the characters tested next spell "enddeclare", yet the token
// returned is TokenNameendforeach -- confirm whether a dedicated token was
// intended.
2926 if ((data[++index] == 'n') && (data[++index] == 'd')
2927 && (data[++index] == 'd') && (data[++index] == 'e')
2928 && (data[++index] == 'c') && (data[++index] == 'l')
2929 && (data[++index] == 'a') && (data[++index] == 'r')
2930 && (data[++index] == 'e'))
2931 return TokenNameendforeach;
2933 if ((data[++index] == 'n') // endforeach
2934 && (data[++index] == 'd') && (data[++index] == 'f')
2935 && (data[++index] == 'o') && (data[++index] == 'r')
2936 && (data[++index] == 'e') && (data[++index] == 'a')
2937 && (data[++index] == 'c') && (data[++index] == 'h'))
2938 return TokenNameendforeach;
2940 return TokenNameIdentifier;
2942 return TokenNameIdentifier;
2945 //for false final function
2948 if ((data[++index] == 'o') && (data[++index] == 'r'))
2949 return TokenNamefor;
2951 return TokenNameIdentifier;
2953 // if ((data[++index] == 'a') && (data[++index] == 'l')
2954 // && (data[++index] == 's') && (data[++index] == 'e'))
2955 // return TokenNamefalse;
2956 if ((data[++index] == 'i') && (data[++index] == 'n')
2957 && (data[++index] == 'a') && (data[++index] == 'l'))
2958 return TokenNamefinal;
2960 return TokenNameIdentifier;
2963 if ((data[++index] == 'o') && (data[++index] == 'r')
2964 && (data[++index] == 'e') && (data[++index] == 'a')
2965 && (data[++index] == 'c') && (data[++index] == 'h'))
2966 return TokenNameforeach;
2968 return TokenNameIdentifier;
2971 if ((data[++index] == 'u') && (data[++index] == 'n')
2972 && (data[++index] == 'c') && (data[++index] == 't')
2973 && (data[++index] == 'i') && (data[++index] == 'o')
2974 && (data[++index] == 'n'))
2975 return TokenNamefunction;
2977 return TokenNameIdentifier;
2979 return TokenNameIdentifier;
// ---- global ----
2984 if ((data[++index] == 'l') && (data[++index] == 'o')
2985 && (data[++index] == 'b') && (data[++index] == 'a')
2986 && (data[++index] == 'l')) {
2987 return TokenNameglobal;
2990 return TokenNameIdentifier;
2992 //if int isset include include_once instanceof interface implements
2995 if (data[++index] == 'f')
2998 return TokenNameIdentifier;
3000 // if ((data[++index] == 'n') && (data[++index] == 't'))
3001 // return TokenNameint;
3003 // return TokenNameIdentifier;
3005 if ((data[++index] == 's') && (data[++index] == 's')
3006 && (data[++index] == 'e') && (data[++index] == 't'))
3007 return TokenNameisset;
3009 return TokenNameIdentifier;
3011 if ((data[++index] == 'n') && (data[++index] == 'c')
3012 && (data[++index] == 'l') && (data[++index] == 'u')
3013 && (data[++index] == 'd') && (data[++index] == 'e'))
3014 return TokenNameinclude;
3016 return TokenNameIdentifier;
3019 if ((data[++index] == 'n') && (data[++index] == 't')
3020 && (data[++index] == 'e') && (data[++index] == 'r')
3021 && (data[++index] == 'f') && (data[++index] == 'a')
3022 && (data[++index] == 'c') && (data[++index] == 'e'))
3023 return TokenNameinterface;
3025 return TokenNameIdentifier;
3028 if ((data[++index] == 'n') && (data[++index] == 's')
3029 && (data[++index] == 't') && (data[++index] == 'a')
3030 && (data[++index] == 'n') && (data[++index] == 'c')
3031 && (data[++index] == 'e') && (data[++index] == 'o')
3032 && (data[++index] == 'f'))
3033 return TokenNameinstanceof;
3034 if ((data[index] == 'm') && (data[++index] == 'p')
3035 && (data[++index] == 'l') && (data[++index] == 'e')
3036 && (data[++index] == 'm') && (data[++index] == 'e')
3037 && (data[++index] == 'n') && (data[++index] == 't')
3038 && (data[++index] == 's'))
3039 return TokenNameimplements;
3041 return TokenNameIdentifier;
3043 if ((data[++index] == 'n') && (data[++index] == 'c')
3044 && (data[++index] == 'l') && (data[++index] == 'u')
3045 && (data[++index] == 'd') && (data[++index] == 'e')
3046 && (data[++index] == '_') && (data[++index] == 'o')
3047 && (data[++index] == 'n') && (data[++index] == 'c')
3048 && (data[++index] == 'e'))
3049 return TokenNameinclude_once;
3051 return TokenNameIdentifier;
3053 return TokenNameIdentifier;
// ---- list ----
3058 if ((data[++index] == 'i') && (data[++index] == 's')
3059 && (data[++index] == 't')) {
3060 return TokenNamelist;
3063 return TokenNameIdentifier;
// ---- new ----
3068 if ((data[++index] == 'e') && (data[++index] == 'w'))
3069 return TokenNamenew;
3071 return TokenNameIdentifier;
3073 // if ((data[++index] == 'u') && (data[++index] == 'l')
3074 // && (data[++index] == 'l'))
3075 // return TokenNamenull;
3077 // return TokenNameIdentifier;
3079 return TokenNameIdentifier;
// ---- second character 'r' (token returned is not visible in this listing) ----
3084 if (data[++index] == 'r') {
3088 // if (length == 12) {
3089 // if ((data[++index] == 'l')
3090 // && (data[++index] == 'd')
3091 // && (data[++index] == '_')
3092 // && (data[++index] == 'f')
3093 // && (data[++index] == 'u')
3094 // && (data[++index] == 'n')
3095 // && (data[++index] == 'c')
3096 // && (data[++index] == 't')
3097 // && (data[++index] == 'i')
3098 // && (data[++index] == 'o')
3099 // && (data[++index] == 'n')) {
3100 // return TokenNameold_function;
3103 return TokenNameIdentifier;
3105 // print public private protected
3108 if ((data[++index] == 'r') && (data[++index] == 'i')
3109 && (data[++index] == 'n') && (data[++index] == 't')) {
3110 return TokenNameprint;
3112 return TokenNameIdentifier;
3114 if ((data[++index] == 'u') && (data[++index] == 'b')
3115 && (data[++index] == 'l') && (data[++index] == 'i')
3116 && (data[++index] == 'c')) {
3117 return TokenNamepublic;
3119 return TokenNameIdentifier;
3121 if ((data[++index] == 'r') && (data[++index] == 'i')
3122 && (data[++index] == 'v') && (data[++index] == 'a')
3123 && (data[++index] == 't') && (data[++index] == 'e')) {
3124 return TokenNameprivate;
3126 return TokenNameIdentifier;
3128 if ((data[++index] == 'r') && (data[++index] == 'o')
3129 && (data[++index] == 't') && (data[++index] == 'e')
3130 && (data[++index] == 'c') && (data[++index] == 't')
3131 && (data[++index] == 'e') && (data[++index] == 'd')) {
3132 return TokenNameprotected;
3134 return TokenNameIdentifier;
3136 return TokenNameIdentifier;
3138 //return require require_once
// This group dispatches on length with an if / else-if chain rather than a
// switch (see the visible "length == 7" and "length == 12" tests).
3140 if ((data[++index] == 'e') && (data[++index] == 't')
3141 && (data[++index] == 'u') && (data[++index] == 'r')
3142 && (data[++index] == 'n')) {
3143 return TokenNamereturn;
3145 } else if (length == 7) {
3146 if ((data[++index] == 'e') && (data[++index] == 'q')
3147 && (data[++index] == 'u') && (data[++index] == 'i')
3148 && (data[++index] == 'r') && (data[++index] == 'e')) {
3149 return TokenNamerequire;
3151 } else if (length == 12) {
3152 if ((data[++index] == 'e') && (data[++index] == 'q')
3153 && (data[++index] == 'u') && (data[++index] == 'i')
3154 && (data[++index] == 'r') && (data[++index] == 'e')
3155 && (data[++index] == '_') && (data[++index] == 'o')
3156 && (data[++index] == 'n') && (data[++index] == 'c')
3157 && (data[++index] == 'e')) {
3158 return TokenNamerequire_once;
3161 return TokenNameIdentifier;
// ---- static switch ----
// The second character is tested on its own first, so data[index] in the
// else-if branch still refers to position 1.
3166 if (data[++index] == 't')
3167 if ((data[++index] == 'a') && (data[++index] == 't')
3168 && (data[++index] == 'i') && (data[++index] == 'c')) {
3169 return TokenNamestatic;
3171 return TokenNameIdentifier;
3172 else if ((data[index] == 'w') && (data[++index] == 'i')
3173 && (data[++index] == 't') && (data[++index] == 'c')
3174 && (data[++index] == 'h'))
3175 return TokenNameswitch;
3177 return TokenNameIdentifier;
3179 return TokenNameIdentifier;
// ---- try throw ----
3185 if ((data[++index] == 'r') && (data[++index] == 'y'))
3186 return TokenNametry;
3188 return TokenNameIdentifier;
3190 // if ((data[++index] == 'r') && (data[++index] == 'u')
3191 // && (data[++index] == 'e'))
3192 // return TokenNametrue;
3194 // return TokenNameIdentifier;
3196 if ((data[++index] == 'h') && (data[++index] == 'r')
3197 && (data[++index] == 'o') && (data[++index] == 'w'))
3198 return TokenNamethrow;
3200 return TokenNameIdentifier;
3202 return TokenNameIdentifier;
// ---- use unset ----
3208 if ((data[++index] == 's') && (data[++index] == 'e'))
3209 return TokenNameuse;
3211 return TokenNameIdentifier;
3213 if ((data[++index] == 'n') && (data[++index] == 's')
3214 && (data[++index] == 'e') && (data[++index] == 't'))
3215 return TokenNameunset;
3217 return TokenNameIdentifier;
3219 return TokenNameIdentifier;
// ---- var ----
3225 if ((data[++index] == 'a') && (data[++index] == 'r'))
3226 return TokenNamevar;
3228 return TokenNameIdentifier;
3230 return TokenNameIdentifier;
// ---- while ----
3236 if ((data[++index] == 'h') && (data[++index] == 'i')
3237 && (data[++index] == 'l') && (data[++index] == 'e'))
3238 return TokenNamewhile;
3240 return TokenNameIdentifier;
3241 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3242 // (data[++index]=='e') && (data[++index]=='f')&&
3243 // (data[++index]=='p'))
3244 //return TokenNamewidefp ;
3246 //return TokenNameIdentifier;
3248 return TokenNameIdentifier;
// ---- xor ----
3254 if ((data[++index] == 'o') && (data[++index] == 'r'))
3255 return TokenNamexor;
3257 return TokenNameIdentifier;
3259 return TokenNameIdentifier;
// Any other first letter: not a keyword.
3262 return TokenNameIdentifier;
// Scans a numeric literal whose first digit is already in currentCharacter
// and returns its token kind.
//   dotPrefix - true when the digits were preceded by a '.', in which case
//               the literal starts out as floating
// Recognised forms, as far as the visible code shows:
//   - '0' followed by x/X and at least one hex digit -> TokenNameIntegerLiteral
//   - '0' followed by more digits: TokenNameDoubleLiteral when a d/D suffix or
//     a '.' follows, otherwise TokenNameIntegerLiteral
//   - plain digits with optional '.', optional exponent (e/E, optional sign,
//     digits) and optional d/D suffix
// Throws InvalidInputException(INVALID_HEXA) when no hex digit follows the
// x/X, and InvalidInputException(INVALID_FLOAT) when an exponent has no digits.
// NOTE(review): several short lines (else branches, assignments to floating,
// closing braces) are not visible in this listing.
3265 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3266 //when entering this method the currentCharacter is the first
3267 //digit of the number , i.e. it may be preceded by a . when
3269 boolean floating = dotPrefix;
3270 if ((!dotPrefix) && (currentCharacter == '0')) {
// A non-negative result from getNextChar('x', 'X') means one of the two
// characters was consumed.
3271 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3272 //force the first char of the hexa number do exist...
3273 // consume next character
3274 unicodeAsBackSlash = false;
3275 currentCharacter = source[currentPosition++];
3276 // if (((currentCharacter = source[currentPosition++]) == '\\')
3277 // && (source[currentPosition] == 'u')) {
3278 // getNextUnicodeChar();
3280 // if (withoutUnicodePtr != 0) {
3281 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Character.digit returns -1 for a character that is not a digit in radix 16.
3284 if (Character.digit(currentCharacter, 16) == -1)
3285 throw new InvalidInputException(INVALID_HEXA);
// Consume the remaining hex digits.
3287 while (getNextCharAsDigit(16)) {
3289 // if (getNextChar('l', 'L') >= 0)
3290 // return TokenNameLongLiteral;
3292 return TokenNameIntegerLiteral;
3294 //there is x or X in the number
3295 //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3296 // 00078.0 is true !!!!! crazy language
3297 if (getNextCharAsDigit()) {
3298 //-------------potential octal-----------------
3299 while (getNextCharAsDigit()) {
3301 // if (getNextChar('l', 'L') >= 0) {
3302 // return TokenNameLongLiteral;
3305 // if (getNextChar('f', 'F') >= 0) {
3306 // return TokenNameFloatingPointLiteral;
3308 if (getNextChar('d', 'D') >= 0) {
3309 return TokenNameDoubleLiteral;
3310 } else { //make the distinction between octal and float ....
3311 if (getNextChar('.')) { //bingo ! ....
// Fractional digits.
3312 while (getNextCharAsDigit()) {
3314 if (getNextChar('e', 'E') >= 0) {
3315 // consume next character
3316 unicodeAsBackSlash = false;
3317 currentCharacter = source[currentPosition++];
3318 // if (((currentCharacter = source[currentPosition++]) == '\\')
3319 // && (source[currentPosition] == 'u')) {
3320 // getNextUnicodeChar();
3322 // if (withoutUnicodePtr != 0) {
3323 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Optional exponent sign: skip it and read the character after it.
3326 if ((currentCharacter == '-') || (currentCharacter == '+')) {
3327 // consume next character
3328 unicodeAsBackSlash = false;
3329 currentCharacter = source[currentPosition++];
3330 // if (((currentCharacter = source[currentPosition++]) == '\\')
3331 // && (source[currentPosition] == 'u')) {
3332 // getNextUnicodeChar();
3334 // if (withoutUnicodePtr != 0) {
3335 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3336 // currentCharacter;
// The exponent needs at least one digit.
3340 if (!Character.isDigit(currentCharacter))
3341 throw new InvalidInputException(INVALID_FLOAT);
3342 while (getNextCharAsDigit()) {
3345 // if (getNextChar('f', 'F') >= 0)
3346 // return TokenNameFloatingPointLiteral;
3347 getNextChar('d', 'D'); //jump over potential d or D
3348 return TokenNameDoubleLiteral;
3350 return TokenNameIntegerLiteral;
// ---- general case: integer part ----
3357 while (getNextCharAsDigit()) {
3359 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3360 // return TokenNameLongLiteral;
3361 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3362 while (getNextCharAsDigit()) {
3366 //if floating is true both exponent and suffix may be optional
3367 if (getNextChar('e', 'E') >= 0) {
3369 // consume next character
3370 unicodeAsBackSlash = false;
3371 currentCharacter = source[currentPosition++];
3372 // if (((currentCharacter = source[currentPosition++]) == '\\')
3373 // && (source[currentPosition] == 'u')) {
3374 // getNextUnicodeChar();
3376 // if (withoutUnicodePtr != 0) {
3377 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3380 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3383 unicodeAsBackSlash = false;
3384 currentCharacter = source[currentPosition++];
3385 // if (((currentCharacter = source[currentPosition++]) == '\\')
3386 // && (source[currentPosition] == 'u')) {
3387 // getNextUnicodeChar();
3389 // if (withoutUnicodePtr != 0) {
3390 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// The exponent needs at least one digit.
3394 if (!Character.isDigit(currentCharacter))
3395 throw new InvalidInputException(INVALID_FLOAT);
3396 while (getNextCharAsDigit()) {
// An explicit d/D suffix always yields a double.
3399 if (getNextChar('d', 'D') >= 0)
3400 return TokenNameDoubleLiteral;
3401 // if (getNextChar('f', 'F') >= 0)
3402 // return TokenNameFloatingPointLiteral;
3403 //the long flag has been tested before
3404 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3407 * Search the line number corresponding to a specific position
// Maps a source position to a line number using the recorded line ends
// (lineEnds[0 .. linePtr]).
//   position - source position to look up
// NOTE(review): the loop header, the midpoint computation and every return
// statement are not visible in this listing. The g/d bounds and the
// comparisons of position against lineEnds[m] look like a binary search over
// the recorded line ends -- confirm against the complete source, including
// what is returned when lineEnds is null or empty.
3410 public final int getLineNumber(int position) {
3411 if (lineEnds == null)
// Number of line ends recorded so far.
3413 int length = linePtr + 1;
// g and d bound the index range still under consideration.
3416 int g = 0, d = length - 1;
3420 if (position < lineEnds[m]) {
3422 } else if (position > lineEnds[m]) {
3428 if (position < lineEnds[m]) {
// Setter taking the new PHP-mode flag.
// NOTE(review): the body is not visible in this listing; presumably it
// stores mode in the phpMode field -- confirm.
3433 public void setPHPMode(boolean mode) {
// Installs the character buffer to scan and rewinds the scanner to its start.
//   source - characters to scan; null is replaced by an empty array, any
//            other array is used as is (not copied)
// Also clears containsAssertKeyword and allocates withoutUnicodeBuffer with
// the same length as the source.
// NOTE(review): one or more short statements between the branches and the
// position reset are not visible in this listing.
3436 public final void setSource(char[] source) {
3437 //the source-buffer is set to sourceString
3438 if (source == null) {
3439 this.source = new char[0];
3441 this.source = source;
3444 initialPosition = currentPosition = 0;
3445 containsAssertKeyword = false;
3446 withoutUnicodeBuffer = new char[this.source.length];
// Debugging dump of the scanner state: the whole source with the current
// token (startPosition .. currentPosition - 1) framed by "Starts here -->"
// and "<-- Ends here" markers. When the scanner is at or beyond the end of
// the source, a short EOF notice followed by the whole source is returned
// instead.
3448 public String toString() {
3449 if (startPosition == source.length)
3450 return "EOF\n\n" + new String(source); //$NON-NLS-1$
3451 if (currentPosition > source.length)
3452 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
// front: everything before the current token.
3453 char front[] = new char[startPosition];
3454 System.arraycopy(source, 0, front, 0, startPosition);
// middle: the current token itself; the expression simplifies to
// currentPosition - startPosition.
3455 int middleLength = (currentPosition - 1) - startPosition + 1;
3457 if (middleLength > -1) {
3458 middle = new char[middleLength];
3459 System.arraycopy(source, startPosition, middle, 0, middleLength);
3461 middle = new char[0];
// end: everything from currentPosition onwards.
// NOTE(review): the array is allocated with source.length - (currentPosition - 1)
// elements but only source.length - (currentPosition - 1) - 1 characters are
// copied, so its last element keeps the default NUL character -- confirm
// whether that is intended.
3463 char end[] = new char[source.length - (currentPosition - 1)];
3464 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length
3465 - (currentPosition - 1) - 1);
3466 return new String(front)
3467 + "\n===============================\nStarts here -->" //$NON-NLS-1$
3468 + new String(middle)
3469 + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable description of the token kind <code>act</code>.
 * Keywords, operators and punctuation are rendered as fixed text. Tokens
 * that carry their own text (inline HTML, identifiers, variables, literals,
 * strings, heredocs, whitespace and comments) are rendered as the kind name
 * followed by the text returned by getCurrentTokenSource() in parentheses,
 * so for those kinds the result describes whatever token the scanner is
 * currently positioned on.
 *
 * @param act the token kind, compared against the TokenName... constants
 * @return the printable form of the token kind
 */
3472 public final String toStringAction(int act) {
3474 case TokenNameERROR :
3475 return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
// tokens that carry text: kind name plus the current token source
3477 case TokenNameINLINE_HTML :
3478 return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3479 case TokenNameIdentifier :
3480 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3481 case TokenNameVariable :
3482 return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
// keywords are rendered as fixed text
3483 case TokenNameabstract :
3484 return "abstract"; //$NON-NLS-1$
3486 return "AND"; //$NON-NLS-1$
3487 case TokenNamearray :
3488 return "array"; //$NON-NLS-1$
3490 return "as"; //$NON-NLS-1$
3491 case TokenNamebreak :
3492 return "break"; //$NON-NLS-1$
3493 case TokenNamecase :
3494 return "case"; //$NON-NLS-1$
3495 case TokenNameclass :
3496 return "class"; //$NON-NLS-1$
3497 case TokenNameclone :
3500 case TokenNameconst :
3503 case TokenNamecontinue :
3504 return "continue"; //$NON-NLS-1$
3505 case TokenNamedefault :
3506 return "default"; //$NON-NLS-1$
3507 // case TokenNamedefine :
3508 // return "define"; //$NON-NLS-1$
3510 return "do"; //$NON-NLS-1$
3511 case TokenNameecho :
3512 return "echo"; //$NON-NLS-1$
3513 case TokenNameelse :
3514 return "else"; //$NON-NLS-1$
3515 case TokenNameelseif :
3516 return "elseif"; //$NON-NLS-1$
3517 case TokenNameendfor :
3518 return "endfor"; //$NON-NLS-1$
3519 case TokenNameendforeach :
3520 return "endforeach"; //$NON-NLS-1$
3521 case TokenNameendif :
3522 return "endif"; //$NON-NLS-1$
3523 case TokenNameendswitch :
3524 return "endswitch"; //$NON-NLS-1$
3525 case TokenNameendwhile :
3526 return "endwhile"; //$NON-NLS-1$
3527 case TokenNameextends :
3528 return "extends"; //$NON-NLS-1$
3529 // case TokenNamefalse :
3530 // return "false"; //$NON-NLS-1$
3531 case TokenNamefinal :
3532 return "final"; //$NON-NLS-1$
3534 return "for"; //$NON-NLS-1$
3535 case TokenNameforeach :
3536 return "foreach"; //$NON-NLS-1$
3537 case TokenNamefunction :
3538 return "function"; //$NON-NLS-1$
3539 case TokenNameglobal :
3540 return "global"; //$NON-NLS-1$
3542 return "if"; //$NON-NLS-1$
3543 case TokenNameimplements :
3544 return "implements"; //$NON-NLS-1$
3545 case TokenNameinclude :
3546 return "include"; //$NON-NLS-1$
3547 case TokenNameinclude_once :
3548 return "include_once"; //$NON-NLS-1$
3549 case TokenNameinterface :
3550 return "interface"; //$NON-NLS-1$
3551 case TokenNameisset :
3552 return "isset"; //$NON-NLS-1$
3553 case TokenNamelist :
3554 return "list"; //$NON-NLS-1$
3556 return "new"; //$NON-NLS-1$
3557 // case TokenNamenull :
3558 // return "null"; //$NON-NLS-1$
3560 return "OR"; //$NON-NLS-1$
3561 case TokenNameprint :
3562 return "print"; //$NON-NLS-1$
3563 case TokenNameprivate :
3564 return "private"; //$NON-NLS-1$
3565 case TokenNameprotected :
3566 return "protected"; //$NON-NLS-1$
3567 case TokenNamepublic :
3568 return "public"; //$NON-NLS-1$
3569 case TokenNamerequire :
3570 return "require"; //$NON-NLS-1$
3571 case TokenNamerequire_once :
3572 return "require_once"; //$NON-NLS-1$
3573 case TokenNamereturn :
3574 return "return"; //$NON-NLS-1$
3575 case TokenNamestatic :
3576 return "static"; //$NON-NLS-1$
3577 case TokenNameswitch :
3578 return "switch"; //$NON-NLS-1$
3579 // case TokenNametrue :
3580 // return "true"; //$NON-NLS-1$
3581 case TokenNameunset :
3582 return "unset"; //$NON-NLS-1$
3584 return "var"; //$NON-NLS-1$
3585 case TokenNamewhile :
3586 return "while"; //$NON-NLS-1$
3588 return "XOR"; //$NON-NLS-1$
3589 // case TokenNamethis :
3590 // return "$this"; //$NON-NLS-1$
// numeric and string tokens: kind name plus the current token source
3591 case TokenNameIntegerLiteral :
3592 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3593 case TokenNameDoubleLiteral :
3594 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3595 case TokenNameStringLiteral :
3596 return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3597 case TokenNameStringConstant :
3598 return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3599 case TokenNameStringInterpolated :
3600 return "StringInterpolated(" + new String(getCurrentTokenSource())
3601 + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3602 case TokenNameHEREDOC :
3603 return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
// operators and punctuation are rendered as their symbol
3604 case TokenNamePLUS_PLUS :
3605 return "++"; //$NON-NLS-1$
3606 case TokenNameMINUS_MINUS :
3607 return "--"; //$NON-NLS-1$
3608 case TokenNameEQUAL_EQUAL :
3609 return "=="; //$NON-NLS-1$
3610 case TokenNameEQUAL_EQUAL_EQUAL :
3611 return "==="; //$NON-NLS-1$
3612 case TokenNameEQUAL_GREATER :
3613 return "=>"; //$NON-NLS-1$
3614 case TokenNameLESS_EQUAL :
3615 return "<="; //$NON-NLS-1$
3616 case TokenNameGREATER_EQUAL :
3617 return ">="; //$NON-NLS-1$
3618 case TokenNameNOT_EQUAL :
3619 return "!="; //$NON-NLS-1$
3620 case TokenNameNOT_EQUAL_EQUAL :
3621 return "!=="; //$NON-NLS-1$
3622 case TokenNameLEFT_SHIFT :
3623 return "<<"; //$NON-NLS-1$
3624 case TokenNameRIGHT_SHIFT :
3625 return ">>"; //$NON-NLS-1$
3626 case TokenNamePLUS_EQUAL :
3627 return "+="; //$NON-NLS-1$
3628 case TokenNameMINUS_EQUAL :
3629 return "-="; //$NON-NLS-1$
3630 case TokenNameMULTIPLY_EQUAL :
3631 return "*="; //$NON-NLS-1$
3632 case TokenNameDIVIDE_EQUAL :
3633 return "/="; //$NON-NLS-1$
3634 case TokenNameAND_EQUAL :
3635 return "&="; //$NON-NLS-1$
3636 case TokenNameOR_EQUAL :
3637 return "|="; //$NON-NLS-1$
3638 case TokenNameXOR_EQUAL :
3639 return "^="; //$NON-NLS-1$
3640 case TokenNameREMAINDER_EQUAL :
3641 return "%="; //$NON-NLS-1$
3642 case TokenNameDOT_EQUAL :
3643 return ".="; //$NON-NLS-1$
3644 case TokenNameLEFT_SHIFT_EQUAL :
3645 return "<<="; //$NON-NLS-1$
3646 case TokenNameRIGHT_SHIFT_EQUAL :
3647 return ">>="; //$NON-NLS-1$
3648 case TokenNameOR_OR :
3649 return "||"; //$NON-NLS-1$
3650 case TokenNameAND_AND :
3651 return "&&"; //$NON-NLS-1$
3652 case TokenNamePLUS :
3653 return "+"; //$NON-NLS-1$
3654 case TokenNameMINUS :
3655 return "-"; //$NON-NLS-1$
3656 case TokenNameMINUS_GREATER :
3659 return "!"; //$NON-NLS-1$
3660 case TokenNameREMAINDER :
3661 return "%"; //$NON-NLS-1$
3663 return "^"; //$NON-NLS-1$
3665 return "&"; //$NON-NLS-1$
3666 case TokenNameMULTIPLY :
3667 return "*"; //$NON-NLS-1$
3669 return "|"; //$NON-NLS-1$
3670 case TokenNameTWIDDLE :
3671 return "~"; //$NON-NLS-1$
3672 case TokenNameTWIDDLE_EQUAL :
3673 return "~="; //$NON-NLS-1$
3674 case TokenNameDIVIDE :
3675 return "/"; //$NON-NLS-1$
3676 case TokenNameGREATER :
3677 return ">"; //$NON-NLS-1$
3678 case TokenNameLESS :
3679 return "<"; //$NON-NLS-1$
3680 case TokenNameLPAREN :
3681 return "("; //$NON-NLS-1$
3682 case TokenNameRPAREN :
3683 return ")"; //$NON-NLS-1$
3684 case TokenNameLBRACE :
3685 return "{"; //$NON-NLS-1$
3686 case TokenNameRBRACE :
3687 return "}"; //$NON-NLS-1$
3688 case TokenNameLBRACKET :
3689 return "["; //$NON-NLS-1$
3690 case TokenNameRBRACKET :
3691 return "]"; //$NON-NLS-1$
3692 case TokenNameSEMICOLON :
3693 return ";"; //$NON-NLS-1$
3694 case TokenNameQUESTION :
3695 return "?"; //$NON-NLS-1$
3696 case TokenNameCOLON :
3697 return ":"; //$NON-NLS-1$
3698 case TokenNameCOMMA :
3699 return ","; //$NON-NLS-1$
3701 return "."; //$NON-NLS-1$
3702 case TokenNameEQUAL :
3703 return "="; //$NON-NLS-1$
3706 case TokenNameDOLLAR :
3708 // case TokenNameDOLLAR_LBRACE :
3711 return "EOF"; //$NON-NLS-1$
// whitespace and comment tokens: kind name plus the current token source
3712 case TokenNameWHITESPACE :
3713 return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3714 case TokenNameCOMMENT_LINE :
3715 return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3716 case TokenNameCOMMENT_BLOCK :
3717 return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3718 case TokenNameCOMMENT_PHPDOC :
3719 return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3720 // case TokenNameHTML :
3721 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
// __FILE__-style constants are rendered by their source spelling
3723 case TokenNameFILE :
3724 return "__FILE__"; //$NON-NLS-1$
3725 case TokenNameLINE :
3726 return "__LINE__"; //$NON-NLS-1$
3727 case TokenNameCLASS_C :
3728 return "__CLASS__"; //$NON-NLS-1$
3729 case TokenNameMETHOD_C :
3730 return "__METHOD__"; //$NON-NLS-1$
3731 case TokenNameFUNC_C :
3732 return "__FUNCTION__"; //$NON-NLS-1$
// cast tokens are rendered with blanks inside the parentheses
3733 case TokenNameboolCAST :
3734 return "( bool )"; //$NON-NLS-1$
3735 case TokenNameintCAST :
3736 return "( int )"; //$NON-NLS-1$
3737 case TokenNamedoubleCAST :
3738 return "( double )"; //$NON-NLS-1$
3739 case TokenNameobjectCAST :
3740 return "( object )"; //$NON-NLS-1$
3741 case TokenNamestringCAST :
3742 return "( string )"; //$NON-NLS-1$
// NOTE(review): presumably the fallback for kinds not listed above -
// reports the raw kind number followed by the current token text; confirm
3744 return "not-a-token(" + (new Integer(act)) + ") "
3745 + new String(getCurrentTokenSource()); //$NON-NLS-1$
/**
 * Creates a scanner by forwarding the three flags unchanged to the
 * four-argument constructor and passing <code>false</code> as its fourth
 * argument.
 *
 * @param tokenizeComments forwarded as-is
 * @param tokenizeWhiteSpace forwarded as-is
 * @param checkNonExternalizedStringLiterals forwarded as-is
 */
3748 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3749 boolean checkNonExternalizedStringLiterals) {
3750 this(tokenizeComments, tokenizeWhiteSpace,
3751 checkNonExternalizedStringLiterals, false);
/**
 * Creates a scanner and stores each flag in the field of the same name.
 * The end-of-file position starts at <code>Integer.MAX_VALUE</code>.
 *
 * @param tokenizeComments stored in <code>this.tokenizeComments</code>
 * @param tokenizeWhiteSpace stored in <code>this.tokenizeWhiteSpace</code>
 * @param checkNonExternalizedStringLiterals stored in
 *            <code>this.checkNonExternalizedStringLiterals</code>
 * @param assertMode stored in <code>this.assertMode</code>
 */
3753 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3754 boolean checkNonExternalizedStringLiterals, boolean assertMode) {
// largest possible value, i.e. no end-of-file limit is set yet
3755 this.eofPosition = Integer.MAX_VALUE;
3756 this.tokenizeComments = tokenizeComments;
3757 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3758 this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3759 this.assertMode = assertMode;
/**
 * Hands the line tracked in <code>currentLine</code> to
 * parseTags(NLSLine); <code>currentLine</code> is tested against
 * <code>null</code> first.
 *
 * @throws InvalidInputException declared because parseTags declares it
 */
3761 private void checkNonExternalizeString() throws InvalidInputException {
3762 if (currentLine == null)
3764 parseTags(currentLine);
/**
 * Scans the text of the current token for tags of the form
 * TAG_PREFIX + number + TAG_POSTFIX, checks each tag's index against
 * <code>line</code>, and then rebuilds <code>nonNLSStrings</code> from the
 * literals that are still present (non-null) in <code>line</code>.
 * If no literal is left, <code>nonNLSStrings</code> is set to
 * <code>null</code>; otherwise
 * <code>wasNonExternalizedStringLiteral</code> is set to <code>true</code>.
 *
 * @param line the string literals recorded for the line being checked
 * @throws InvalidInputException declared by this method and by
 *             getCurrentTokenSource()'s caller chain - TODO confirm source
 */
3766 private void parseTags(NLSLine line) throws InvalidInputException {
3767 String s = new String(getCurrentTokenSource());
// position of the first tag in the token text, or -1 if there is none
3768 int pos = s.indexOf(TAG_PREFIX);
3769 int lineLength = line.size();
// the text between the prefix and the next postfix is the tag's index
3771 int start = pos + TAG_PREFIX_LENGTH;
3772 int end = s.indexOf(TAG_POSTFIX, start);
// NOTE(review): if TAG_POSTFIX does not occur after the prefix, end is -1
// and substring(start, end) throws StringIndexOutOfBoundsException -
// confirm that only well-formed tags can reach this point
3773 String index = s.substring(start, end);
3776 i = Integer.parseInt(index) - 1;
3777 // Tags are one based not zero based.
3778 } catch (NumberFormatException e) {
3779 i = -1; // we don't want to consider this as a valid NLS tag
3781 if (line.exists(i)) {
// continue the search behind the tag that was just handled
3784 pos = s.indexOf(TAG_PREFIX, start);
// collect the literals that are still non-null in the line
3786 this.nonNLSStrings = new StringLiteral[lineLength];
3787 int nonNLSCounter = 0;
3788 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3789 StringLiteral literal = (StringLiteral) iterator.next();
3790 if (literal != null) {
3791 this.nonNLSStrings[nonNLSCounter++] = literal;
// nothing collected: there is no literal to report
3794 if (nonNLSCounter == 0) {
3795 this.nonNLSStrings = null;
3799 this.wasNonExternalizedStringLiteral = true;
// fewer literals than slots: copy into an array of exactly nonNLSCounter
// elements
3800 if (nonNLSCounter != lineLength) {
3801 System.arraycopy(this.nonNLSStrings, 0,
3802 (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
/**
 * Decodes the escape sequence at the current position and leaves the
 * resulting character in <code>currentCharacter</code>.
 * The visible assignments produce backspace, tab, newline, form feed,
 * carriage return, double quote, single quote and backslash; in addition an
 * octal escape of one to three digits is converted to the character with
 * that value (see the grammar in the comments below).
 *
 * @throws InvalidInputException with INVALID_ESCAPE when the sequence is
 *             not accepted
 */
3807 public final void scanEscapeCharacter() throws InvalidInputException {
3808 // the string with "\\u" is a legal string of two chars \ and u
3809 //thus we use a direct access to the source (for regular cases).
3810 if (unicodeAsBackSlash) {
3811 // consume next character
3812 unicodeAsBackSlash = false;
3813 // if (((currentCharacter = source[currentPosition++]) == '\\') &&
3814 // (source[currentPosition] == 'u')) {
3815 // getNextUnicodeChar();
// keep the unicode-free copy of the token in step when it is in use
3817 if (withoutUnicodePtr != 0) {
3818 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// read the character that selects the escape, straight from the source
3822 currentCharacter = source[currentPosition++];
3823 switch (currentCharacter) {
3825 currentCharacter = '\b';
3828 currentCharacter = '\t';
3831 currentCharacter = '\n';
3834 currentCharacter = '\f';
3837 currentCharacter = '\r';
3840 currentCharacter = '\"';
3843 currentCharacter = '\'';
3846 currentCharacter = '\\';
3849 // -----------octal escape--------------
3851 // OctalDigit OctalDigit
3852 // ZeroToThree OctalDigit OctalDigit
// values outside 0..7 (including the negative results getNumericValue
// gives for characters without a numeric value) fail the range test below
3853 int number = Character.getNumericValue(currentCharacter);
3854 if (number >= 0 && number <= 7) {
// true when the first digit is 4..7; per the grammar above only a 0..3
// lead digit may be followed by two further octal digits
3855 boolean zeroToThreeNot = number > 3;
3856 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3857 int digit = Character.getNumericValue(currentCharacter);
3858 if (digit >= 0 && digit <= 7) {
3859 number = (number * 8) + digit;
3861 .isDigit(currentCharacter = source[currentPosition++])) {
3862 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3863 // Digit --> ignore last character
3866 digit = Character.getNumericValue(currentCharacter);
3867 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
3868 // OctalDigit OctalDigit
3869 number = (number * 8) + digit;
3870 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3871 // --> ignore last character
3875 } else { // has read \OctalDigit NonDigit--> ignore last
3879 } else { // has read \OctalDigit NonOctalDigit--> ignore last
3883 } else { // has read \OctalDigit --> ignore last character
3887 throw new InvalidInputException(INVALID_ESCAPE);
// the accumulated octal value becomes the resulting character
3888 currentCharacter = (char) number;
3890 throw new InvalidInputException(INVALID_ESCAPE);
3893 // check presence of task: tags
3894 public void checkTaskTag(int commentStart, int commentEnd) {
3895 // only look for newer task: tags
3896 if (this.foundTaskCount > 0
3897 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
3900 int foundTaskIndex = this.foundTaskCount;
3901 nextChar : for (int i = commentStart; i < commentEnd
3902 && i < this.eofPosition; i++) {
3904 char[] priority = null;
3905 // check for tag occurrence
3906 nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
3907 tag = this.taskTags[itag];
3908 priority = this.taskPriorities != null
3909 && itag < this.taskPriorities.length
3910 ? this.taskPriorities[itag]
3912 int tagLength = tag.length;
3913 for (int t = 0; t < tagLength; t++) {
3914 if (this.source[i + t] != tag[t])
3917 if (this.foundTaskTags == null) {
3918 this.foundTaskTags = new char[5][];
3919 this.foundTaskMessages = new char[5][];
3920 this.foundTaskPriorities = new char[5][];
3921 this.foundTaskPositions = new int[5][];
3922 } else if (this.foundTaskCount == this.foundTaskTags.length) {
3923 System.arraycopy(this.foundTaskTags, 0,
3924 this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
3925 this.foundTaskCount);
3926 System.arraycopy(this.foundTaskMessages, 0,
3927 this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
3928 this.foundTaskCount);
3929 System.arraycopy(this.foundTaskPriorities, 0,
3930 this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
3931 0, this.foundTaskCount);
3932 System.arraycopy(this.foundTaskPositions, 0,
3933 this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
3934 this.foundTaskCount);
3936 this.foundTaskTags[this.foundTaskCount] = tag;
3937 this.foundTaskPriorities[this.foundTaskCount] = priority;
3938 this.foundTaskPositions[this.foundTaskCount] = new int[]{i,
3940 this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
3941 this.foundTaskCount++;
3942 i += tagLength - 1; // will be incremented when looping
3945 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
3946 // retrieve message start and end positions
3947 int msgStart = this.foundTaskPositions[i][0]
3948 + this.foundTaskTags[i].length;
3949 int max_value = i + 1 < this.foundTaskCount
3950 ? this.foundTaskPositions[i + 1][0] - 1
3952 // at most beginning of next task
3953 if (max_value < msgStart)
3954 max_value = msgStart; // would only occur if tag is before EOF.
3957 for (int j = msgStart; j < max_value; j++) {
3958 if ((c = this.source[j]) == '\n' || c == '\r') {
3964 for (int j = max_value; j > msgStart; j--) {
3965 if ((c = this.source[j]) == '*') {
3973 if (msgStart == end)
3976 while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
3978 while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
3980 // update the end position of the task
3981 this.foundTaskPositions[i][1] = end;
3982 // get the message source
3983 final int messageLength = end - msgStart + 1;
3984 char[] message = new char[messageLength];
3985 System.arraycopy(source, msgStart, message, 0, messageLength);
3986 this.foundTaskMessages[i] = message;