1 /*******************************************************************************
2 * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Common Public License v0.5
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/cpl-v05.html
9 * IBM Corporation - initial API and implementation
10 ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
17 import net.sourceforge.phpdt.core.compiler.IScanner;
18 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
19 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
20 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
22 public class Scanner implements IScanner, ITerminalSymbols {
25 - getNextToken() which returns the current type of the token
26 (this value is not memorized by the scanner)
27 - getCurrentTokenSource() which provides the token "REAL" source
28 (aka all unicode have been transformed into a correct char)
29 - sourceStart gives the position into the stream
30 - currentPosition-1 gives the sourceEnd position into the stream
// ---- Scanner state: mostly public fields that the scanning methods of this class read and write directly ----
34 private boolean assertMode;
35 public boolean useAssertAsAnIndentifier = false;
36 //flag indicating whether the processed source contains occurrences of the keyword assert
37 public boolean containsAssertKeyword = false;
// when set, getNextToken records line separators as it meets '\r' / '\n'
39 public boolean recordLineSeparator;
// last character read from source (already decoded when it came from a unicode escape)
40 public char currentCharacter;
// offset in source where the current token starts (see getCurrentTokenStartPosition)
41 public int startPosition;
// offset of the next character to read; currentPosition - 1 is the end of the current token
42 public int currentPosition;
43 public int initialPosition, eofPosition;
44 // after eofPosition, EOF tokens are generated instead of real tokens from the source
// when set, getNextToken returns comments / white space as tokens of their own
46 public boolean tokenizeComments;
47 public boolean tokenizeWhiteSpace;
49 //source should be viewed as a window (aka a part)
50 //of an entire very large stream
// holds the current token with unicode escapes replaced by the characters they denote; slot 0 is unused
54 public char[] withoutUnicodeBuffer;
55 public int withoutUnicodePtr; //when == 0 ==> no unicode in the current token
// true when the last decoded unicode escape was a backslash
56 public boolean unicodeAsBackSlash = false;
58 public boolean scanningFloatLiteral = false;
60 //support for /** comments
61 //public char[][] comments = new char[10][];
62 public int[] commentStops = new int[10];
63 public int[] commentStarts = new int[10];
64 public int commentPtr = -1; // no comment test with commentPtr value -1
66 //diet parsing support - jump over some method body when requested
67 public boolean diet = false;
69 //support for the poor-line-debuggers ....
70 //remember the position of the cr/lf
// lineEnds[n - 1] is the end position of 1-based line n (see getLineEnd / getLineStart)
71 public int[] lineEnds = new int[250];
72 public int linePtr = -1;
73 public boolean wasAcr = false;
// Message keys used to describe scanning failures. Several of them (INVALID_CHARACTER_CONSTANT,
// INVALID_CHAR_IN_STRING, INVALID_UNICODE_ESCAPE, UNTERMINATED_STRING, UNTERMINATED_COMMENT) are passed
// to InvalidInputException by getNextToken / getNextUnicodeChar; getNextToken also compares an
// exception message against INVALID_ESCAPE.
75 public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
77 public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
78 public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
79 public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
80 public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
81 public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
82 public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
83 public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
85 public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
86 public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
87 public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
88 public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
90 //----------------optimized identifier management------------------
// one shared single-character array per lower-case ASCII letter
// (presumably handed out for one-letter identifiers instead of allocating a new array - TODO confirm)
91 static final char[] charArray_a = new char[] { 'a' },
92 charArray_b = new char[] { 'b' },
93 charArray_c = new char[] { 'c' },
94 charArray_d = new char[] { 'd' },
95 charArray_e = new char[] { 'e' },
96 charArray_f = new char[] { 'f' },
97 charArray_g = new char[] { 'g' },
98 charArray_h = new char[] { 'h' },
99 charArray_i = new char[] { 'i' },
100 charArray_j = new char[] { 'j' },
101 charArray_k = new char[] { 'k' },
102 charArray_l = new char[] { 'l' },
103 charArray_m = new char[] { 'm' },
104 charArray_n = new char[] { 'n' },
105 charArray_o = new char[] { 'o' },
106 charArray_p = new char[] { 'p' },
107 charArray_q = new char[] { 'q' },
108 charArray_r = new char[] { 'r' },
109 charArray_s = new char[] { 's' },
110 charArray_t = new char[] { 't' },
111 charArray_u = new char[] { 'u' },
112 charArray_v = new char[] { 'v' },
113 charArray_w = new char[] { 'w' },
114 charArray_x = new char[] { 'x' },
115 charArray_y = new char[] { 'y' },
116 charArray_z = new char[] { 'z' };
// placeholder of six NUL characters; every slot of charArray_length initially refers to it
118 static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
119 static final int TableSize = 30, InternalTableSize = 6; //30*6 = 180 entries
// getCurrentIdentifierSource dispatches to the optimizedCurrentTokenSourceN helpers for the short token lengths
120 public static final int OptimizedLength = 6;
// per-instance table of dimensions [OptimizedLength][TableSize][InternalTableSize], each slot holding a char[]
// (presumably a cache of short identifier sources - TODO confirm against the optimizedCurrentTokenSourceN helpers)
122 final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
123 // support for detecting non-externalized string literals
124 int currentLineNr = -1;
125 int previousLineNr = -1;
// NLSLine that string literals are currently being added to; getNextToken creates it lazily when it is null
126 NLSLine currentLine = null;
// every NLSLine created so far (getNextToken appends each new currentLine here)
127 List lines = new ArrayList();
// text that precedes / follows the integer inside a non-NLS tag comment
128 public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
129 public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
130 public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
131 public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
132 public StringLiteral[] nonNLSStrings = null;
// when true, getNextToken records each string literal it scans into currentLine
133 public boolean checkNonExternalizedStringLiterals = true;
134 public boolean wasNonExternalizedStringLiteral = false;
// point every slot of charArray_length at the shared initCharArray placeholder
// (the outer bound 6 has the same value as OptimizedLength)
137 for (int i = 0; i < 6; i++) {
138 for (int j = 0; j < TableSize; j++) {
139 for (int k = 0; k < InternalTableSize; k++) {
140 charArray_length[i][j][k] = initCharArray;
// static counters, one per token length 2..6
// (presumably the next slot to overwrite in the matching charArray_length table - TODO confirm)
145 static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
// bracket kinds: three distinct kinds numbered 0..2; BracketKinds is their count
147 public static final int RoundBracket = 0;
148 public static final int SquareBracket = 1;
149 public static final int CurlyBracket = 2;
150 public static final int BracketKinds = 3;
// Convenience constructor: delegates to the three-argument constructor, passing false as the third argument.
154 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
155 this(tokenizeComments, tokenizeWhiteSpace, false);
// Answers true when currentPosition has reached the length of the source array.
157 public final boolean atEnd() {
158 // This test is not meaningful when source is
159 // only a part of the real input stream
161 return source.length == currentPosition;
// Answers the characters of the current identifier token.
163 public char[] getCurrentIdentifierSource() {
164 //return the token REAL source (aka unicodes are precomputed)
// unicode escapes were seen in this token: copy the decoded characters out of withoutUnicodeBuffer
167 if (withoutUnicodePtr != 0)
168 //0 is used as a fast test flag so the real first char is in position 1
169 System.arraycopy(withoutUnicodeBuffer, 1, result = new char[withoutUnicodePtr], 0, withoutUnicodePtr);
// no unicode escape: the token is source[startPosition .. currentPosition - 1]
171 int length = currentPosition - startPosition;
// short tokens are delegated to the optimizedCurrentTokenSourceN helper matching their length
172 switch (length) { // see OptimizedLength
174 return optimizedCurrentTokenSource1();
176 return optimizedCurrentTokenSource2();
178 return optimizedCurrentTokenSource3();
180 return optimizedCurrentTokenSource4();
182 return optimizedCurrentTokenSource5();
184 return optimizedCurrentTokenSource6();
// other lengths: plain copy of the token characters into a fresh array
187 System.arraycopy(source, startPosition, result = new char[length], 0, length);
// Answers the position of the last character of the current token:
// currentPosition already points one past it, hence the - 1.
191 public int getCurrentTokenEndPosition() {
192 return this.currentPosition - 1;
// Answers a fresh array holding the characters of the current token.
194 public final char[] getCurrentTokenSource() {
195 // Return the token REAL source (aka unicodes are precomputed)
// unicode escapes were seen in this token: copy the decoded characters out of withoutUnicodeBuffer
198 if (withoutUnicodePtr != 0)
199 // 0 is used as a fast test flag so the real first char is in position 1
200 System.arraycopy(withoutUnicodeBuffer, 1, result = new char[withoutUnicodePtr], 0, withoutUnicodePtr);
// no unicode escape: copy source[startPosition .. currentPosition - 1]
203 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
// Answers the characters of the current string-literal token without its first and last character.
207 public final char[] getCurrentTokenSourceString() {
208 //return the token REAL source (aka unicodes are precomputed).
209 //REMOVE the two " that are at the beginning and the end.
// unicode escapes were seen in this token: copy from withoutUnicodeBuffer, dropping both quotes
212 if (withoutUnicodePtr != 0)
213 //0 is used as a fast test flag so the real first char is in position 1
214 System.arraycopy(withoutUnicodeBuffer, 2,
215 //2 is 1 (real start) + 1 (to jump over the ")
216 result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
// no unicode escape: copy from source, skipping the opening quote and leaving out the closing one
219 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
// Answers the position in source of the first character of the current token.
223 public int getCurrentTokenStartPosition() {
224 return this.startPosition;
227 * Search the source position corresponding to the end of a given line number
229 * Line numbers are 1-based, and relative to the scanner initialPosition.
230 * Character positions are 0-based.
232 * In case the given line number is inconsistent, answers -1.
// Answers the recorded end position of the given 1-based line, read from lineEnds.
234 public final int getLineEnd(int lineNumber) {
// guards: no line ends recorded, or a line number outside the lineEnds array
236 if (lineEnds == null)
238 if (lineNumber >= lineEnds.length)
243 if (lineNumber == lineEnds.length - 1)
245 return lineEnds[lineNumber - 1]; // lineEnds is 0-based: slot lineNumber - 1 holds the end of this line
248 * Search the source position corresponding to the beginning of a given line number
250 * Line numbers are 1-based, and relative to the scanner initialPosition.
251 * Character positions are 0-based.
253 * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
255 * In case the given line number is inconsistent, answers -1.
// Answers the start position of the given 1-based line, derived from the end of the line before it.
257 public final int getLineStart(int lineNumber) {
// guards: no line ends recorded, or a line number outside the lineEnds array
259 if (lineEnds == null)
261 if (lineNumber >= lineEnds.length)
// a line with no predecessor starts at initialPosition
267 return initialPosition;
268 return lineEnds[lineNumber - 2] + 1; // a line starts one character after the end of the previous line
// Reads the next character (decoding a unicode escape if there is one) and keeps it only when it equals testedChar.
270 public final boolean getNextChar(char testedChar) {
272 //handle the case of unicode.
273 //when a unicode appears then we must use a buffer that holds char internal values
274 //At the end of this method currentCharacter holds the new visited char
275 //and currentPosition points right next after it
276 //Both previous lines are true if the currentCharacter is == to the testedChar
277 //On false, currentPosition is reset to its value on entry (currentCharacter may still have been overwritten).
279 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember where we started so the read can be undone
281 int temp = currentPosition;
283 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
284 //-------------unicode treatment ------------
// any number of 'u' characters may follow the backslash
288 while (source[currentPosition] == 'u') {
// four hexadecimal digits are expected; anything else undoes the read
293 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
294 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
295 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
296 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
297 currentPosition = temp;
// combine the four hexadecimal digits into the character they denote
301 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
302 if (currentCharacter != testedChar) {
303 currentPosition = temp;
306 unicodeAsBackSlash = currentCharacter == '\\';
308 //need the unicode buffer
309 if (withoutUnicodePtr == 0) {
310 //buffer all the entries that have been left aside....
311 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
312 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
314 //fill the buffer with the char
315 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
318 } //-------------end unicode treatment--------------
// plain character: same test, without any decoding
320 if (currentCharacter != testedChar) {
321 currentPosition = temp;
324 unicodeAsBackSlash = false;
// keep the decoded-token buffer in step once it is in use
325 if (withoutUnicodePtr != 0)
326 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source: undo the read
329 } catch (IndexOutOfBoundsException e) {
330 unicodeAsBackSlash = false;
331 currentPosition = temp;
// Two-candidate variant of getNextChar(char): reads the next character (decoding a unicode escape if there is one)
// and keeps it only when it equals testedChar1 or testedChar2.
335 public final int getNextChar(char testedChar1, char testedChar2) {
336 //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
337 //test can be done with (x==0) for the first and (x>0) for the second
338 //handle the case of unicode.
339 //when a unicode appears then we must use a buffer that holds char internal values
340 //At the end of this method currentCharacter holds the new visited char
341 //and currentPosition points right next after it
342 //Both previous lines are true if the currentCharacter is == to the testedChar1/2
343 //Otherwise currentPosition is reset to its value on entry (currentCharacter may still have been overwritten).
345 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember where we started so the read can be undone
347 int temp = currentPosition;
350 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
351 //-------------unicode treatment ------------
// any number of 'u' characters may follow the backslash
355 while (source[currentPosition] == 'u') {
// four hexadecimal digits are expected; anything else undoes the read
360 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
361 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
362 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
363 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
364 currentPosition = temp;
// combine the four hexadecimal digits into the character they denote
368 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
369 if (currentCharacter == testedChar1)
371 else if (currentCharacter == testedChar2)
// neither candidate matched: undo the read
374 currentPosition = temp;
378 //need the unicode buffer
379 if (withoutUnicodePtr == 0) {
380 //buffer all the entries that have been left aside....
381 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
382 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
384 //fill the buffer with the char
385 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
387 } //-------------end unicode treatment--------------
// plain character: same tests, without any decoding
389 if (currentCharacter == testedChar1)
391 else if (currentCharacter == testedChar2)
394 currentPosition = temp;
// keep the decoded-token buffer in step once it is in use
398 if (withoutUnicodePtr != 0)
399 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source: undo the read
402 } catch (IndexOutOfBoundsException e) {
403 currentPosition = temp;
// Reads the next character (decoding a unicode escape if there is one) and keeps it only when
// Character.isDigit accepts it.
407 public final boolean getNextCharAsDigit() {
409 //handle the case of unicode.
410 //when a unicode appears then we must use a buffer that holds char internal values
411 //At the end of this method currentCharacter holds the new visited char
412 //and currentPosition points right next after it
413 //Both previous lines are true if the currentCharacter is a digit
414 //On false, currentPosition is reset to its value on entry (currentCharacter may still have been overwritten).
416 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember where we started so the read can be undone
418 int temp = currentPosition;
420 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
421 //-------------unicode treatment ------------
// any number of 'u' characters may follow the backslash
425 while (source[currentPosition] == 'u') {
// four hexadecimal digits are expected; anything else undoes the read
430 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
431 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
432 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
433 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
434 currentPosition = temp;
// combine the four hexadecimal digits into the character they denote
438 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
439 if (!Character.isDigit(currentCharacter)) {
440 currentPosition = temp;
444 //need the unicode buffer
445 if (withoutUnicodePtr == 0) {
446 //buffer all the entries that have been left aside....
447 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
448 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
450 //fill the buffer with the char
451 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
453 } //-------------end unicode treatment--------------
// plain character: same test, without any decoding
455 if (!Character.isDigit(currentCharacter)) {
456 currentPosition = temp;
// keep the decoded-token buffer in step once it is in use
459 if (withoutUnicodePtr != 0)
460 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source: undo the read
463 } catch (IndexOutOfBoundsException e) {
464 currentPosition = temp;
// Radix-aware variant of getNextCharAsDigit(): the next character (decoded from a unicode escape if there is one)
// is kept only when Character.digit(currentCharacter, radix) does not answer -1.
468 public final boolean getNextCharAsDigit(int radix) {
470 //handle the case of unicode.
471 //when a unicode appears then we must use a buffer that holds char internal values
472 //At the end of this method currentCharacter holds the new visited char
473 //and currentPosition points right next after it
474 //Both previous lines are true if the currentCharacter is a digit based on radix
475 //On false, currentPosition is reset to its value on entry (currentCharacter may still have been overwritten).
477 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember where we started so the read can be undone
479 int temp = currentPosition;
481 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
482 //-------------unicode treatment ------------
// any number of 'u' characters may follow the backslash
486 while (source[currentPosition] == 'u') {
// four hexadecimal digits are expected; anything else undoes the read
491 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
492 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
493 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
494 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
495 currentPosition = temp;
// combine the four hexadecimal digits into the character they denote
499 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
500 if (Character.digit(currentCharacter, radix) == -1) {
501 currentPosition = temp;
505 //need the unicode buffer
506 if (withoutUnicodePtr == 0) {
507 //buffer all the entries that have been left aside....
508 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
509 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
511 //fill the buffer with the char
512 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
514 } //-------------end unicode treatment--------------
// plain character: same test, without any decoding
516 if (Character.digit(currentCharacter, radix) == -1) {
517 currentPosition = temp;
// keep the decoded-token buffer in step once it is in use
520 if (withoutUnicodePtr != 0)
521 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source: undo the read
524 } catch (IndexOutOfBoundsException e) {
525 currentPosition = temp;
// Reads the next character (decoding a unicode escape if there is one) and keeps it only when
// Character.isJavaIdentifierPart accepts it.
529 public boolean getNextCharAsJavaIdentifierPart() {
531 //handle the case of unicode.
532 //when a unicode appears then we must use a buffer that holds char internal values
533 //At the end of this method currentCharacter holds the new visited char
534 //and currentPosition points right next after it
535 //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
536 //On false, currentPosition is reset to its value on entry (currentCharacter may still have been overwritten).
538 //ALL getNextChar.... ARE OPTIMIZED COPIES
// remember where we started so the read can be undone
540 int temp = currentPosition;
542 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
543 //-------------unicode treatment ------------
// any number of 'u' characters may follow the backslash
547 while (source[currentPosition] == 'u') {
// four hexadecimal digits are expected; anything else undoes the read
552 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
553 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
554 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
555 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
556 currentPosition = temp;
// combine the four hexadecimal digits into the character they denote
560 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
561 if (!Character.isJavaIdentifierPart(currentCharacter)) {
562 currentPosition = temp;
566 //need the unicode buffer
567 if (withoutUnicodePtr == 0) {
568 //buffer all the entries that have been left aside....
569 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
570 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
572 //fill the buffer with the char
573 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
575 } //-------------end unicode treatment--------------
// plain character: same test, without any decoding
577 if (!Character.isJavaIdentifierPart(currentCharacter)) {
578 currentPosition = temp;
// keep the decoded-token buffer in step once it is in use
582 if (withoutUnicodePtr != 0)
583 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// reading past the end of source: undo the read
586 } catch (IndexOutOfBoundsException e) {
587 currentPosition = temp;
// Scans forward from currentPosition and answers the kind (a TokenNameXXX constant) of the next token.
// Leading white space is consumed first; it is answered as TokenNameWHITESPACE when tokenizeWhiteSpace is set.
// Comments are answered as tokens of their own only when tokenizeComments is set.
// Throws InvalidInputException for malformed character or string literals, invalid unicode escapes
// and unterminated comments. An IndexOutOfBoundsException that escapes the scanning code is caught
// at the very end, next to the final TokenNameEOF return.
591 public int getNextToken() throws InvalidInputException {
595 jumpOverMethodBody();
597 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
600 while (true) { //loop for jumping over comments
601 withoutUnicodePtr = 0;
602 //start with a new token (even comment written with unicode )
604 // ---------Consume white space and handles startPosition---------
605 int whiteStart = currentPosition;
606 boolean isWhiteSpace;
608 startPosition = currentPosition;
609 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
610 isWhiteSpace = jumpOverUnicodeWhiteSpace();
612 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
613 checkNonExternalizeString();
614 if (recordLineSeparator) {
620 isWhiteSpace = (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
622 } while (isWhiteSpace);
623 if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
624 // reposition scanner in case we are interested in spaces as tokens
626 startPosition = whiteStart;
627 return TokenNameWHITESPACE;
629 //little trick to get out in the middle of a source computation
630 if (currentPosition > eofPosition)
633 // ---------Identify the next token-------------
635 switch (currentCharacter) {
637 return TokenNameLPAREN;
639 return TokenNameRPAREN;
641 return TokenNameLBRACE;
643 return TokenNameRBRACE;
645 return TokenNameLBRACKET;
647 return TokenNameRBRACKET;
649 return TokenNameSEMICOLON;
651 return TokenNameCOMMA;
653 if (getNextCharAsDigit())
654 return scanNumber(true);
// operators: getNextChar(c1, c2) answers 0 when c1 follows and a positive value when c2 follows
659 if ((test = getNextChar('+', '=')) == 0)
660 return TokenNamePLUS_PLUS;
662 return TokenNamePLUS_EQUAL;
663 return TokenNamePLUS;
668 if ((test = getNextChar('-', '=')) == 0)
669 return TokenNameMINUS_MINUS;
671 return TokenNameMINUS_EQUAL;
672 return TokenNameMINUS;
675 return TokenNameTWIDDLE;
677 if (getNextChar('='))
678 return TokenNameNOT_EQUAL;
681 if (getNextChar('='))
682 return TokenNameMULTIPLY_EQUAL;
683 return TokenNameMULTIPLY;
685 if (getNextChar('='))
686 return TokenNameREMAINDER_EQUAL;
687 return TokenNameREMAINDER;
691 if ((test = getNextChar('=', '<')) == 0)
692 return TokenNameLESS_EQUAL;
694 if (getNextChar('='))
695 return TokenNameLEFT_SHIFT_EQUAL;
696 return TokenNameLEFT_SHIFT;
698 return TokenNameLESS;
703 if ((test = getNextChar('=', '>')) == 0)
704 return TokenNameGREATER_EQUAL;
706 if ((test = getNextChar('=', '>')) == 0)
707 return TokenNameRIGHT_SHIFT_EQUAL;
709 if (getNextChar('='))
710 return TokenNameUNSIGNED_RIGHT_SHIFT_EQUAL;
711 return TokenNameUNSIGNED_RIGHT_SHIFT;
713 return TokenNameRIGHT_SHIFT;
715 return TokenNameGREATER;
718 if (getNextChar('='))
719 return TokenNameEQUAL_EQUAL;
720 return TokenNameEQUAL;
724 if ((test = getNextChar('&', '=')) == 0)
725 return TokenNameAND_AND;
727 return TokenNameAND_EQUAL;
733 if ((test = getNextChar('|', '=')) == 0)
734 return TokenNameOR_OR;
736 return TokenNameOR_EQUAL;
740 if (getNextChar('='))
741 return TokenNameXOR_EQUAL;
744 return TokenNameQUESTION;
746 return TokenNameCOLON;
// character literal: a line break right after the opening quote is invalid
750 if ((test = getNextChar('\n', '\r')) == 0) {
751 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
754 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
755 for (int lookAhead = 0; lookAhead < 3; lookAhead++) {
756 if (currentPosition + lookAhead == source.length)
758 if (source[currentPosition + lookAhead] == '\n')
760 if (source[currentPosition + lookAhead] == '\'') {
761 currentPosition += lookAhead + 1;
765 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
// a closing quote right after the opening one is invalid as well
768 if (getNextChar('\'')) {
769 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
770 for (int lookAhead = 0; lookAhead < 3; lookAhead++) {
771 if (currentPosition + lookAhead == source.length)
773 if (source[currentPosition + lookAhead] == '\n')
775 if (source[currentPosition + lookAhead] == '\'') {
776 currentPosition += lookAhead + 1;
780 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
782 if (getNextChar('\\'))
783 scanEscapeCharacter();
784 else { // consume next character
785 unicodeAsBackSlash = false;
786 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
787 getNextUnicodeChar();
789 if (withoutUnicodePtr != 0) {
790 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
794 if (getNextChar('\''))
795 return TokenNameCharacterLiteral;
796 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
797 for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
798 if (currentPosition + lookAhead == source.length)
800 if (source[currentPosition + lookAhead] == '\n')
802 if (source[currentPosition + lookAhead] == '\'') {
803 currentPosition += lookAhead + 1;
807 throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
// string literal: read characters until the closing double quote
810 // consume next character
811 unicodeAsBackSlash = false;
812 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
813 getNextUnicodeChar();
815 if (withoutUnicodePtr != 0) {
816 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
820 while (currentCharacter != '"') {
821 /**** \r and \n are not valid in string literals ****/
822 if ((currentCharacter == '\n') || (currentCharacter == '\r')) {
823 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
824 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
825 if (currentPosition + lookAhead == source.length)
827 if (source[currentPosition + lookAhead] == '\n')
829 if (source[currentPosition + lookAhead] == '\"') {
830 currentPosition += lookAhead + 1;
834 throw new InvalidInputException(INVALID_CHAR_IN_STRING);
836 if (currentCharacter == '\\') {
837 int escapeSize = currentPosition;
838 boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
839 //scanEscapeCharacter has a side effect on unicodeAsBackSlash and the previous value is needed a few lines below
840 scanEscapeCharacter();
841 escapeSize = currentPosition - escapeSize;
842 if (withoutUnicodePtr == 0) {
843 //buffer all the entries that have been left aside....
844 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
845 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
846 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
847 } else { //overwrite the / in the buffer
848 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
849 if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
854 // consume next character
855 unicodeAsBackSlash = false;
856 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
857 getNextUnicodeChar();
859 if (withoutUnicodePtr != 0) {
860 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// running off the end of source inside a string literal means the string was never closed
865 } catch (IndexOutOfBoundsException e) {
866 throw new InvalidInputException(UNTERMINATED_STRING);
867 } catch (InvalidInputException e) {
868 if (e.getMessage().equals(INVALID_ESCAPE)) {
869 // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
870 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
871 if (currentPosition + lookAhead == source.length)
873 if (source[currentPosition + lookAhead] == '\n')
875 if (source[currentPosition + lookAhead] == '\"') {
876 currentPosition += lookAhead + 1;
884 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
885 if (currentLine == null) {
886 currentLine = new NLSLine();
887 lines.add(currentLine);
889 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
891 return TokenNameStringLiteral;
// comments and division: a second '/' starts a line comment, a '*' starts a block comment
895 if ((test = getNextChar('/', '*')) == 0) { //line comment
896 int endPositionForLineComment = 0;
897 try { //get the next char
898 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
899 //-------------unicode treatment ------------
900 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
902 while (source[currentPosition] == 'u') {
905 if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
907 || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
909 || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
911 || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
913 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
915 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
919 //handle the \\u case manually into comment
920 if (currentCharacter == '\\') {
921 if (source[currentPosition] == '\\')
924 boolean isUnicode = false;
// consume characters up to the end of the line
925 while (currentCharacter != '\r' && currentCharacter != '\n') {
928 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
930 //-------------unicode treatment ------------
931 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
933 while (source[currentPosition] == 'u') {
936 if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
938 || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
940 || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
942 || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
944 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
946 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
949 //handle the \\u case manually into comment
950 if (currentCharacter == '\\') {
951 if (source[currentPosition] == '\\')
// the comment ends before its line terminator: 6 characters back or 1 character back
// (presumably 6 when the terminator was written as a unicode escape - TODO confirm)
956 endPositionForLineComment = currentPosition - 6;
958 endPositionForLineComment = currentPosition - 1;
960 recordComment(false);
961 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
962 checkNonExternalizeString();
963 if (recordLineSeparator) {
965 pushUnicodeLineSeparator();
973 if (tokenizeComments) {
975 currentPosition = endPositionForLineComment; // reset one character behind
977 return TokenNameCOMMENT_LINE;
979 } catch (IndexOutOfBoundsException e) { //an eof will then be generated
980 if (tokenizeComments) {
981 currentPosition--; // reset one character behind
982 return TokenNameCOMMENT_LINE;
987 if (test > 0) { //traditional and annotation comment
988 boolean isJavadoc = false, star = false;
989 // consume next character
990 unicodeAsBackSlash = false;
991 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
992 getNextUnicodeChar();
994 if (withoutUnicodePtr != 0) {
995 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
999 if (currentCharacter == '*') {
1003 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1004 checkNonExternalizeString();
1005 if (recordLineSeparator) {
1006 pushLineSeparator();
1011 try { //get the next char
1012 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1013 //-------------unicode treatment ------------
1014 getNextUnicodeChar();
1016 //handle the \\u case manually into comment
1017 if (currentCharacter == '\\') {
1018 if (source[currentPosition] == '\\')
1019 currentPosition++; //jump over the \\
1021 // empty comment is not a javadoc /**/
1022 if (currentCharacter == '/') {
1025 //loop until end of comment */
// star remembers whether the previous character was '*', so the loop stops on the "*/" pair
1026 while ((currentCharacter != '/') || (!star)) {
1027 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1028 checkNonExternalizeString();
1029 if (recordLineSeparator) {
1030 pushLineSeparator();
1035 star = currentCharacter == '*';
1037 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1038 //-------------unicode treatment ------------
1039 getNextUnicodeChar();
1041 //handle the \\u case manually into comment
1042 if (currentCharacter == '\\') {
1043 if (source[currentPosition] == '\\')
1045 } //jump over the \\
1047 recordComment(isJavadoc);
1048 if (tokenizeComments) {
1050 return TokenNameCOMMENT_JAVADOC;
1051 return TokenNameCOMMENT_BLOCK;
// running off the end of source inside a block comment means the comment was never closed
1053 } catch (IndexOutOfBoundsException e) {
1054 throw new InvalidInputException(UNTERMINATED_COMMENT);
// a '/' that starts no comment is a division operator
1058 if (getNextChar('='))
1059 return TokenNameDIVIDE_EQUAL;
1060 return TokenNameDIVIDE;
1064 return TokenNameEOF;
1065 //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1066 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
// anything else: identifier or keyword, number, or an error token
1069 if (Character.isJavaIdentifierStart(currentCharacter))
1070 return scanIdentifierOrKeyword();
1071 if (Character.isDigit(currentCharacter))
1072 return scanNumber(false);
1073 return TokenNameERROR;
1076 } //-----------------end switch while try--------------------
1077 catch (IndexOutOfBoundsException e) {
1079 return TokenNameEOF;
// Decodes a unicode escape whose leading backslash has already been consumed (every caller
// visible in this file first checks that source[currentPosition] is 'u').
// On exit currentCharacter holds the decoded character and the character has been appended
// to withoutUnicodeBuffer.
// Throws InvalidInputException(INVALID_UNICODE_ESCAPE) when the digit check below fails; an
// IndexOutOfBoundsException from reading past the end of source is declared and not caught here.
1081 public final void getNextUnicodeChar() throws IndexOutOfBoundsException, InvalidInputException {
1083 //handle the case of unicode.
1084 //when a unicode appears then we must use a buffer that holds char internal values
1085 //At the end of this method currentCharacter holds the new visited char
1086 //and currentPosition points right next after it
1088 //ALL getNextChar.... ARE OPTIMIZED COPIES
// unicodeSize starts at 6; it is subtracted below to find how many raw characters precede the escape.
1090 int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
// Loop over the run of 'u' characters. The loop body is not visible in this excerpt
// (presumably it advances currentPosition and grows unicodeSize -- TODO confirm).
1092 while (source[currentPosition] == 'u') {
// Read four digits. Character.getNumericValue maps the letters a-z and A-Z to 10..35, so a
// result above 15 means the character is not a hexadecimal digit.
// NOTE(review): a test for negative results (characters with no numeric value) is not
// visible on these lines -- confirm it exists on the lines missing from this excerpt.
1097 if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1099 || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1101 || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1103 || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1105 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// Combine the four digits, most significant first, into one UTF-16 code unit.
1107 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1108 //need the unicode buffer
1109 if (withoutUnicodePtr == 0) {
1110 //buffer all the entries that have been left aside....
// Number of raw characters between startPosition and the start of this escape.
1111 withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
// The copy starts at index 1 of the buffer, matching the pre-increment used for appends below.
1112 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1114 //fill the buffer with the char
1115 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Remember whether the escape decoded to a backslash; scanEscapeCharacter reads this flag.
1117 unicodeAsBackSlash = currentCharacter == '\\';
// Skips over a method body without returning tokens. White space, literals, comments,
// identifiers and numbers are consumed so that their content is not mistaken for structure.
// InvalidInputException and IndexOutOfBoundsException raised while skipping are caught inside
// this method (the handler bodies are not visible in this excerpt).
// NOTE(review): the case labels of the switch, including the brace handling, are missing from
// this excerpt; the section comments below are inferred from the visible statements.
1119 /* Tokenize a method body, assuming that curly brackets are properly balanced.
1121 public final void jumpOverMethodBody() {
// Clear the carriage-return flag that pushLineSeparator consults when merging CR+LF.
1123 this.wasAcr = false;
1126 while (true) { //loop for jumping over comments
1127 // ---------Consume white space and handles startPosition---------
1128 boolean isWhiteSpace;
// Re-anchor the token start on the character about to be read.
1130 startPosition = currentPosition;
1131 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
// Escaped character: decode it and classify it in one call.
1132 isWhiteSpace = jumpOverUnicodeWhiteSpace();
1134 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1135 pushLineSeparator();
1136 isWhiteSpace = Character.isWhitespace(currentCharacter);
1138 } while (isWhiteSpace);
1140 // -------consume token until } is found---------
1141 switch (currentCharacter) {
// Presumably the character-literal case (label not visible): test for a backslash, then
// either decode the escape sequence or consume a single character.
1153 test = getNextChar('\\');
1156 scanEscapeCharacter();
1157 } catch (InvalidInputException ex) {
1160 try { // consume next character
1161 unicodeAsBackSlash = false;
1162 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1163 getNextUnicodeChar();
// Keep the decoded-text buffer in step once it is in use (pointer non-zero).
1165 if (withoutUnicodePtr != 0) {
1166 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1169 } catch (InvalidInputException ex) {
// String literal (the loop below runs until a double quote): read the first character.
1177 try { // consume next character
1178 unicodeAsBackSlash = false;
1179 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1180 getNextUnicodeChar();
1182 if (withoutUnicodePtr != 0) {
1183 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1186 } catch (InvalidInputException ex) {
1188 while (currentCharacter != '"') {
// A CR (optionally followed by LF) or a bare LF stops the scan of the literal.
1189 if (currentCharacter == '\r') {
1190 if (source[currentPosition] == '\n')
1192 break; // the string cannot go further that the line
1194 if (currentCharacter == '\n') {
1195 break; // the string cannot go further that the line
// Escape sequence inside the string.
1197 if (currentCharacter == '\\') {
1199 scanEscapeCharacter();
1200 } catch (InvalidInputException ex) {
1203 try { // consume next character
1204 unicodeAsBackSlash = false;
1205 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1206 getNextUnicodeChar();
1208 if (withoutUnicodePtr != 0) {
1209 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1212 } catch (InvalidInputException ex) {
1215 } catch (IndexOutOfBoundsException e) {
// Comment handling: a result of 0 from getNextChar('/', '*') selects the line-comment branch,
// a positive result the block-comment branch (see the trailing comments on those lines).
1222 if ((test = getNextChar('/', '*')) == 0) { //line comment
1225 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1226 //-------------unicode processing ------------
// Escapes are decoded inline here instead of through getNextUnicodeChar; a bad digit is
// replaced by 'A' rather than raising an error.
1227 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1229 while (source[currentPosition] == 'u') {
1232 if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1234 || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1236 || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1238 || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1239 || c4 < 0) { //error don't care of the value
1240 currentCharacter = 'A';
1241 } //something different from \n and \r
1243 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// Consume the rest of the line comment, up to the first CR or LF.
1247 while (currentCharacter != '\r' && currentCharacter != '\n') {
1249 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1250 //-------------unicode processing ------------
1251 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1253 while (source[currentPosition] == 'u') {
1256 if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1258 || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1260 || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1262 || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1263 || c4 < 0) { //error don't care of the value
1264 currentCharacter = 'A';
1265 } //something different from \n and \r
1267 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1271 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1272 pushLineSeparator();
1273 } catch (IndexOutOfBoundsException e) {
1274 } //an eof will then be generated
1277 if (test > 0) { //traditional and annotation comment
// 'star' records whether the previously read character was '*', so that a following '/'
// closes the comment.
1278 boolean star = false;
1279 try { // consume next character
1280 unicodeAsBackSlash = false;
1281 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1282 getNextUnicodeChar();
1284 if (withoutUnicodePtr != 0) {
1285 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1288 } catch (InvalidInputException ex) {
1290 if (currentCharacter == '*') {
1293 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1294 pushLineSeparator();
1295 try { //get the next char
1296 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1297 //-------------unicode processing ------------
1298 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1300 while (source[currentPosition] == 'u') {
1303 if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1305 || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1307 || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1309 || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1310 || c4 < 0) { //error don't care of the value
1311 currentCharacter = 'A';
1312 } //something different from * and /
1314 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1317 //loop until end of comment */
// Continue until a '/' is read immediately after a '*'.
1318 while ((currentCharacter != '/') || (!star)) {
1319 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1320 pushLineSeparator();
1321 star = currentCharacter == '*';
1323 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1324 //-------------unicode processing ------------
1325 int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1327 while (source[currentPosition] == 'u') {
1330 if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1332 || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1334 || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1336 || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1337 || c4 < 0) { //error don't care of the value
1338 currentCharacter = 'A';
1339 } //something different from * and /
1341 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1345 } catch (IndexOutOfBoundsException e) {
// Anything else: consume a whole identifier/keyword or number so its characters are skipped
// as a unit; the value returned by scanIdentifierOrKeyword is discarded.
1354 if (Character.isJavaIdentifierStart(currentCharacter)) {
1356 scanIdentifierOrKeyword();
1357 } catch (InvalidInputException ex) {
1361 if (Character.isDigit(currentCharacter)) {
1364 } catch (InvalidInputException ex) {
1370 //-----------------end switch while try--------------------
// Both exception types end the skip here; the handler bodies are not visible in this excerpt.
1371 } catch (IndexOutOfBoundsException e) {
1372 } catch (InvalidInputException e) {
// Decodes a unicode escape whose backslash was just consumed and reports whether the decoded
// character is white space. A non-white-space character is appended to withoutUnicodeBuffer.
// Throws InvalidInputException(INVALID_UNICODE_ESCAPE) for a bad hex digit or when the escape
// runs past the end of source.
1376 public final boolean jumpOverUnicodeWhiteSpace() throws InvalidInputException {
1378 //handle the case of unicode. Jump over the next whiteSpace
1379 //making startPosition pointing on the next available char
1380 //On false, the currentCharacter is filled up with a potential
1384 this.wasAcr = false;
1386 int unicodeSize = 6;
// Loop over the run of 'u' characters (loop body not visible in this excerpt).
1388 while (source[currentPosition] == 'u') {
// Each digit must have a numeric value in 0..15; unlike the check visible in
// getNextUnicodeChar, negative values are rejected explicitly here.
1393 if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15 || c1 < 0)
1394 || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15 || c2 < 0)
1395 || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15 || c3 < 0)
1396 || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15 || c4 < 0)) {
1397 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// Combine the four hex digits, most significant first.
1400 currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
// NOTE(review): pushLineSeparator records currentPosition - 1 as the separator position,
// while pushUnicodeLineSeparator uses currentPosition - 6 for escaped separators -- confirm
// which of the two is intended for an escaped CR/LF here.
1401 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1402 pushLineSeparator();
1403 if (Character.isWhitespace(currentCharacter))
1406 //buffer the new char which is not a white space
1407 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1408 //withoutUnicodePtr == 1 is true here
// Reading past the end of source is reported as an invalid escape.
1410 } catch (IndexOutOfBoundsException e) {
1411 throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
// Returns the recorded line-end positions as a fresh array, so callers do not share the
// scanner's internal lineEnds buffer.
1414 public final int[] getLineEnds() {
1415 //return a bounded copy of this.lineEnds
// Allocate an array of exactly linePtr + 1 entries and copy lineEnds[0..linePtr] into it.
1418 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
// Accessor returning a char[]. The body is not visible in this excerpt; presumably it
// returns the scanner's source buffer -- TODO confirm.
1422 public char[] getSource() {
// Returns the source of a one-character token, taken from source[startPosition].
// NOTE(review): the lines between the read of charOne and the final return are missing from
// this excerpt; per the comments they return shared arrays for common characters -- confirm.
1425 final char[] optimizedCurrentTokenSource1() {
1426 //return always the same char[] build only once
1428 //optimization at no speed cost of 99.5 % of the singleCharIdentifier
1429 char charOne = source[startPosition];
// Fallback: allocate a new one-element array for this character.
1484 return new char[] { charOne };
// Returns the source of a two-character token, looking for an existing identical array in a
// hashed table before allocating a new one.
// NOTE(review): the return statements and the declarations of i and r are not visible in this excerpt.
1487 final char[] optimizedCurrentTokenSource2() {
1488 //try to return the same char[] build only once
// Bucket index: both characters combined, reduced modulo TableSize.
1491 int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
// charArray_length[0] is the table used for length-2 tokens.
1492 char[][] table = charArray_length[0][hash];
// First scan of the bucket for an entry whose characters match.
1494 while (++i < InternalTableSize) {
1495 char[] charArray = table[i];
1496 if ((c0 == charArray[0]) && (c1 == charArray[1]))
1499 //---------other side---------
// Second scan, bounded by newEntry2 (starting index not visible here).
1501 int max = newEntry2;
1502 while (++i <= max) {
1503 char[] charArray = table[i];
1504 if ((c0 == charArray[0]) && (c1 == charArray[1]))
1507 //--------add the entry-------
// No match: take the next slot; the statement run when the slot index reaches
// InternalTableSize is not visible (presumably it wraps the index -- TODO confirm).
1508 if (++max >= InternalTableSize)
1511 table[max] = (r = new char[] { c0, c1 });
// Returns the source of a three-character token, looking for an existing identical array in
// a hashed table before allocating a new one.
// NOTE(review): the return statements, the local declarations and the end of the hash
// expression are not visible in this excerpt.
1515 final char[] optimizedCurrentTokenSource3() {
1516 //try to return the same char[] build only once
// Hash input: the three characters combined with 6-bit shifts.
1520 (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
// charArray_length[1] is the table used for length-3 tokens.
1522 char[][] table = charArray_length[1][hash];
// First scan of the bucket for an entry whose characters match.
1524 while (++i < InternalTableSize) {
1525 char[] charArray = table[i];
1526 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
1529 //---------other side---------
// Second scan, bounded by newEntry3 (starting index not visible here).
1531 int max = newEntry3;
1532 while (++i <= max) {
1533 char[] charArray = table[i];
1534 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
1537 //--------add the entry-------
// No match: store a new array in the next slot (overflow handling not visible).
1538 if (++max >= InternalTableSize)
1541 table[max] = (r = new char[] { c0, c1, c2 });
// Returns the source of a four-character token, looking for an existing identical array in a
// hashed table before allocating a new one.
// NOTE(review): the return statements, the declaration of hash and the end of the hash
// expression are not visible in this excerpt.
1545 final char[] optimizedCurrentTokenSource4() {
1546 //try to return the same char[] build only once
1548 char c0, c1, c2, c3;
// Hash input: the first character is widened to long before its shift, so the sum is
// computed in long arithmetic.
1550 ((((long) (c0 = source[startPosition])) << 18)
1551 + ((c1 = source[startPosition + 1]) << 12)
1552 + ((c2 = source[startPosition + 2]) << 6)
1553 + (c3 = source[startPosition + 3]))
// charArray_length[2] is the table used for length-4 tokens.
1555 char[][] table = charArray_length[2][(int) hash];
// First scan of the bucket for an entry whose characters match.
1557 while (++i < InternalTableSize) {
1558 char[] charArray = table[i];
1559 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
1562 //---------other side---------
// Second scan, bounded by newEntry4 (starting index not visible here).
1564 int max = newEntry4;
1565 while (++i <= max) {
1566 char[] charArray = table[i];
1567 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
1570 //--------add the entry-------
// No match: store a new array in the next slot (overflow handling not visible).
1571 if (++max >= InternalTableSize)
1574 table[max] = (r = new char[] { c0, c1, c2, c3 });
// Returns the source of a five-character token, looking for an existing identical array in a
// hashed table before allocating a new one.
// NOTE(review): the return statements, the declaration of hash and the end of the hash
// expression are not visible in this excerpt.
1579 final char[] optimizedCurrentTokenSource5() {
1580 //try to return the same char[] build only once
1582 char c0, c1, c2, c3, c4;
// Hash input: the two leading characters are widened to long before their shifts.
1584 ((((long) (c0 = source[startPosition])) << 24)
1585 + (((long) (c1 = source[startPosition + 1])) << 18)
1586 + ((c2 = source[startPosition + 2]) << 12)
1587 + ((c3 = source[startPosition + 3]) << 6)
1588 + (c4 = source[startPosition + 4]))
// charArray_length[3] is the table used for length-5 tokens.
1590 char[][] table = charArray_length[3][(int) hash];
// First scan of the bucket for an entry whose characters match.
1592 while (++i < InternalTableSize) {
1593 char[] charArray = table[i];
1594 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
1597 //---------other side---------
// Second scan, bounded by newEntry5 (starting index not visible here).
1599 int max = newEntry5;
1600 while (++i <= max) {
1601 char[] charArray = table[i];
1602 if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
1605 //--------add the entry-------
// No match: store a new array in the next slot (overflow handling not visible).
1606 if (++max >= InternalTableSize)
1609 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
// Returns the source of a six-character token, looking for an existing identical array in a
// hashed table before allocating a new one.
// NOTE(review): the return statements, the declaration of hash and the end of the hash
// expression are not visible in this excerpt.
1614 final char[] optimizedCurrentTokenSource6() {
1615 //try to return the same char[] build only once
1617 char c0, c1, c2, c3, c4, c5;
// Hash input: the three leading characters are widened to long before their shifts.
// The first shift is 32, not 30, so the steps between shifts are not uniform.
1619 ((((long) (c0 = source[startPosition])) << 32)
1620 + (((long) (c1 = source[startPosition + 1])) << 24)
1621 + (((long) (c2 = source[startPosition + 2])) << 18)
1622 + ((c3 = source[startPosition + 3]) << 12)
1623 + ((c4 = source[startPosition + 4]) << 6)
1624 + (c5 = source[startPosition + 5]))
// charArray_length[4] is the table used for length-6 tokens.
1626 char[][] table = charArray_length[4][(int) hash];
// First scan of the bucket for an entry whose characters match.
1628 while (++i < InternalTableSize) {
1629 char[] charArray = table[i];
1630 if ((c0 == charArray[0])
1631 && (c1 == charArray[1])
1632 && (c2 == charArray[2])
1633 && (c3 == charArray[3])
1634 && (c4 == charArray[4])
1635 && (c5 == charArray[5]))
1638 //---------other side---------
// Second scan, bounded by newEntry6 (starting index not visible here).
1640 int max = newEntry6;
1641 while (++i <= max) {
1642 char[] charArray = table[i];
1643 if ((c0 == charArray[0])
1644 && (c1 == charArray[1])
1645 && (c2 == charArray[2])
1646 && (c3 == charArray[3])
1647 && (c4 == charArray[4])
1648 && (c5 == charArray[5]))
1651 //--------add the entry-------
// No match: store a new array in the next slot (overflow handling not visible).
1652 if (++max >= InternalTableSize)
1655 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
// Records the position of the line separator held in currentCharacter (CR or LF) in
// lineEnds, growing the array when it is full and storing a CR+LF pair as one entry.
1659 public final void pushLineSeparator() throws InvalidInputException {
1660 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// Number of slots added each time lineEnds has to grow.
1661 final int INCREMENT = 250;
1663 if (this.checkNonExternalizedStringLiterals) {
1664 // reinitialize the current line for non externalize strings purpose
1667 //currentCharacter is at position currentPosition-1
1670 if (currentCharacter == '\r') {
1671 int separatorPos = currentPosition - 1;
// Guard for a position at or before the last recorded one; the statement it controls is
// not visible in this excerpt (presumably an early return -- TODO confirm).
1672 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
1674 //System.out.println("CR-" + separatorPos);
// Optimistic store: linePtr is incremented before the bounds check can fail, so the catch
// block below writes to lineEnds[linePtr] without incrementing again.
1676 lineEnds[++linePtr] = separatorPos;
1677 } catch (IndexOutOfBoundsException e) {
1678 //linePtr value is correct
1679 int oldLength = lineEnds.length;
1680 int[] old = lineEnds;
1681 lineEnds = new int[oldLength + INCREMENT];
1682 System.arraycopy(old, 0, lineEnds, 0, oldLength);
1683 lineEnds[linePtr] = separatorPos;
1685 // look-ahead for merged cr+lf
// When LF follows immediately, the entry just written is moved to the LF position.
1687 if (source[currentPosition] == '\n') {
1688 //System.out.println("look-ahead LF-" + currentPosition);
1689 lineEnds[linePtr] = currentPosition;
// The look-ahead may read past the end of source; that case is caught here.
1695 } catch (IndexOutOfBoundsException e) {
1700 if (currentCharacter == '\n') { //must merge eventual cr followed by lf
// If the previous character was a recorded CR (wasAcr set and the last entry sits two
// positions back), overwrite that entry with the LF position instead of adding one.
1701 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
1702 //System.out.println("merge LF-" + (currentPosition - 1));
1703 lineEnds[linePtr] = currentPosition - 1;
1705 int separatorPos = currentPosition - 1;
1706 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
1708 // System.out.println("LF-" + separatorPos);
// Same optimistic store and grow-on-failure sequence as in the CR branch.
1710 lineEnds[++linePtr] = separatorPos;
1711 } catch (IndexOutOfBoundsException e) {
1712 //linePtr value is correct
1713 int oldLength = lineEnds.length;
1714 int[] old = lineEnds;
1715 lineEnds = new int[oldLength + INCREMENT];
1716 System.arraycopy(old, 0, lineEnds, 0, oldLength);
1717 lineEnds[linePtr] = separatorPos;
// Variant of pushLineSeparator for a CR or LF that was written as a unicode escape: the
// recorded position is currentPosition - 6 rather than currentPosition - 1.
// NOTE(review): the offsets 6 and 7 assume the escape occupies exactly six source
// characters -- confirm behaviour for escapes written with several 'u' characters.
1724 public final void pushUnicodeLineSeparator() {
1725 // isUnicode means that the \r or \n has been read as a unicode character
1727 //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
// Number of slots added each time lineEnds has to grow.
1729 final int INCREMENT = 250;
1730 //currentCharacter is at position currentPosition-1
1732 if (this.checkNonExternalizedStringLiterals) {
1733 // reinitialize the current line for non externalize strings purpose
1738 if (currentCharacter == '\r') {
1739 int separatorPos = currentPosition - 6;
// Guard for a position at or before the last recorded one; the statement it controls is
// not visible in this excerpt.
1740 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
1742 //System.out.println("CR-" + separatorPos);
// Optimistic store: linePtr is already incremented when the bounds check fails, so the
// handler writes to lineEnds[linePtr] directly after growing the array.
1744 lineEnds[++linePtr] = separatorPos;
1745 } catch (IndexOutOfBoundsException e) {
1746 //linePtr value is correct
1747 int oldLength = lineEnds.length;
1748 int[] old = lineEnds;
1749 lineEnds = new int[oldLength + INCREMENT];
1750 System.arraycopy(old, 0, lineEnds, 0, oldLength);
1751 lineEnds[linePtr] = separatorPos;
1753 // look-ahead for merged cr+lf
// Only a literal LF in source is looked for here, not an escaped one.
1754 if (source[currentPosition] == '\n') {
1755 //System.out.println("look-ahead LF-" + currentPosition);
1756 lineEnds[linePtr] = currentPosition;
1764 if (currentCharacter == '\n') { //must merge eventual cr followed by lf
// Merge with a CR recorded immediately before this escape (seven positions back).
1765 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
1766 //System.out.println("merge LF-" + (currentPosition - 1));
1767 lineEnds[linePtr] = currentPosition - 6;
1769 int separatorPos = currentPosition - 6;
1770 if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
1772 // System.out.println("LF-" + separatorPos);
// Same optimistic store and grow-on-failure sequence as in the CR branch.
1774 lineEnds[++linePtr] = separatorPos;
1775 } catch (IndexOutOfBoundsException e) {
1776 //linePtr value is correct
1777 int oldLength = lineEnds.length;
1778 int[] old = lineEnds;
1779 lineEnds = new int[oldLength + INCREMENT];
1780 System.arraycopy(old, 0, lineEnds, 0, oldLength);
1781 lineEnds[linePtr] = separatorPos;
// Pushes the comment that was just scanned onto the comment stacks: commentStops receives
// currentPosition (negated when the comment is not a javadoc comment) and commentStarts
// receives startPosition. Both arrays grow by 30 entries when full.
1788 public final void recordComment(boolean isJavadoc) {
1790 // a new annotation comment is recorded
// The sign of the stored end position encodes the kind: positive for javadoc, negative otherwise.
1792 commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition;
// Reached when commentStops is full; commentPtr has already been incremented by the failed store.
1793 } catch (IndexOutOfBoundsException e) {
1794 int oldStackLength = commentStops.length;
1795 int[] oldStack = commentStops;
1796 commentStops = new int[oldStackLength + 30];
1797 System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
1798 commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
1799 //grows the positions buffers too
// commentStarts is resized to the same new length as commentStops.
1800 int[] old = commentStarts;
1801 commentStarts = new int[oldStackLength + 30];
1802 System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
1805 //the buffer is of a correct size here
1806 commentStarts[commentPtr] = startPosition;
// Repositions the scanner so that scanning restarts at 'begin' and end-of-file is reported
// after 'end'; also empties the comment stack.
// begin: position assigned to initialPosition, startPosition and currentPosition.
// end: last position to scan; eofPosition is set one past it.
1808 public void resetTo(int begin, int end) {
1809 //reset the scanner to a given position where it may rescan again
1812 initialPosition = startPosition = currentPosition = begin;
// end + 1 would overflow for Integer.MAX_VALUE, so that value is used unchanged.
1813 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
1814 commentPtr = -1; // reset comment stack
// Decodes the escape sequence that follows a backslash and leaves the resulting character
// in currentCharacter. Handles the single-character escapes assigned below and octal escapes
// of up to three digits; throws InvalidInputException(INVALID_ESCAPE) otherwise.
// NOTE(review): the case labels of the switch and several else branches are not visible in
// this excerpt.
1817 public final void scanEscapeCharacter() throws InvalidInputException {
1818 // the string with "\\u" is a legal string of two chars \ and u
1819 //thus we use a direct access to the source (for regular cases).
// When the backslash itself came from a unicode escape (flag set by getNextUnicodeChar),
// the next character is read with unicode-escape decoding as well.
1821 if (unicodeAsBackSlash) {
1822 // consume next character
1823 unicodeAsBackSlash = false;
1824 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
1825 getNextUnicodeChar();
1827 if (withoutUnicodePtr != 0) {
1828 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Regular case: read the next raw character from source.
1832 currentCharacter = source[currentPosition++];
1833 switch (currentCharacter) {
1835 currentCharacter = '\b';
1838 currentCharacter = '\t';
1841 currentCharacter = '\n';
1844 currentCharacter = '\f';
1847 currentCharacter = '\r';
1850 currentCharacter = '\"';
1853 currentCharacter = '\'';
1856 currentCharacter = '\\';
1859 // -----------octal escape--------------
1861 // OctalDigit OctalDigit
1862 // ZeroToThree OctalDigit OctalDigit
1864 int number = Character.getNumericValue(currentCharacter);
1865 if (number >= 0 && number <= 7) {
// A first digit above 3 rules out a third digit, since the value would exceed octal 377.
1866 boolean zeroToThreeNot = number > 3;
1867 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
1868 int digit = Character.getNumericValue(currentCharacter);
1869 if (digit >= 0 && digit <= 7) {
1870 number = (number * 8) + digit;
1871 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
1872 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
1875 digit = Character.getNumericValue(currentCharacter);
1876 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree OctalDigit OctalDigit
1877 number = (number * 8) + digit;
1878 } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
1882 } else { // has read \OctalDigit NonDigit--> ignore last character
1885 } else { // has read \OctalDigit NonOctalDigit--> ignore last character
1888 } else { // has read \OctalDigit --> ignore last character
// NOTE(review): the condition guarding this throw is not visible in this excerpt.
1892 throw new InvalidInputException(INVALID_ESCAPE);
1893 currentCharacter = (char) number;
// Not an octal digit and not one of the recognised escape letters.
1895 throw new InvalidInputException(INVALID_ESCAPE);
// Consumes the rest of the current identifier and returns the keyword token whose spelling
// matches it, or TokenNameIdentifier when none does. Matching dispatches on the first
// letter and then on the token length, comparing the remaining characters one at a time.
// NOTE(review): most 'switch (length)' and 'case N :' lines are missing from this excerpt,
// so the notes below about suspicious comparisons need to be confirmed against the full file.
1898 public int scanIdentifierOrKeyword() throws InvalidInputException {
1901 //first dispatch on the first char.
1902 //then the length. If there are several
1903 //keywords with the same length AND the same first char, then do another
1904 //dispatch on the second char :-)...cool....but fast !
1905 useAssertAsAnIndentifier = false;
// Consume every following identifier character.
1906 while (getNextCharAsJavaIdentifierPart()) {
// withoutUnicodePtr == 0 means no unicode escape was met: match directly against source.
1912 if (withoutUnicodePtr == 0)
1914 //quick test on length == 1 but not on length > 12 while most identifier
1915 //have a length which is <= 12...but there are lots of identifier with
// A single-character token is never compared against the keyword table.
1919 if ((length = currentPosition - startPosition) == 1)
1920 return TokenNameIdentifier;
1922 index = startPosition;
// Otherwise match against the decoded text held in withoutUnicodeBuffer.
1924 if ((length = withoutUnicodePtr) == 1)
1925 return TokenNameIdentifier;
1926 data = withoutUnicodeBuffer;
// index is left pointing at the first letter; the comparisons below normally use ++index
// to step to the following characters.
1930 firstLetter = data[index];
1931 switch (firstLetter) {
1933 case 'a' : // as and array
1936 if ((data[++index] == 's')) {
1939 return TokenNameIdentifier;
1942 if ((data[++index] == 'n') && (data[++index] == 'd')) {
1945 return TokenNameIdentifier;
1948 // if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
1949 // return TokenNamearray;
1951 // return TokenNameIdentifier;
1953 return TokenNameIdentifier;
1958 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
1959 return TokenNamebreak;
1961 return TokenNameIdentifier;
1963 return TokenNameIdentifier;
1966 case 'c' : //case class continue
1969 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
1970 return TokenNamecase;
1972 return TokenNameIdentifier;
// NOTE(review): the first comparison reads data[index] without incrementing. If index still
// points at the first letter 'c' here, the test against 'l' can never succeed -- confirm.
1974 if ((data[index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
1975 return TokenNameclass;
1977 return TokenNameIdentifier;
1979 if ((data[++index] == 'o')
1980 && (data[++index] == 'n')
1981 && (data[++index] == 't')
1982 && (data[++index] == 'i')
1983 && (data[++index] == 'n')
1984 && (data[++index] == 'u')
1985 && (data[++index] == 'e'))
1986 return TokenNamecontinue;
1988 return TokenNameIdentifier;
1990 return TokenNameIdentifier;
1993 case 'd' : //define default do
1996 if ((data[++index] == 'o'))
1999 return TokenNameIdentifier;
2001 if ((data[++index] == 'e')
2002 && (data[++index] == 'f')
2003 && (data[++index] == 'i')
2004 && (data[++index] == 'n')
2005 && (data[++index] == 'e'))
2006 return TokenNamedefine;
2008 return TokenNameIdentifier;
2010 if ((data[++index] == 'e')
2011 && (data[++index] == 'f')
2012 && (data[++index] == 'a')
2013 && (data[++index] == 'u')
2014 && (data[++index] == 'l')
2015 && (data[++index] == 't'))
2016 return TokenNamedefault;
2018 return TokenNameIdentifier;
2020 return TokenNameIdentifier;
2022 case 'e' : //echo else elseif extends
// NOTE(review): when the 'echo' test fails, index has already been advanced by its
// ++index, so the 'else' test below starts one character too far and looks unable to
// match "else" -- confirm and fix in a code change.
2025 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
2026 return TokenNameecho;
2027 else if ((data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2028 return TokenNameelse;
2030 return TokenNameIdentifier;
2032 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
2033 return TokenNameendif;
2035 return TokenNameIdentifier;
2037 if ((data[++index] == 'n')
2038 && (data[++index] == 'd')
2039 && (data[++index] == 'f')
2040 && (data[++index] == 'o')
2041 && (data[++index] == 'r'))
2042 return TokenNameendfor;
// NOTE(review): same concern as for 'else' above -- the 'elseif' test appears to run after
// the failed 'endfor' test has already advanced index. Confirm against the missing line.
2044 (data[++index] == 'l')
2045 && (data[++index] == 's')
2046 && (data[++index] == 'e')
2047 && (data[++index] == 'i')
2048 && (data[++index] == 'f'))
2049 return TokenNameelseif;
2051 return TokenNameIdentifier;
2053 if ((data[++index] == 'x')
2054 && (data[++index] == 't')
2055 && (data[++index] == 'e')
2056 && (data[++index] == 'n')
2057 && (data[++index] == 'd')
2058 && (data[++index] == 's'))
2059 return TokenNameextends;
2061 return TokenNameIdentifier;
2062 case 8 : // endwhile
2063 if ((data[++index] == 'n')
2064 && (data[++index] == 'd')
2065 && (data[++index] == 'w')
2066 && (data[++index] == 'h')
2067 && (data[++index] == 'i')
2068 && (data[++index] == 'l')
2069 && (data[++index] == 'e'))
2070 return TokenNameendwhile;
2072 return TokenNameIdentifier;
2073 case 9 : // endswitch
2074 if ((data[++index] == 'n')
2075 && (data[++index] == 'd')
2076 && (data[++index] == 's')
2077 && (data[++index] == 'w')
2078 && (data[++index] == 'i')
2079 && (data[++index] == 't')
2080 && (data[++index] == 'c')
2081 && (data[++index] == 'h'))
2082 return TokenNameendswitch;
2084 return TokenNameIdentifier;
2085 case 10 : // endforeach
2086 if ((data[++index] == 'n')
2087 && (data[++index] == 'd')
2088 && (data[++index] == 'f')
2089 && (data[++index] == 'o')
2090 && (data[++index] == 'r')
2091 && (data[++index] == 'e')
2092 && (data[++index] == 'a')
2093 && (data[++index] == 'c')
2094 && (data[++index] == 'h'))
2095 return TokenNameendforeach;
2097 return TokenNameIdentifier;
2100 return TokenNameIdentifier;
2103 case 'f' : //for false function
2106 if ((data[++index] == 'o') && (data[++index] == 'r'))
2107 return TokenNamefor;
2109 return TokenNameIdentifier;
// NOTE(review): data[index] is read without incrementing; if index still points at 'f',
// this test against 'a' cannot succeed and "false" is never recognised -- confirm.
2111 if ((data[index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2112 return TokenNamefalse;
2114 return TokenNameIdentifier;
2115 case 8 : // function
// NOTE(review): this case label is directly followed by a read of data[index] with no
// increment; with index on the first letter 'f' the test against 'u' cannot succeed -- confirm.
2116 if ((data[index] == 'u')
2117 && (data[++index] == 'n')
2118 && (data[++index] == 'c')
2119 && (data[++index] == 't')
2120 && (data[++index] == 'i')
2121 && (data[++index] == 'o')
2122 && (data[++index] == 'n'))
2123 return TokenNamefunction;
2125 return TokenNameIdentifier;
2127 return TokenNameIdentifier;
2131 if ((data[++index] == 'l')
2132 && (data[++index] == 'o')
2133 && (data[++index] == 'b')
2134 && (data[++index] == 'a')
2135 && (data[++index] == 'l')) {
2136 return TokenNameglobal;
2139 return TokenNameIdentifier;
2144 if (data[++index] == 'f')
2147 return TokenNameIdentifier;
2149 // if ((data[++index] == 'n') && (data[++index] == 't'))
2150 // return TokenNameint;
2152 // return TokenNameIdentifier;
2154 if ((data[++index] == 'n')
2155 && (data[++index] == 'c')
2156 && (data[++index] == 'l')
2157 && (data[++index] == 'u')
2158 && (data[++index] == 'd')
2159 && (data[++index] == 'e'))
2160 return TokenNameinclude;
2162 return TokenNameIdentifier;
2164 if ((data[++index] == 'n')
2165 && (data[++index] == 'c')
2166 && (data[++index] == 'l')
2167 && (data[++index] == 'u')
2168 && (data[++index] == 'd')
2169 && (data[++index] == 'e')
2170 && (data[++index] == '_')
2171 && (data[++index] == 'o')
2172 && (data[++index] == 'n')
2173 && (data[++index] == 'c')
2174 && (data[++index] == 'e'))
2175 return TokenNameinclude_once;
2177 return TokenNameIdentifier;
2179 return TokenNameIdentifier;
2184 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
2185 return TokenNamelist;
2188 return TokenNameIdentifier;
2190 case 'n' : // new null
2193 if ((data[++index] == 'e') && (data[++index] == 'w'))
2194 return TokenNamenew;
2196 return TokenNameIdentifier;
2198 if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l'))
2199 return TokenNamenull;
2201 return TokenNameIdentifier;
2204 return TokenNameIdentifier;
2206 case 'o' : // or old_function
2208 if (data[++index] == 'r') {
2212 // if (length == 12) {
2213 // if ((data[++index] == 'l')
2214 // && (data[++index] == 'd')
2215 // && (data[++index] == '_')
2216 // && (data[++index] == 'f')
2217 // && (data[++index] == 'u')
2218 // && (data[++index] == 'n')
2219 // && (data[++index] == 'c')
2220 // && (data[++index] == 't')
2221 // && (data[++index] == 'i')
2222 // && (data[++index] == 'o')
2223 // && (data[++index] == 'n')) {
2224 // return TokenNameold_function;
2227 return TokenNameIdentifier;
2231 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
2232 return TokenNameprint;
2235 return TokenNameIdentifier;
2236 case 'r' : //return require require_once
// This case tests the length with an if / else-if chain instead of a nested switch.
2238 if ((data[++index] == 'e')
2239 && (data[++index] == 't')
2240 && (data[++index] == 'u')
2241 && (data[++index] == 'r')
2242 && (data[++index] == 'n')) {
2243 return TokenNamereturn;
2245 } else if (length == 7) {
2246 if ((data[++index] == 'e')
2247 && (data[++index] == 'q')
2248 && (data[++index] == 'u')
2249 && (data[++index] == 'i')
2250 && (data[++index] == 'r')
2251 && (data[++index] == 'e')) {
2252 return TokenNamerequire;
2254 } else if (length == 12) {
2255 if ((data[++index] == 'e')
2256 && (data[++index] == 'q')
2257 && (data[++index] == 'u')
2258 && (data[++index] == 'i')
2259 && (data[++index] == 'r')
2260 && (data[++index] == 'e')
2261 && (data[++index] == '_')
2262 && (data[++index] == 'o')
2263 && (data[++index] == 'n')
2264 && (data[++index] == 'c')
2265 && (data[++index] == 'e')) {
2266 return TokenNamerequire_once;
2269 return TokenNameIdentifier;
2271 case 's' : //static switch
// The second character is consumed once here; the 'switch' test below then reads
// data[index] without incrementing, which is consistent with that earlier ++index.
2274 if (data[++index] == 't')
2275 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
2276 return TokenNamestatic;
2278 return TokenNameIdentifier;
2280 (data[index] == 'w')
2281 && (data[++index] == 'i')
2282 && (data[++index] == 't')
2283 && (data[++index] == 'c')
2284 && (data[++index] == 'h'))
2285 return TokenNameswitch;
2287 return TokenNameIdentifier;
2289 return TokenNameIdentifier;
// NOTE(review): data[index] is read without a visible preceding ++index; if index still
// points at the first letter, the test against 'r' cannot succeed -- confirm.
2296 if ((data[index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e'))
2297 return TokenNametrue;
2299 return TokenNameIdentifier;
2300 // if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
2301 // return TokenNamethis;
2304 return TokenNameIdentifier;
2307 case 'v' : //var
2310 if ((data[++index] == 'a') && (data[++index] == 'r'))
2311 return TokenNamevar;
2313 return TokenNameIdentifier;
2316 return TokenNameIdentifier;
2319 case 'w' : //while
2322 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
2323 return TokenNamewhile;
2325 return TokenNameIdentifier;
2326 //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
2327 //return TokenNamewidefp ;
2329 //return TokenNameIdentifier;
2331 return TokenNameIdentifier;
2337 if ((data[++index] == 'o') && (data[++index] == 'r'))
2338 return TokenNamexor;
2340 return TokenNameIdentifier;
2343 return TokenNameIdentifier;
// No first-letter case matched: the token is a plain identifier.
2346 return TokenNameIdentifier;
/**
 * Scans a numeric literal and reports which kind of literal token it is.
 * <p>
 * The visible return points yield TokenNameIntegerLiteral, TokenNameLongLiteral
 * (after an 'l'/'L' suffix), TokenNameFloatingPointLiteral (after an 'f'/'F' suffix)
 * or TokenNameDoubleLiteral (after a 'd'/'D' suffix, or when the local floating
 * flag is set at the final return).
 *
 * @param dotPrefix initial value of the local floating flag; when true, the
 *        leading-'0' (hexadecimal / octal) handling, the 'l'/'L' suffix test and
 *        the '.' test are all skipped
 * @return one of the literal token kinds listed above
 * @throws InvalidInputException with INVALID_HEXA when the character read after
 *         an 'x'/'X' prefix is not a hexadecimal digit, or with INVALID_FLOAT when
 *         the character read after an 'e'/'E' exponent marker (and an optional
 *         '+'/'-' sign) is not a decimal digit
 */
2349 public int scanNumber(boolean dotPrefix) throws InvalidInputException {
2351 //when entering this method the currentCharacter is the first
2352 //digit of the number , i.e. it may be preceded by a . when
2355 boolean floating = dotPrefix;
2356 if ((!dotPrefix) && (currentCharacter == '0')) {
2357 if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
2358 //force the first char of the hexa number to exist...
2359 // consume next character
2360 unicodeAsBackSlash = false;
// Read one character straight from source; a backslash followed by 'u'
// is handed to getNextUnicodeChar().
2361 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
2362 getNextUnicodeChar();
2364 if (withoutUnicodePtr != 0) {
2365 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// Character.digit returns -1 when the character is not a valid base-16 digit.
2368 if (Character.digit(currentCharacter, 16) == -1)
2369 throw new InvalidInputException(INVALID_HEXA);
2371 while (getNextCharAsDigit(16)) {
2373 if (getNextChar('l', 'L') >= 0)
2374 return TokenNameLongLiteral;
2376 return TokenNameIntegerLiteral;
2379 //there is x or X in the number
2380 //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
2381 if (getNextCharAsDigit()) { //-------------potential octal-----------------
2382 while (getNextCharAsDigit()) {
2385 if (getNextChar('l', 'L') >= 0) {
2386 return TokenNameLongLiteral;
2389 if (getNextChar('f', 'F') >= 0) {
2390 return TokenNameFloatingPointLiteral;
2393 if (getNextChar('d', 'D') >= 0) {
2394 return TokenNameDoubleLiteral;
2395 } else { //make the distinction between octal and float ....
2396 if (getNextChar('.')) { //bingo ! ....
2397 while (getNextCharAsDigit()) {
2399 if (getNextChar('e', 'E') >= 0) { // consume next character
2400 unicodeAsBackSlash = false;
2401 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
2402 getNextUnicodeChar();
2404 if (withoutUnicodePtr != 0) {
2405 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2409 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
2410 unicodeAsBackSlash = false;
2411 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
2412 getNextUnicodeChar();
2414 if (withoutUnicodePtr != 0) {
2415 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// An exponent must contain at least one decimal digit.
2419 if (!Character.isDigit(currentCharacter))
2420 throw new InvalidInputException(INVALID_FLOAT);
2421 while (getNextCharAsDigit()) {
2424 if (getNextChar('f', 'F') >= 0)
2425 return TokenNameFloatingPointLiteral;
2426 getNextChar('d', 'D'); //jump over potential d or D
2427 return TokenNameDoubleLiteral;
2429 return TokenNameIntegerLiteral;
2437 while (getNextCharAsDigit()) {
2440 if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
2441 return TokenNameLongLiteral;
2443 if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
2444 while (getNextCharAsDigit()) {
2449 //if floating is true both exponent and suffix may be optional
2451 if (getNextChar('e', 'E') >= 0) {
2453 // consume next character
2454 unicodeAsBackSlash = false;
2455 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
2456 getNextUnicodeChar();
2458 if (withoutUnicodePtr != 0) {
2459 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2463 if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
2464 unicodeAsBackSlash = false;
2465 if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
2466 getNextUnicodeChar();
2468 if (withoutUnicodePtr != 0) {
2469 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
// An exponent must contain at least one decimal digit.
2473 if (!Character.isDigit(currentCharacter))
2474 throw new InvalidInputException(INVALID_FLOAT);
2475 while (getNextCharAsDigit()) {
2479 if (getNextChar('d', 'D') >= 0)
2480 return TokenNameDoubleLiteral;
2481 if (getNextChar('f', 'F') >= 0)
2482 return TokenNameFloatingPointLiteral;
2484 //the long flag has been tested before
// No suffix was consumed: the floating flag alone decides between double and integer.
2486 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
2489 * Search the line number corresponding to a specific position
2492 public final int getLineNumber(int position) {
// Maps a source position to a line number by comparing it with entries of lineEnds.
// A null lineEnds array is handled separately before any lookup.
// The highest index considered is linePtr (d starts at length - 1 with length = linePtr + 1).
// NOTE(review): g and d look like the low/high bounds of a binary search over
// lineEnds and m like its probe index - confirm against the full method body.
2494 if (lineEnds == null)
2496 int length = linePtr + 1;
2499 int g = 0, d = length - 1;
2503 if (position < lineEnds[m]) {
2505 } else if (position > lineEnds[m]) {
2511 if (position < lineEnds[m]) {
/**
 * Installs the character buffer to be scanned and resets scanning state.
 * <p>
 * A null argument is replaced by an empty array; a non-null array is stored by
 * reference, not copied. initialPosition and currentPosition are reset to 0,
 * containsAssertKeyword is cleared, and withoutUnicodeBuffer is reallocated with
 * the same length as the installed source.
 *
 * @param source the characters to scan; may be null
 */
2516 public final void setSource(char[] source) {
2517 //the source-buffer is set to sourceString
2519 if (source == null) {
2520 this.source = new char[0];
2522 this.source = source;
2525 initialPosition = currentPosition = 0;
2526 containsAssertKeyword = false;
// Sized from this.source (never null at this point), not from the parameter.
2527 withoutUnicodeBuffer = new char[this.source.length];
/**
 * Builds a debugging view of the scanner: the source text with the span between
 * startPosition and currentPosition marked by "Starts here -->" and
 * "<-- Ends here".
 * <p>
 * Two special cases return early: when startPosition equals the source length
 * the result is "EOF" followed by the source, and when currentPosition is beyond
 * the source length the result is a "behind the EOF" message followed by the source.
 *
 * @return the annotated source text
 */
2531 public String toString() {
2532 if (startPosition == source.length)
2533 return "EOF\n\n" + new String(source); //$NON-NLS-1$
2534 if (currentPosition > source.length)
2535 return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
// front = everything before startPosition
2537 char front[] = new char[startPosition];
2538 System.arraycopy(source, 0, front, 0, startPosition);
// middleLength simplifies to currentPosition - startPosition
2540 int middleLength = (currentPosition - 1) - startPosition + 1;
2542 if (middleLength > -1) {
2543 middle = new char[middleLength];
2544 System.arraycopy(source, startPosition, middle, 0, middleLength);
2546 middle = new char[0];
// NOTE(review): end is allocated one element longer than the number of chars
// copied into it on the next line, so its last element keeps the default char
// value (NUL) - confirm whether that trailing character is intended.
2549 char end[] = new char[source.length - (currentPosition - 1)];
2550 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
2552 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
2553 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
/**
 * Returns a printable description of a token kind, intended for debugging output.
 * <p>
 * Identifiers and literals are rendered as a label wrapping the current token
 * source, e.g. "Identifier(...)", "Integer(...)", "String(...)"; keywords and
 * operators are rendered as their spelling (e.g. "while", "+=", "{"). The
 * strings "EOF" and "not-a-token" are also possible results.
 * <p>
 * The identifier and literal forms call getCurrentTokenSource(), so they describe
 * whatever token the scanner currently holds, not a token captured earlier.
 * The commented-out cases are Java keywords that are not handled here.
 *
 * @param act the token kind to describe (one of the TokenName... constants)
 * @return the printable description
 */
2556 public final String toStringAction(int act) {
2558 case TokenNameIdentifier :
2559 return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
2560 // case TokenNameabstract :
2561 // return "abstract"; //$NON-NLS-1$
2562 // case TokenNameboolean :
2563 // return "boolean"; //$NON-NLS-1$
2564 case TokenNamebreak :
2565 return "break"; //$NON-NLS-1$
2566 // case TokenNamebyte :
2567 // return "byte"; //$NON-NLS-1$
2568 case TokenNamecase :
2569 return "case"; //$NON-NLS-1$
2570 // case TokenNamecatch :
2571 // return "catch"; //$NON-NLS-1$
2572 // case TokenNamechar :
2573 // return "char"; //$NON-NLS-1$
2574 case TokenNameclass :
2575 return "class"; //$NON-NLS-1$
2576 case TokenNamecontinue :
2577 return "continue"; //$NON-NLS-1$
2578 case TokenNamedefault :
2579 return "default"; //$NON-NLS-1$
2581 return "do"; //$NON-NLS-1$
2582 // case TokenNamedouble :
2583 // return "double"; //$NON-NLS-1$
2584 case TokenNameelse :
2585 return "else"; //$NON-NLS-1$
2586 case TokenNameextends :
2587 return "extends"; //$NON-NLS-1$
2588 case TokenNamefalse :
2589 return "false"; //$NON-NLS-1$
2590 // case TokenNamefinal :
2591 // return "final"; //$NON-NLS-1$
2592 // case TokenNamefinally :
2593 // return "finally"; //$NON-NLS-1$
2594 // case TokenNamefloat :
2595 // return "float"; //$NON-NLS-1$
2597 return "for"; //$NON-NLS-1$
2599 return "if"; //$NON-NLS-1$
2600 // case TokenNameimplements :
2601 // return "implements"; //$NON-NLS-1$
2602 // case TokenNameimport :
2603 // return "import"; //$NON-NLS-1$
2604 // case TokenNameinstanceof :
2605 // return "instanceof"; //$NON-NLS-1$
2606 // case TokenNameint :
2607 // return "int"; //$NON-NLS-1$
2608 // case TokenNameinterface :
2609 // return "interface"; //$NON-NLS-1$
2610 // case TokenNamelong :
2611 // return "long"; //$NON-NLS-1$
2612 // case TokenNamenative :
2613 // return "native"; //$NON-NLS-1$
2615 return "new"; //$NON-NLS-1$
2616 case TokenNamenull :
2617 return "null"; //$NON-NLS-1$
2618 // case TokenNamepackage :
2619 // return "package"; //$NON-NLS-1$
2620 // case TokenNameprivate :
2621 // return "private"; //$NON-NLS-1$
2622 // case TokenNameprotected :
2623 // return "protected"; //$NON-NLS-1$
2624 // case TokenNamepublic :
2625 // return "public"; //$NON-NLS-1$
2626 case TokenNamereturn :
2627 return "return"; //$NON-NLS-1$
2628 // case TokenNameshort :
2629 // return "short"; //$NON-NLS-1$
2630 case TokenNamestatic :
2631 return "static"; //$NON-NLS-1$
2632 // case TokenNamesuper :
2633 // return "super"; //$NON-NLS-1$
2634 case TokenNameswitch :
2635 return "switch"; //$NON-NLS-1$
2636 // case TokenNamesynchronized :
2637 // return "synchronized"; //$NON-NLS-1$
2638 // case TokenNamethis :
2639 // return "this"; //$NON-NLS-1$
2640 // case TokenNamethrow :
2641 // return "throw"; //$NON-NLS-1$
2642 // case TokenNamethrows :
2643 // return "throws"; //$NON-NLS-1$
2644 // case TokenNametransient :
2645 // return "transient"; //$NON-NLS-1$
2646 case TokenNametrue :
2647 return "true"; //$NON-NLS-1$
2648 // case TokenNametry :
2649 // return "try"; //$NON-NLS-1$
2650 // case TokenNamevoid :
2651 // return "void"; //$NON-NLS-1$
2652 // case TokenNamevolatile :
2653 // return "volatile"; //$NON-NLS-1$
2654 case TokenNamewhile :
2655 return "while"; //$NON-NLS-1$
2657 case TokenNameIntegerLiteral :
2658 return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
2659 case TokenNameLongLiteral :
2660 return "Long(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
2661 case TokenNameFloatingPointLiteral :
2662 return "Float(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
2663 case TokenNameDoubleLiteral :
2664 return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
2665 case TokenNameCharacterLiteral :
2666 return "Char(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
2667 case TokenNameStringLiteral :
2668 return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
2670 case TokenNamePLUS_PLUS :
2671 return "++"; //$NON-NLS-1$
2672 case TokenNameMINUS_MINUS :
2673 return "--"; //$NON-NLS-1$
2674 case TokenNameEQUAL_EQUAL :
2675 return "=="; //$NON-NLS-1$
2676 case TokenNameLESS_EQUAL :
2677 return "<="; //$NON-NLS-1$
2678 case TokenNameGREATER_EQUAL :
2679 return ">="; //$NON-NLS-1$
2680 case TokenNameNOT_EQUAL :
2681 return "!="; //$NON-NLS-1$
2682 case TokenNameLEFT_SHIFT :
2683 return "<<"; //$NON-NLS-1$
2684 case TokenNameRIGHT_SHIFT :
2685 return ">>"; //$NON-NLS-1$
2686 case TokenNameUNSIGNED_RIGHT_SHIFT :
2687 return ">>>"; //$NON-NLS-1$
2688 case TokenNamePLUS_EQUAL :
2689 return "+="; //$NON-NLS-1$
2690 case TokenNameMINUS_EQUAL :
2691 return "-="; //$NON-NLS-1$
2692 case TokenNameMULTIPLY_EQUAL :
2693 return "*="; //$NON-NLS-1$
2694 case TokenNameDIVIDE_EQUAL :
2695 return "/="; //$NON-NLS-1$
2696 case TokenNameAND_EQUAL :
2697 return "&="; //$NON-NLS-1$
2698 case TokenNameOR_EQUAL :
2699 return "|="; //$NON-NLS-1$
2700 case TokenNameXOR_EQUAL :
2701 return "^="; //$NON-NLS-1$
2702 case TokenNameREMAINDER_EQUAL :
2703 return "%="; //$NON-NLS-1$
2704 case TokenNameLEFT_SHIFT_EQUAL :
2705 return "<<="; //$NON-NLS-1$
2706 case TokenNameRIGHT_SHIFT_EQUAL :
2707 return ">>="; //$NON-NLS-1$
2708 case TokenNameUNSIGNED_RIGHT_SHIFT_EQUAL :
2709 return ">>>="; //$NON-NLS-1$
2710 case TokenNameOR_OR :
2711 return "||"; //$NON-NLS-1$
2712 case TokenNameAND_AND :
2713 return "&&"; //$NON-NLS-1$
2714 case TokenNamePLUS :
2715 return "+"; //$NON-NLS-1$
2716 case TokenNameMINUS :
2717 return "-"; //$NON-NLS-1$
2719 return "!"; //$NON-NLS-1$
2720 case TokenNameREMAINDER :
2721 return "%"; //$NON-NLS-1$
2723 return "^"; //$NON-NLS-1$
2725 return "&"; //$NON-NLS-1$
2726 case TokenNameMULTIPLY :
2727 return "*"; //$NON-NLS-1$
2729 return "|"; //$NON-NLS-1$
2730 case TokenNameTWIDDLE :
2731 return "~"; //$NON-NLS-1$
2732 case TokenNameDIVIDE :
2733 return "/"; //$NON-NLS-1$
2734 case TokenNameGREATER :
2735 return ">"; //$NON-NLS-1$
2736 case TokenNameLESS :
2737 return "<"; //$NON-NLS-1$
2738 case TokenNameLPAREN :
2739 return "("; //$NON-NLS-1$
2740 case TokenNameRPAREN :
2741 return ")"; //$NON-NLS-1$
2742 case TokenNameLBRACE :
2743 return "{"; //$NON-NLS-1$
2744 case TokenNameRBRACE :
2745 return "}"; //$NON-NLS-1$
2746 case TokenNameLBRACKET :
2747 return "["; //$NON-NLS-1$
2748 case TokenNameRBRACKET :
2749 return "]"; //$NON-NLS-1$
2750 case TokenNameSEMICOLON :
2751 return ";"; //$NON-NLS-1$
2752 case TokenNameQUESTION :
2753 return "?"; //$NON-NLS-1$
2754 case TokenNameCOLON :
2755 return ":"; //$NON-NLS-1$
2756 case TokenNameCOMMA :
2757 return ","; //$NON-NLS-1$
2759 return "."; //$NON-NLS-1$
2760 case TokenNameEQUAL :
2761 return "="; //$NON-NLS-1$
2763 return "EOF"; //$NON-NLS-1$
2765 return "not-a-token"; //$NON-NLS-1$
/**
 * Creates a scanner with assert mode switched off; delegates to the
 * four-argument constructor, passing false for assertMode.
 *
 * @param tokenizeComments forwarded unchanged to the four-argument constructor
 * @param tokenizeWhiteSpace forwarded unchanged to the four-argument constructor
 * @param checkNonExternalizedStringLiterals forwarded unchanged to the four-argument constructor
 */
2769 public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
2770 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
2774 boolean tokenizeComments,
2775 boolean tokenizeWhiteSpace,
2776 boolean checkNonExternalizedStringLiterals,
2777 boolean assertMode) {
// Records the four configuration flags on the instance.
// eofPosition starts at Integer.MAX_VALUE; per the field's declaration comment,
// EOF tokens are generated after that position instead of real tokens.
2778 this.eofPosition = Integer.MAX_VALUE;
2779 this.tokenizeComments = tokenizeComments;
2780 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
2781 this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
2782 this.assertMode = assertMode;
/**
 * Passes currentLine to parseTags.
 * NOTE(review): the currentLine == null test presumably skips the call when no
 * line is being tracked - confirm against the statement it guards.
 *
 * @throws InvalidInputException declared because parseTags declares it
 */
2785 private void checkNonExternalizeString() throws InvalidInputException {
2786 if (currentLine == null)
2788 parseTags(currentLine);
2791 private void parseTags(NLSLine line) throws InvalidInputException {
2792 String s = new String(getCurrentTokenSource());
2793 int pos = s.indexOf(TAG_PREFIX);
2794 int lineLength = line.size();
2796 int start = pos + TAG_PREFIX_LENGTH;
2797 int end = s.indexOf(TAG_POSTFIX, start);
2798 String index = s.substring(start, end);
2801 i = Integer.parseInt(index) - 1; // Tags are one based not zero based.
2802 } catch (NumberFormatException e) {
2803 i = -1; // we don't want to consider this as a valid NLS tag
2805 if (line.exists(i)) {
2808 pos = s.indexOf(TAG_PREFIX, start);
2811 this.nonNLSStrings = new StringLiteral[lineLength];
2812 int nonNLSCounter = 0;
2813 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
2814 StringLiteral literal = (StringLiteral) iterator.next();
2815 if (literal != null) {
2816 this.nonNLSStrings[nonNLSCounter++] = literal;
2819 if (nonNLSCounter == 0) {
2820 this.nonNLSStrings = null;
2824 this.wasNonExternalizedStringLiteral = true;
2825 if (nonNLSCounter != lineLength) {
2826 System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);