Implemented a simple occurrences finder for Variables ($...) and Identifiers;
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
1 /***********************************************************************************************************************************
2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3  * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4  * and is available at http://www.eclipse.org/legal/cpl-v05.html
5  *
6  * Contributors: IBM Corporation - initial API and implementation
7  **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
9
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
13
14 import net.sourceforge.phpdt.core.compiler.CharOperation;
15 import net.sourceforge.phpdt.core.compiler.IScanner;
16 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
17 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
20
21 public class Scanner implements IScanner, ITerminalSymbols {
22         /*
23          * APIs are - getNextToken() which returns the current type of the token (this
24          * value is not memorized by the scanner) - getCurrentTokenSource() which
25          * provides with the token "REAL" source (aka all unicode have been
26          * transformed into a correct char) - sourceStart gives the position into the
27          * stream - currentPosition-1 gives the sourceEnd position into the stream
28          */
29         // 1.4 feature
30 //      private boolean assertMode;
31
32         public boolean useAssertAsAnIndentifier = false;
33
34         // flag indicating if processed source contains occurrences of keyword assert
35         public boolean containsAssertKeyword = false;
36
37         public boolean recordLineSeparator;
38
39         public boolean ignorePHPOneLiner = false;
40
41         public boolean phpMode = false;
42
43         public boolean phpExpressionTag = false;
44
45         // public Stack encapsedStringStack = null;
46
47         public char currentCharacter;
48
49         public int startPosition;
50
51         public int currentPosition;
52
53         public int initialPosition, eofPosition;
54
55         // after this position eof are generated instead of real token from the
56         // source
57         public boolean tokenizeComments;
58
59         public boolean tokenizeWhiteSpace;
60
61         public boolean tokenizeStrings;
62
63         // source should be viewed as a window (aka a part)
64         // of a entire very large stream
65         public char source[];
66
67         // unicode support
68         public char[] withoutUnicodeBuffer;
69
70         public int withoutUnicodePtr;
71
72         // when == 0 ==> no unicode in the current token
73         public boolean unicodeAsBackSlash = false;
74
75         public boolean scanningFloatLiteral = false;
76
77         // support for /** comments
78         public int[] commentStops = new int[10];
79
80         public int[] commentStarts = new int[10];
81
82         public int commentPtr = -1; // no comment test with commentPtr value -1
83
84         protected int lastCommentLinePosition = -1;
85
86         // diet parsing support - jump over some method body when requested
87         public boolean diet = false;
88
89         // support for the poor-line-debuggers ....
90         // remember the position of the cr/lf
91         public int[] lineEnds = new int[250];
92
93         public int linePtr = -1;
94
95         public boolean wasAcr = false;
96
97         public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
98
99         public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
100
101         public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
102
103         public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
104
105         public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
106
107         public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
108
109         public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
110
111         public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
112
113         public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
114
115         public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
116
117         public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
118
119         public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
120
121         // ----------------optimized identifier management------------------
122         static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
123                         charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
124                         charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
125                         charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
126                         charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
127                         charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
128                         charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
129                         charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
130                         charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
131
132         static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
133                         '$', 'c' }, charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' }, charArray_vf = new char[] { '$',
134                         'f' }, charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' },
135                         charArray_vi = new char[] { '$', 'i' }, charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' },
136                         charArray_vl = new char[] { '$', 'l' }, charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' },
137                         charArray_vo = new char[] { '$', 'o' }, charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' },
138                         charArray_vr = new char[] { '$', 'r' }, charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' },
139                         charArray_vu = new char[] { '$', 'u' }, charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' },
140                         charArray_vx = new char[] { '$', 'x' }, charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
141
142         public final static int MAX_OBVIOUS = 256;
143
144         static final int[] ObviousIdentCharNatures = new int[MAX_OBVIOUS];
145
146         public final static int C_DOLLAR = 8;
147
148         public final static int C_LETTER = 4;
149
150         public final static int C_DIGIT = 3;
151
152         public final static int C_SEPARATOR = 2;
153
154         public final static int C_SPACE = 1;
155         static {
156                 for (int i = '0'; i <= '9'; i++)
157                         ObviousIdentCharNatures[i] = C_DIGIT;
158
159                 for (int i = 'a'; i <= 'z'; i++)
160                         ObviousIdentCharNatures[i] = C_LETTER;
161                 for (int i = 'A'; i <= 'Z'; i++)
162                         ObviousIdentCharNatures[i] = C_LETTER;
163                 ObviousIdentCharNatures['_'] = C_LETTER;
164                 for (int i = 127; i <= 255; i++)
165                         ObviousIdentCharNatures[i] = C_LETTER;
166
167                 ObviousIdentCharNatures['$'] = C_DOLLAR;
168
169                 ObviousIdentCharNatures[10] = C_SPACE; // \ u000a: LINE FEED
170                 ObviousIdentCharNatures[12] = C_SPACE; // \ u000c: FORM FEED
171                 ObviousIdentCharNatures[13] = C_SPACE; // \ u000d: CARRIAGE RETURN
172                 ObviousIdentCharNatures[32] = C_SPACE; // \ u0020: SPACE
173                 ObviousIdentCharNatures[9] = C_SPACE; // \ u0009: HORIZONTAL TABULATION
174
175                 ObviousIdentCharNatures['.'] = C_SEPARATOR;
176                 ObviousIdentCharNatures[':'] = C_SEPARATOR;
177                 ObviousIdentCharNatures[';'] = C_SEPARATOR;
178                 ObviousIdentCharNatures[','] = C_SEPARATOR;
179                 ObviousIdentCharNatures['['] = C_SEPARATOR;
180                 ObviousIdentCharNatures[']'] = C_SEPARATOR;
181                 ObviousIdentCharNatures['('] = C_SEPARATOR;
182                 ObviousIdentCharNatures[')'] = C_SEPARATOR;
183                 ObviousIdentCharNatures['{'] = C_SEPARATOR;
184                 ObviousIdentCharNatures['}'] = C_SEPARATOR;
185                 ObviousIdentCharNatures['+'] = C_SEPARATOR;
186                 ObviousIdentCharNatures['-'] = C_SEPARATOR;
187                 ObviousIdentCharNatures['*'] = C_SEPARATOR;
188                 ObviousIdentCharNatures['/'] = C_SEPARATOR;
189                 ObviousIdentCharNatures['='] = C_SEPARATOR;
190                 ObviousIdentCharNatures['&'] = C_SEPARATOR;
191                 ObviousIdentCharNatures['|'] = C_SEPARATOR;
192                 ObviousIdentCharNatures['?'] = C_SEPARATOR;
193                 ObviousIdentCharNatures['<'] = C_SEPARATOR;
194                 ObviousIdentCharNatures['>'] = C_SEPARATOR;
195                 ObviousIdentCharNatures['!'] = C_SEPARATOR;
196                 ObviousIdentCharNatures['%'] = C_SEPARATOR;
197                 ObviousIdentCharNatures['^'] = C_SEPARATOR;
198                 ObviousIdentCharNatures['~'] = C_SEPARATOR;
199                 ObviousIdentCharNatures['"'] = C_SEPARATOR;
200                 ObviousIdentCharNatures['\''] = C_SEPARATOR;
201         }
202         static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
203
204         static final int TableSize = 30, InternalTableSize = 6;
205
206         // 30*6 = 180 entries
207         public static final int OptimizedLength = 6;
208
209         public/* static */
210         final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
211
212         // support for detecting non-externalized string literals
213         int currentLineNr = -1;
214
215         int previousLineNr = -1;
216
217         NLSLine currentLine = null;
218
219         List lines = new ArrayList();
220
221         public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
222
223         public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
224
225         public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
226
227         public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
228
229         public StringLiteral[] nonNLSStrings = null;
230
231         public boolean checkNonExternalizedStringLiterals = true;
232
233         public boolean wasNonExternalizedStringLiteral = false;
234
235         /* static */{
236                 for (int i = 0; i < 6; i++) {
237                         for (int j = 0; j < TableSize; j++) {
238                                 for (int k = 0; k < InternalTableSize; k++) {
239                                         charArray_length[i][j][k] = initCharArray;
240                                 }
241                         }
242                 }
243         }
244
245         static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
246
247         public static final int RoundBracket = 0;
248
249         public static final int SquareBracket = 1;
250
251         public static final int CurlyBracket = 2;
252
253         public static final int BracketKinds = 3;
254
255         // task tag support
256         public char[][] foundTaskTags = null;
257
258         public char[][] foundTaskMessages;
259
260         public char[][] foundTaskPriorities = null;
261
262         public int[][] foundTaskPositions;
263
264         public int foundTaskCount = 0;
265
266         public char[][] taskTags = null;
267
268         public char[][] taskPriorities = null;
269
270         public boolean isTaskCaseSensitive = true;
271
272         public static final boolean DEBUG = false;
273
274         public static final boolean TRACE = false;
275
276         public ICompilationUnit compilationUnit = null;
277
278         /**
279          * Determines if the specified character is permissible as the first character
280          * in a PHP identifier or variable
281          *
282          * The '$' character for PHP variables is regarded as a correct first
283          * character !
284          *
285          */
286         public static boolean isPHPIdentOrVarStart(char ch) {
287                 if (ch < MAX_OBVIOUS) {
288                         return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DOLLAR;
289                 }
290                 return false;
291                 //return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
292         }
293
294         /**
295          * Determines if the specified character is permissible as the first character
296          * in a PHP identifier.
297          *
298          * The '$' character for PHP variables isn't regarded as the first character !
299          */
300         public static boolean isPHPIdentifierStart(char ch) {
301                 if (ch < MAX_OBVIOUS) {
302                         return ObviousIdentCharNatures[ch]==C_LETTER;
303                 }
304                 return false;
305 //              return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
306         }
307
308         /**
309          * Determines if the specified character may be part of a PHP identifier as
310          * other than the first character
311          */
312         public static boolean isPHPIdentifierPart(char ch) {
313                 if (ch < MAX_OBVIOUS) {
314                         return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DIGIT;
315                 }
316                 return false;
317 //              return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
318         }
319
320         public static boolean isSQLIdentifierPart(char ch) {
321                 if (ch < MAX_OBVIOUS) {
322                         return ObviousIdentCharNatures[ch]==C_LETTER || ObviousIdentCharNatures[ch]==C_DIGIT;
323                 }
324                 return false;
325         }
326
327         public final boolean atEnd() {
328                 // This code is not relevant if source is
329                 // Only a part of the real stream input
330                 return source.length == currentPosition;
331         }
332
333         public char[] getCurrentIdentifierSource() {
334                 // return the token REAL source (aka unicodes are precomputed)
335                 char[] result;
336                 // if (withoutUnicodePtr != 0)
337                 // //0 is used as a fast test flag so the real first char is in position 1
338                 // System.arraycopy(
339                 // withoutUnicodeBuffer,
340                 // 1,
341                 // result = new char[withoutUnicodePtr],
342                 // 0,
343                 // withoutUnicodePtr);
344                 // else {
345                 int length = currentPosition - startPosition;
346                 switch (length) { // see OptimizedLength
347                 case 1:
348                         return optimizedCurrentTokenSource1();
349                 case 2:
350                         return optimizedCurrentTokenSource2();
351                 case 3:
352                         return optimizedCurrentTokenSource3();
353                 case 4:
354                         return optimizedCurrentTokenSource4();
355                 case 5:
356                         return optimizedCurrentTokenSource5();
357                 case 6:
358                         return optimizedCurrentTokenSource6();
359                 }
360                 // no optimization
361                 System.arraycopy(source, startPosition, result = new char[length], 0, length);
362                 // }
363                 return result;
364         }
365
366         public int getCurrentTokenEndPosition() {
367                 return this.currentPosition - 1;
368         }
369
370         public final char[] getCurrentTokenSource() {
371                 // Return the token REAL source (aka unicodes are precomputed)
372                 char[] result;
373                 // if (withoutUnicodePtr != 0)
374                 // // 0 is used as a fast test flag so the real first char is in position 1
375                 // System.arraycopy(
376                 // withoutUnicodeBuffer,
377                 // 1,
378                 // result = new char[withoutUnicodePtr],
379                 // 0,
380                 // withoutUnicodePtr);
381                 // else {
382                 int length;
383                 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
384                 // }
385                 return result;
386         }
387
388         public final char[] getCurrentTokenSource(int startPos) {
389                 // Return the token REAL source (aka unicodes are precomputed)
390                 char[] result;
391                 // if (withoutUnicodePtr != 0)
392                 // // 0 is used as a fast test flag so the real first char is in position 1
393                 // System.arraycopy(
394                 // withoutUnicodeBuffer,
395                 // 1,
396                 // result = new char[withoutUnicodePtr],
397                 // 0,
398                 // withoutUnicodePtr);
399                 // else {
400                 int length;
401                 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
402                 // }
403                 return result;
404         }
405
406         public final char[] getCurrentTokenSourceString() {
407                 // return the token REAL source (aka unicodes are precomputed).
408                 // REMOVE the two " that are at the beginning and the end.
409                 char[] result;
410                 if (withoutUnicodePtr != 0)
411                         // 0 is used as a fast test flag so the real first char is in position 1
412                         System.arraycopy(withoutUnicodeBuffer, 2,
413                         // 2 is 1 (real start) + 1 (to jump over the ")
414                                         result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
415                 else {
416                         int length;
417                         System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
418                 }
419                 return result;
420         }
421
422         public final boolean equalsCurrentTokenSource(char[] word) {
423                 if (word.length != currentPosition - startPosition) {
424                         return false;
425                 }
426                 for (int i = 0; i < word.length; i++) {
427                         if (word[i]!=source[startPosition+i]){
428                                 return false;
429                         }
430                 }
431                 return true;
432         }
433
434         public final char[] getRawTokenSourceEnd() {
435                 int length = this.eofPosition - this.currentPosition - 1;
436                 char[] sourceEnd = new char[length];
437                 System.arraycopy(this.source, this.currentPosition, sourceEnd, 0, length);
438                 return sourceEnd;
439         }
440
441         public int getCurrentTokenStartPosition() {
442                 return this.startPosition;
443         }
444
445         public final String getCurrentStringLiteral() {
446                 char[] result = getCurrentStringLiteralSource();
447                 return new String(result);
448         }
449
450         public final char[] getCurrentStringLiteralSource() {
451                 // Return the token REAL source (aka unicodes are precomputed)
452                 if (startPosition + 1 >= currentPosition) {
453                         return new char[0];
454                 }
455                 char[] result;
456                 int length;
457                 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
458                 // }
459                 return result;
460         }
461
462         public final char[] getCurrentStringLiteralSource(int startPos) {
463                 // Return the token REAL source (aka unicodes are precomputed)
464                 char[] result;
465                 int length;
466                 System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
467                 // }
468                 return result;
469         }
470
471         /*
472          * Search the source position corresponding to the end of a given line number
473          *
474          * Line numbers are 1-based, and relative to the scanner initialPosition.
475          * Character positions are 0-based.
476          *
477          * In case the given line number is inconsistent, answers -1.
478          */
479         public final int getLineEnd(int lineNumber) {
480                 if (lineEnds == null)
481                         return -1;
482                 if (lineNumber >= lineEnds.length)
483                         return -1;
484                 if (lineNumber <= 0)
485                         return -1;
486                 if (lineNumber == lineEnds.length - 1)
487                         return eofPosition;
488                 return lineEnds[lineNumber - 1];
489                 // next line start one character behind the lineEnd of the previous line
490         }
491
492         /**
493          * Search the source position corresponding to the beginning of a given line
494          * number
495          *
496          * Line numbers are 1-based, and relative to the scanner initialPosition.
497          * Character positions are 0-based.
498          *
499          * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
500          *
501          * In case the given line number is inconsistent, answers -1.
502          */
503         public final int getLineStart(int lineNumber) {
504                 if (lineEnds == null)
505                         return -1;
506                 if (lineNumber >= lineEnds.length)
507                         return -1;
508                 if (lineNumber <= 0)
509                         return -1;
510                 if (lineNumber == 1)
511                         return initialPosition;
512                 return lineEnds[lineNumber - 2] + 1;
513                 // next line start one character behind the lineEnd of the previous line
514         }
515
516         public final boolean getNextChar(char testedChar) {
517                 // BOOLEAN
518                 // handle the case of unicode.
519                 // when a unicode appears then we must use a buffer that holds char
520                 // internal values
521                 // At the end of this method currentCharacter holds the new visited char
522                 // and currentPosition points right next after it
523                 // Both previous lines are true if the currentCharacter is == to the
524                 // testedChar
525                 // On false, no side effect has occured.
526                 // ALL getNextChar.... ARE OPTIMIZED COPIES
527                 int temp = currentPosition;
528                 try {
529                         currentCharacter = source[currentPosition++];
530                         // if (((currentCharacter = source[currentPosition++]) == '\\')
531                         // && (source[currentPosition] == 'u')) {
532                         // //-------------unicode traitement ------------
533                         // int c1, c2, c3, c4;
534                         // int unicodeSize = 6;
535                         // currentPosition++;
536                         // while (source[currentPosition] == 'u') {
537                         // currentPosition++;
538                         // unicodeSize++;
539                         // }
540                         //
541                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
542                         // || c1 < 0)
543                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
544                         // || c2 < 0)
545                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
546                         // || c3 < 0)
547                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
548                         // || c4 < 0)) {
549                         // currentPosition = temp;
550                         // return false;
551                         // }
552                         //
553                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
554                         // if (currentCharacter != testedChar) {
555                         // currentPosition = temp;
556                         // return false;
557                         // }
558                         // unicodeAsBackSlash = currentCharacter == '\\';
559                         //
560                         // //need the unicode buffer
561                         // if (withoutUnicodePtr == 0) {
562                         // //buffer all the entries that have been left aside....
563                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
564                         // System.arraycopy(
565                         // source,
566                         // startPosition,
567                         // withoutUnicodeBuffer,
568                         // 1,
569                         // withoutUnicodePtr);
570                         // }
571                         // //fill the buffer with the char
572                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
573                         // return true;
574                         //
575                         // } //-------------end unicode traitement--------------
576                         // else {
577                         if (currentCharacter != testedChar) {
578                                 currentPosition = temp;
579                                 return false;
580                         }
581                         unicodeAsBackSlash = false;
582                         // if (withoutUnicodePtr != 0)
583                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
584                         return true;
585                         // }
586                 } catch (IndexOutOfBoundsException e) {
587                         unicodeAsBackSlash = false;
588                         currentPosition = temp;
589                         return false;
590                 }
591         }
592
593         public final int getNextChar(char testedChar1, char testedChar2) {
594                 // INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
595                 // test can be done with (x==0) for the first and (x>0) for the second
596                 // handle the case of unicode.
597                 // when a unicode appears then we must use a buffer that holds char
598                 // internal values
599                 // At the end of this method currentCharacter holds the new visited char
600                 // and currentPosition points right next after it
601                 // Both previous lines are true if the currentCharacter is == to the
602                 // testedChar1/2
603                 // On false, no side effect has occured.
604                 // ALL getNextChar.... ARE OPTIMIZED COPIES
605                 int temp = currentPosition;
606                 try {
607                         int result;
608                         currentCharacter = source[currentPosition++];
609                         // if (((currentCharacter = source[currentPosition++]) == '\\')
610                         // && (source[currentPosition] == 'u')) {
611                         // //-------------unicode traitement ------------
612                         // int c1, c2, c3, c4;
613                         // int unicodeSize = 6;
614                         // currentPosition++;
615                         // while (source[currentPosition] == 'u') {
616                         // currentPosition++;
617                         // unicodeSize++;
618                         // }
619                         //
620                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
621                         // || c1 < 0)
622                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
623                         // || c2 < 0)
624                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
625                         // || c3 < 0)
626                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
627                         // || c4 < 0)) {
628                         // currentPosition = temp;
629                         // return 2;
630                         // }
631                         //
632                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
633                         // if (currentCharacter == testedChar1)
634                         // result = 0;
635                         // else if (currentCharacter == testedChar2)
636                         // result = 1;
637                         // else {
638                         // currentPosition = temp;
639                         // return -1;
640                         // }
641                         //
642                         // //need the unicode buffer
643                         // if (withoutUnicodePtr == 0) {
644                         // //buffer all the entries that have been left aside....
645                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
646                         // System.arraycopy(
647                         // source,
648                         // startPosition,
649                         // withoutUnicodeBuffer,
650                         // 1,
651                         // withoutUnicodePtr);
652                         // }
653                         // //fill the buffer with the char
654                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
655                         // return result;
656                         // } //-------------end unicode traitement--------------
657                         // else {
658                         if (currentCharacter == testedChar1)
659                                 result = 0;
660                         else if (currentCharacter == testedChar2)
661                                 result = 1;
662                         else {
663                                 currentPosition = temp;
664                                 return -1;
665                         }
666                         // if (withoutUnicodePtr != 0)
667                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
668                         return result;
669                         // }
670                 } catch (IndexOutOfBoundsException e) {
671                         currentPosition = temp;
672                         return -1;
673                 }
674         }
675
676         public final boolean getNextCharAsDigit() {
677                 // BOOLEAN
678                 // handle the case of unicode.
679                 // when a unicode appears then we must use a buffer that holds char
680                 // internal values
681                 // At the end of this method currentCharacter holds the new visited char
682                 // and currentPosition points right next after it
683                 // Both previous lines are true if the currentCharacter is a digit
684                 // On false, no side effect has occured.
685                 // ALL getNextChar.... ARE OPTIMIZED COPIES
686                 int temp = currentPosition;
687                 try {
688                         currentCharacter = source[currentPosition++];
689                         // if (((currentCharacter = source[currentPosition++]) == '\\')
690                         // && (source[currentPosition] == 'u')) {
691                         // //-------------unicode traitement ------------
692                         // int c1, c2, c3, c4;
693                         // int unicodeSize = 6;
694                         // currentPosition++;
695                         // while (source[currentPosition] == 'u') {
696                         // currentPosition++;
697                         // unicodeSize++;
698                         // }
699                         //
700                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
701                         // || c1 < 0)
702                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
703                         // || c2 < 0)
704                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
705                         // || c3 < 0)
706                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
707                         // || c4 < 0)) {
708                         // currentPosition = temp;
709                         // return false;
710                         // }
711                         //
712                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
713                         // if (!Character.isDigit(currentCharacter)) {
714                         // currentPosition = temp;
715                         // return false;
716                         // }
717                         //
718                         // //need the unicode buffer
719                         // if (withoutUnicodePtr == 0) {
720                         // //buffer all the entries that have been left aside....
721                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
722                         // System.arraycopy(
723                         // source,
724                         // startPosition,
725                         // withoutUnicodeBuffer,
726                         // 1,
727                         // withoutUnicodePtr);
728                         // }
729                         // //fill the buffer with the char
730                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
731                         // return true;
732                         // } //-------------end unicode traitement--------------
733                         // else {
734                         if (!Character.isDigit(currentCharacter)) {
735                                 currentPosition = temp;
736                                 return false;
737                         }
738                         // if (withoutUnicodePtr != 0)
739                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
740                         return true;
741                         // }
742                 } catch (IndexOutOfBoundsException e) {
743                         currentPosition = temp;
744                         return false;
745                 }
746         }
747
748         public final boolean getNextCharAsDigit(int radix) {
749                 // BOOLEAN
750                 // handle the case of unicode.
751                 // when a unicode appears then we must use a buffer that holds char
752                 // internal values
753                 // At the end of this method currentCharacter holds the new visited char
754                 // and currentPosition points right next after it
755                 // Both previous lines are true if the currentCharacter is a digit base on
756                 // radix
757                 // On false, no side effect has occured.
758                 // ALL getNextChar.... ARE OPTIMIZED COPIES
759                 int temp = currentPosition;
760                 try {
761                         currentCharacter = source[currentPosition++];
762                         // if (((currentCharacter = source[currentPosition++]) == '\\')
763                         // && (source[currentPosition] == 'u')) {
764                         // //-------------unicode traitement ------------
765                         // int c1, c2, c3, c4;
766                         // int unicodeSize = 6;
767                         // currentPosition++;
768                         // while (source[currentPosition] == 'u') {
769                         // currentPosition++;
770                         // unicodeSize++;
771                         // }
772                         //
773                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
774                         // || c1 < 0)
775                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
776                         // || c2 < 0)
777                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
778                         // || c3 < 0)
779                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
780                         // || c4 < 0)) {
781                         // currentPosition = temp;
782                         // return false;
783                         // }
784                         //
785                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
786                         // if (Character.digit(currentCharacter, radix) == -1) {
787                         // currentPosition = temp;
788                         // return false;
789                         // }
790                         //
791                         // //need the unicode buffer
792                         // if (withoutUnicodePtr == 0) {
793                         // //buffer all the entries that have been left aside....
794                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
795                         // System.arraycopy(
796                         // source,
797                         // startPosition,
798                         // withoutUnicodeBuffer,
799                         // 1,
800                         // withoutUnicodePtr);
801                         // }
802                         // //fill the buffer with the char
803                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
804                         // return true;
805                         // } //-------------end unicode traitement--------------
806                         // else {
807                         if (Character.digit(currentCharacter, radix) == -1) {
808                                 currentPosition = temp;
809                                 return false;
810                         }
811                         // if (withoutUnicodePtr != 0)
812                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
813                         return true;
814                         // }
815                 } catch (IndexOutOfBoundsException e) {
816                         currentPosition = temp;
817                         return false;
818                 }
819         }
820
821         public boolean getNextCharAsJavaIdentifierPart() {
822                 // BOOLEAN
823                 // handle the case of unicode.
824                 // when a unicode appears then we must use a buffer that holds char
825                 // internal values
826                 // At the end of this method currentCharacter holds the new visited char
827                 // and currentPosition points right next after it
828                 // Both previous lines are true if the currentCharacter is a
829                 // JavaIdentifierPart
830                 // On false, no side effect has occured.
831                 // ALL getNextChar.... ARE OPTIMIZED COPIES
832                 int temp = currentPosition;
833                 try {
834                         currentCharacter = source[currentPosition++];
835                         // if (((currentCharacter = source[currentPosition++]) == '\\')
836                         // && (source[currentPosition] == 'u')) {
837                         // //-------------unicode traitement ------------
838                         // int c1, c2, c3, c4;
839                         // int unicodeSize = 6;
840                         // currentPosition++;
841                         // while (source[currentPosition] == 'u') {
842                         // currentPosition++;
843                         // unicodeSize++;
844                         // }
845                         //
846                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
847                         // || c1 < 0)
848                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
849                         // || c2 < 0)
850                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
851                         // || c3 < 0)
852                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
853                         // || c4 < 0)) {
854                         // currentPosition = temp;
855                         // return false;
856                         // }
857                         //
858                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
859                         // if (!isPHPIdentifierPart(currentCharacter)) {
860                         // currentPosition = temp;
861                         // return false;
862                         // }
863                         //
864                         // //need the unicode buffer
865                         // if (withoutUnicodePtr == 0) {
866                         // //buffer all the entries that have been left aside....
867                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
868                         // System.arraycopy(
869                         // source,
870                         // startPosition,
871                         // withoutUnicodeBuffer,
872                         // 1,
873                         // withoutUnicodePtr);
874                         // }
875                         // //fill the buffer with the char
876                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
877                         // return true;
878                         // } //-------------end unicode traitement--------------
879                         // else {
880                         if (!isPHPIdentifierPart(currentCharacter)) {
881                                 currentPosition = temp;
882                                 return false;
883                         }
884                         // if (withoutUnicodePtr != 0)
885                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
886                         return true;
887                         // }
888                 } catch (IndexOutOfBoundsException e) {
889                         currentPosition = temp;
890                         return false;
891                 }
892         }
893
894         public int getCastOrParen() {
895                 int tempPosition = currentPosition;
896                 char tempCharacter = currentCharacter;
897                 int tempToken = TokenNameLPAREN;
898                 boolean found = false;
899                 StringBuffer buf = new StringBuffer();
900                 try {
901                         do {
902                                 currentCharacter = source[currentPosition++];
903                         } while (currentCharacter == ' ' || currentCharacter == '\t');
904                         while (ObviousIdentCharNatures[currentCharacter]==C_LETTER) {
905                                 //      while((currentCharacter >= 'a' && currentCharacter <= 'z') || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
906                                 buf.append(currentCharacter);
907                                 currentCharacter = source[currentPosition++];
908                         }
909                         if (buf.length() >= 3 && buf.length() <= 7) {
910                                 char[] data = buf.toString().toCharArray();
911                                 int index = 0;
912                                 switch (data.length) {
913                                 case 3:
914                                         // int
915                                         if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
916                                                 found = true;
917                                                 tempToken = TokenNameintCAST;
918                                         }
919                                         break;
920                                 case 4:
921                                         // bool real
922                                         if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
923                                                 found = true;
924                                                 tempToken = TokenNameboolCAST;
925                                         } else {
926                                                 index = 0;
927                                                 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
928                                                         found = true;
929                                                         tempToken = TokenNamedoubleCAST;
930                                                 }
931                                         }
932                                         break;
933                                 case 5:
934                                         // array unset float
935                                         if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
936                                                         && (data[++index] == 'y')) {
937                                                 found = true;
938                                                 tempToken = TokenNamearrayCAST;
939                                         } else {
940                                                 index = 0;
941                                                 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
942                                                                 && (data[++index] == 't')) {
943                                                         found = true;
944                                                         tempToken = TokenNameunsetCAST;
945                                                 } else {
946                                                         index = 0;
947                                                         if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
948                                                                         && (data[++index] == 't')) {
949                                                                 found = true;
950                                                                 tempToken = TokenNamedoubleCAST;
951                                                         }
952                                                 }
953                                         }
954                                         break;
955                                 case 6:
956                                         // object string double
957                                         if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
958                                                         && (data[++index] == 'c') && (data[++index] == 't')) {
959                                                 found = true;
960                                                 tempToken = TokenNameobjectCAST;
961                                         } else {
962                                                 index = 0;
963                                                 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
964                                                                 && (data[++index] == 'n') && (data[++index] == 'g')) {
965                                                         found = true;
966                                                         tempToken = TokenNamestringCAST;
967                                                 } else {
968                                                         index = 0;
969                                                         if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
970                                                                         && (data[++index] == 'l') && (data[++index] == 'e')) {
971                                                                 found = true;
972                                                                 tempToken = TokenNamedoubleCAST;
973                                                         }
974                                                 }
975                                         }
976                                         break;
977                                 case 7:
978                                         // boolean integer
979                                         if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
980                                                         && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
981                                                 found = true;
982                                                 tempToken = TokenNameboolCAST;
983                                         } else {
984                                                 index = 0;
985                                                 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
986                                                                 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
987                                                         found = true;
988                                                         tempToken = TokenNameintCAST;
989                                                 }
990                                         }
991                                         break;
992                                 }
993                                 if (found) {
994                                         while (currentCharacter == ' ' || currentCharacter == '\t') {
995                                                 currentCharacter = source[currentPosition++];
996                                         }
997                                         if (currentCharacter == ')') {
998                                                 return tempToken;
999                                         }
1000                                 }
1001                         }
1002                 } catch (IndexOutOfBoundsException e) {
1003                 }
1004                 currentCharacter = tempCharacter;
1005                 currentPosition = tempPosition;
1006                 return TokenNameLPAREN;
1007         }
1008
1009         public void consumeStringInterpolated() throws InvalidInputException {
1010                 try {
1011                         // consume next character
1012                         unicodeAsBackSlash = false;
1013                         currentCharacter = source[currentPosition++];
1014                         // if (((currentCharacter = source[currentPosition++]) == '\\')
1015                         // && (source[currentPosition] == 'u')) {
1016                         // getNextUnicodeChar();
1017                         // } else {
1018                         // if (withoutUnicodePtr != 0) {
1019                         // withoutUnicodeBuffer[++withoutUnicodePtr] =
1020                         // currentCharacter;
1021                         // }
1022                         // }
1023                         while (currentCharacter != '`') {
1024                                 /** ** in PHP \r and \n are valid in string literals *** */
1025                                 // if ((currentCharacter == '\n')
1026                                 // || (currentCharacter == '\r')) {
1027                                 // // relocate if finding another quote fairly close: thus unicode
1028                                 // '/u000D' will be fully consumed
1029                                 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1030                                 // if (currentPosition + lookAhead == source.length)
1031                                 // break;
1032                                 // if (source[currentPosition + lookAhead] == '\n')
1033                                 // break;
1034                                 // if (source[currentPosition + lookAhead] == '\"') {
1035                                 // currentPosition += lookAhead + 1;
1036                                 // break;
1037                                 // }
1038                                 // }
1039                                 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1040                                 // }
1041                                 if (currentCharacter == '\\') {
1042                                         int escapeSize = currentPosition;
1043                                         boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1044                                         // scanEscapeCharacter make a side effect on this value and we need
1045                                         // the previous value few lines down this one
1046                                         scanDoubleQuotedEscapeCharacter();
1047                                         escapeSize = currentPosition - escapeSize;
1048                                         if (withoutUnicodePtr == 0) {
1049                                                 // buffer all the entries that have been left aside....
1050                                                 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1051                                                 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1052                                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1053                                         } else { // overwrite the / in the buffer
1054                                                 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1055                                                 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1056                                                         // where only one is correct
1057                                                         withoutUnicodePtr--;
1058                                                 }
1059                                         }
1060                                 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1061                                         if (recordLineSeparator) {
1062                                                 pushLineSeparator();
1063                                         }
1064                                 }
1065                                 // consume next character
1066                                 unicodeAsBackSlash = false;
1067                                 currentCharacter = source[currentPosition++];
1068                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
1069                                 // && (source[currentPosition] == 'u')) {
1070                                 // getNextUnicodeChar();
1071                                 // } else {
1072                                 if (withoutUnicodePtr != 0) {
1073                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1074                                 }
1075                                 // }
1076                         }
1077                 } catch (IndexOutOfBoundsException e) {
1078                         // reset end position for error reporting
1079                         currentPosition -= 2;
1080                         throw new InvalidInputException(UNTERMINATED_STRING);
1081                 } catch (InvalidInputException e) {
1082                         if (e.getMessage().equals(INVALID_ESCAPE)) {
1083                                 // relocate if finding another quote fairly close: thus unicode
1084                                 // '/u000D' will be fully consumed
1085                                 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1086                                         if (currentPosition + lookAhead == source.length)
1087                                                 break;
1088                                         if (source[currentPosition + lookAhead] == '\n')
1089                                                 break;
1090                                         if (source[currentPosition + lookAhead] == '`') {
1091                                                 currentPosition += lookAhead + 1;
1092                                                 break;
1093                                         }
1094                                 }
1095                         }
1096                         throw e; // rethrow
1097                 }
1098                 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1099                         // //$NON-NLS-?$ where ? is an
1100                         // int.
1101                         if (currentLine == null) {
1102                                 currentLine = new NLSLine();
1103                                 lines.add(currentLine);
1104                         }
1105                         currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1106                 }
1107         }
1108
1109         public void consumeStringConstant() throws InvalidInputException {
1110                 try {
1111                         // consume next character
1112                         unicodeAsBackSlash = false;
1113                         currentCharacter = source[currentPosition++];
1114                         // if (((currentCharacter = source[currentPosition++]) == '\\')
1115                         // && (source[currentPosition] == 'u')) {
1116                         // getNextUnicodeChar();
1117                         // } else {
1118                         // if (withoutUnicodePtr != 0) {
1119                         // withoutUnicodeBuffer[++withoutUnicodePtr] =
1120                         // currentCharacter;
1121                         // }
1122                         // }
1123                         while (currentCharacter != '\'') {
1124                                 /** ** in PHP \r and \n are valid in string literals *** */
1125                                 // if ((currentCharacter == '\n')
1126                                 // || (currentCharacter == '\r')) {
1127                                 // // relocate if finding another quote fairly close: thus unicode
1128                                 // '/u000D' will be fully consumed
1129                                 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1130                                 // if (currentPosition + lookAhead == source.length)
1131                                 // break;
1132                                 // if (source[currentPosition + lookAhead] == '\n')
1133                                 // break;
1134                                 // if (source[currentPosition + lookAhead] == '\"') {
1135                                 // currentPosition += lookAhead + 1;
1136                                 // break;
1137                                 // }
1138                                 // }
1139                                 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1140                                 // }
1141                                 if (currentCharacter == '\\') {
1142                                         int escapeSize = currentPosition;
1143                                         boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1144                                         // scanEscapeCharacter make a side effect on this value and we need
1145                                         // the previous value few lines down this one
1146                                         scanSingleQuotedEscapeCharacter();
1147                                         escapeSize = currentPosition - escapeSize;
1148                                         if (withoutUnicodePtr == 0) {
1149                                                 // buffer all the entries that have been left aside....
1150                                                 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1151                                                 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1152                                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1153                                         } else { // overwrite the / in the buffer
1154                                                 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1155                                                 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1156                                                         // where only one is correct
1157                                                         withoutUnicodePtr--;
1158                                                 }
1159                                         }
1160                                 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1161                                         if (recordLineSeparator) {
1162                                                 pushLineSeparator();
1163                                         }
1164                                 }
1165                                 // consume next character
1166                                 unicodeAsBackSlash = false;
1167                                 currentCharacter = source[currentPosition++];
1168                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
1169                                 // && (source[currentPosition] == 'u')) {
1170                                 // getNextUnicodeChar();
1171                                 // } else {
1172                                 if (withoutUnicodePtr != 0) {
1173                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1174                                 }
1175                                 // }
1176                         }
1177                 } catch (IndexOutOfBoundsException e) {
1178                         // reset end position for error reporting
1179                         currentPosition -= 2;
1180                         throw new InvalidInputException(UNTERMINATED_STRING);
1181                 } catch (InvalidInputException e) {
1182                         if (e.getMessage().equals(INVALID_ESCAPE)) {
1183                                 // relocate if finding another quote fairly close: thus unicode
1184                                 // '/u000D' will be fully consumed
1185                                 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1186                                         if (currentPosition + lookAhead == source.length)
1187                                                 break;
1188                                         if (source[currentPosition + lookAhead] == '\n')
1189                                                 break;
1190                                         if (source[currentPosition + lookAhead] == '\'') {
1191                                                 currentPosition += lookAhead + 1;
1192                                                 break;
1193                                         }
1194                                 }
1195                         }
1196                         throw e; // rethrow
1197                 }
1198                 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1199                         // //$NON-NLS-?$ where ? is an
1200                         // int.
1201                         if (currentLine == null) {
1202                                 currentLine = new NLSLine();
1203                                 lines.add(currentLine);
1204                         }
1205                         currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1206                 }
1207         }
1208
1209         public void consumeStringLiteral() throws InvalidInputException {
1210                 try {
1211                         boolean openDollarBrace = false;
1212                         // consume next character
1213                         unicodeAsBackSlash = false;
1214                         currentCharacter = source[currentPosition++];
1215                         while (currentCharacter != '"' || openDollarBrace) {
1216                                 /** ** in PHP \r and \n are valid in string literals *** */
1217                                 if (currentCharacter == '\\') {
1218                                         int escapeSize = currentPosition;
1219                                         boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1220                                         // scanEscapeCharacter make a side effect on this value and we need
1221                                         // the previous value few lines down this one
1222                                         scanDoubleQuotedEscapeCharacter();
1223                                         escapeSize = currentPosition - escapeSize;
1224                                         if (withoutUnicodePtr == 0) {
1225                                                 // buffer all the entries that have been left aside....
1226                                                 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1227                                                 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1228                                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1229                                         } else { // overwrite the / in the buffer
1230                                                 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1231                                                 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1232                                                         // where only one is correct
1233                                                         withoutUnicodePtr--;
1234                                                 }
1235                                         }
1236                                 } else if (currentCharacter == '$' && source[currentPosition] == '{') {
1237                                         openDollarBrace = true;
1238                                 } else if (currentCharacter == '{' && source[currentPosition] == '$') {
1239                                         openDollarBrace = true;
1240                                 } else if (currentCharacter == '}') {
1241                                         openDollarBrace = false;
1242                                 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1243                                         if (recordLineSeparator) {
1244                                                 pushLineSeparator();
1245                                         }
1246                                 }
1247                                 // consume next character
1248                                 unicodeAsBackSlash = false;
1249                                 currentCharacter = source[currentPosition++];
1250                                 if (withoutUnicodePtr != 0) {
1251                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1252                                 }
1253                         }
1254                 } catch (IndexOutOfBoundsException e) {
1255                         // reset end position for error reporting
1256                         currentPosition -= 2;
1257                         throw new InvalidInputException(UNTERMINATED_STRING);
1258                 } catch (InvalidInputException e) {
1259                         if (e.getMessage().equals(INVALID_ESCAPE)) {
1260                                 // relocate if finding another quote fairly close: thus unicode
1261                                 // '/u000D' will be fully consumed
1262                                 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1263                                         if (currentPosition + lookAhead == source.length)
1264                                                 break;
1265                                         if (source[currentPosition + lookAhead] == '\n')
1266                                                 break;
1267                                         if (source[currentPosition + lookAhead] == '\"') {
1268                                                 currentPosition += lookAhead + 1;
1269                                                 break;
1270                                         }
1271                                 }
1272                         }
1273                         throw e; // rethrow
1274                 }
1275                 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1276                         // //$NON-NLS-?$ where ? is an
1277                         // int.
1278                         if (currentLine == null) {
1279                                 currentLine = new NLSLine();
1280                                 lines.add(currentLine);
1281                         }
1282                         currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1283                 }
1284         }
1285
1286         public int getNextToken() throws InvalidInputException {
1287                 phpExpressionTag = false;
1288                 if (!phpMode) {
1289                         return getInlinedHTMLToken(currentPosition);
1290                 }
1291                 if (phpMode) {
1292                         this.wasAcr = false;
1293                         if (diet) {
1294                                 jumpOverMethodBody();
1295                                 diet = false;
1296                                 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1297                         }
1298                         try {
1299                                 while (true) {
1300                                         withoutUnicodePtr = 0;
1301                                         // start with a new token
1302                                         char encapsedChar = ' ';
1303                                         // if (!encapsedStringStack.isEmpty()) {
1304                                         // encapsedChar = ((Character)
1305                                         // encapsedStringStack.peek()).charValue();
1306                                         // }
1307                                         // if (encapsedChar != '$' && encapsedChar != ' ') {
1308                                         // currentCharacter = source[currentPosition++];
1309                                         // if (currentCharacter == encapsedChar) {
1310                                         // switch (currentCharacter) {
1311                                         // case '`':
1312                                         // return TokenNameEncapsedString0;
1313                                         // case '\'':
1314                                         // return TokenNameEncapsedString1;
1315                                         // case '"':
1316                                         // return TokenNameEncapsedString2;
1317                                         // }
1318                                         // }
1319                                         // while (currentCharacter != encapsedChar) {
1320                                         // /** ** in PHP \r and \n are valid in string literals *** */
1321                                         // switch (currentCharacter) {
1322                                         // case '\\':
1323                                         // int escapeSize = currentPosition;
1324                                         // boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1325                                         // //scanEscapeCharacter make a side effect on this value and
1326                                         // // we need the previous value few lines down this one
1327                                         // scanDoubleQuotedEscapeCharacter();
1328                                         // escapeSize = currentPosition - escapeSize;
1329                                         // if (withoutUnicodePtr == 0) {
1330                                         // //buffer all the entries that have been left aside....
1331                                         // withoutUnicodePtr = currentPosition - escapeSize - 1 -
1332                                         // startPosition;
1333                                         // System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1334                                         // withoutUnicodePtr);
1335                                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1336                                         // } else { //overwrite the / in the buffer
1337                                         // withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1338                                         // if (backSlashAsUnicodeInString) { //there are TWO \ in
1339                                         // withoutUnicodePtr--;
1340                                         // }
1341                                         // }
1342                                         // break;
1343                                         // case '\r':
1344                                         // case '\n':
1345                                         // if (recordLineSeparator) {
1346                                         // pushLineSeparator();
1347                                         // }
1348                                         // break;
1349                                         // case '$':
1350                                         // if (isPHPIdentifierStart(source[currentPosition]) ||
1351                                         // source[currentPosition] == '{') {
1352                                         // currentPosition--;
1353                                         // encapsedStringStack.push(new Character('$'));
1354                                         // return TokenNameSTRING;
1355                                         // }
1356                                         // break;
1357                                         // case '{':
1358                                         // if (source[currentPosition] == '$') { // CURLY_OPEN
1359                                         // currentPosition--;
1360                                         // encapsedStringStack.push(new Character('$'));
1361                                         // return TokenNameSTRING;
1362                                         // }
1363                                         // }
1364                                         // // consume next character
1365                                         // unicodeAsBackSlash = false;
1366                                         // currentCharacter = source[currentPosition++];
1367                                         // if (withoutUnicodePtr != 0) {
1368                                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1369                                         // }
1370                                         // // }
1371                                         // } // end while
1372                                         // currentPosition--;
1373                                         // return TokenNameSTRING;
1374                                         // }
1375                                         // ---------Consume white space and handles startPosition---------
1376                                         int whiteStart = currentPosition;
1377                                         startPosition = currentPosition;
1378                                         currentCharacter = source[currentPosition++];
1379                                         // if (encapsedChar == '$') {
1380                                         // switch (currentCharacter) {
1381                                         // case '\\':
1382                                         // currentCharacter = source[currentPosition++];
1383                                         // return TokenNameSTRING;
1384                                         // case '{':
1385                                         // if (encapsedChar == '$') {
1386                                         // if (getNextChar('$'))
1387                                         // return TokenNameLBRACE_DOLLAR;
1388                                         // }
1389                                         // return TokenNameLBRACE;
1390                                         // case '}':
1391                                         // return TokenNameRBRACE;
1392                                         // case '[':
1393                                         // return TokenNameLBRACKET;
1394                                         // case ']':
1395                                         // return TokenNameRBRACKET;
1396                                         // case '\'':
1397                                         // if (tokenizeStrings) {
1398                                         // consumeStringConstant();
1399                                         // return TokenNameStringSingleQuote;
1400                                         // }
1401                                         // return TokenNameEncapsedString1;
1402                                         // case '"':
1403                                         // return TokenNameEncapsedString2;
1404                                         // case '`':
1405                                         // if (tokenizeStrings) {
1406                                         // consumeStringInterpolated();
1407                                         // return TokenNameStringInterpolated;
1408                                         // }
1409                                         // return TokenNameEncapsedString0;
1410                                         // case '-':
1411                                         // if (getNextChar('>'))
1412                                         // return TokenNameMINUS_GREATER;
1413                                         // return TokenNameSTRING;
1414                                         // default:
1415                                         // if (currentCharacter == '$') {
1416                                         // int oldPosition = currentPosition;
1417                                         // try {
1418                                         // currentCharacter = source[currentPosition++];
1419                                         // if (currentCharacter == '{') {
1420                                         // return TokenNameDOLLAR_LBRACE;
1421                                         // }
1422                                         // if (isPHPIdentifierStart(currentCharacter)) {
1423                                         // return scanIdentifierOrKeyword(true);
1424                                         // } else {
1425                                         // currentPosition = oldPosition;
1426                                         // return TokenNameSTRING;
1427                                         // }
1428                                         // } catch (IndexOutOfBoundsException e) {
1429                                         // currentPosition = oldPosition;
1430                                         // return TokenNameSTRING;
1431                                         // }
1432                                         // }
1433                                         // if (isPHPIdentifierStart(currentCharacter))
1434                                         // return scanIdentifierOrKeyword(false);
1435                                         // if (Character.isDigit(currentCharacter))
1436                                         // return scanNumber(false);
1437                                         // return TokenNameERROR;
1438                                         // }
1439                                         // }
1440                                         // boolean isWhiteSpace;
1441
1442                                         while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1443                                                 startPosition = currentPosition;
1444                                                 currentCharacter = source[currentPosition++];
1445                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
1446                                                 // && (source[currentPosition] == 'u')) {
1447                                                 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1448                                                 // } else {
1449                                                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1450                                                         checkNonExternalizeString();
1451                                                         if (recordLineSeparator) {
1452                                                                 pushLineSeparator();
1453                                                         } else {
1454                                                                 currentLine = null;
1455                                                         }
1456                                                 }
1457                                                 // isWhiteSpace = (currentCharacter == ' ')
1458                                                 // || Character.isWhitespace(currentCharacter);
1459                                                 // }
1460                                         }
1461                                         if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1462                                                 // reposition scanner in case we are interested by spaces as tokens
1463                                                 currentPosition--;
1464                                                 startPosition = whiteStart;
1465                                                 return TokenNameWHITESPACE;
1466                                         }
1467                                         // little trick to get out in the middle of a source compuation
1468                                         if (currentPosition > eofPosition)
1469                                                 return TokenNameEOF;
1470                                         // ---------Identify the next token-------------
1471                                         switch (currentCharacter) {
1472                                         case '(':
1473                                                 return getCastOrParen();
1474                                         case ')':
1475                                                 return TokenNameRPAREN;
1476                                         case '{':
1477                                                 return TokenNameLBRACE;
1478                                         case '}':
1479                                                 return TokenNameRBRACE;
1480                                         case '[':
1481                                                 return TokenNameLBRACKET;
1482                                         case ']':
1483                                                 return TokenNameRBRACKET;
1484                                         case ';':
1485                                                 return TokenNameSEMICOLON;
1486                                         case ',':
1487                                                 return TokenNameCOMMA;
1488                                         case '.':
1489                                                 if (getNextChar('='))
1490                                                         return TokenNameDOT_EQUAL;
1491                                                 if (getNextCharAsDigit())
1492                                                         return scanNumber(true);
1493                                                 return TokenNameDOT;
1494                                         case '+': {
1495                                                 int test;
1496                                                 if ((test = getNextChar('+', '=')) == 0)
1497                                                         return TokenNamePLUS_PLUS;
1498                                                 if (test > 0)
1499                                                         return TokenNamePLUS_EQUAL;
1500                                                 return TokenNamePLUS;
1501                                         }
1502                                         case '-': {
1503                                                 int test;
1504                                                 if ((test = getNextChar('-', '=')) == 0)
1505                                                         return TokenNameMINUS_MINUS;
1506                                                 if (test > 0)
1507                                                         return TokenNameMINUS_EQUAL;
1508                                                 if (getNextChar('>'))
1509                                                         return TokenNameMINUS_GREATER;
1510                                                 return TokenNameMINUS;
1511                                         }
1512                                         case '~':
1513                                                 if (getNextChar('='))
1514                                                         return TokenNameTWIDDLE_EQUAL;
1515                                                 return TokenNameTWIDDLE;
1516                                         case '!':
1517                                                 if (getNextChar('=')) {
1518                                                         if (getNextChar('=')) {
1519                                                                 return TokenNameNOT_EQUAL_EQUAL;
1520                                                         }
1521                                                         return TokenNameNOT_EQUAL;
1522                                                 }
1523                                                 return TokenNameNOT;
1524                                         case '*':
1525                                                 if (getNextChar('='))
1526                                                         return TokenNameMULTIPLY_EQUAL;
1527                                                 return TokenNameMULTIPLY;
1528                                         case '%':
1529                                                 if (getNextChar('='))
1530                                                         return TokenNameREMAINDER_EQUAL;
1531                                                 return TokenNameREMAINDER;
1532                                         case '<': {
1533                                                 int oldPosition = currentPosition;
1534                                                 try {
1535                                                         currentCharacter = source[currentPosition++];
1536                                                 } catch (IndexOutOfBoundsException e) {
1537                                                         currentPosition = oldPosition;
1538                                                         return TokenNameLESS;
1539                                                 }
1540                                                 switch (currentCharacter) {
1541                                                 case '=':
1542                                                         return TokenNameLESS_EQUAL;
1543                                                 case '>':
1544                                                         return TokenNameNOT_EQUAL;
1545                                                 case '<':
1546                                                         if (getNextChar('='))
1547                                                                 return TokenNameLEFT_SHIFT_EQUAL;
1548                                                         if (getNextChar('<')) {
1549                                                                 currentCharacter = source[currentPosition++];
1550                                                                 while (Character.isWhitespace(currentCharacter)) {
1551                                                                         currentCharacter = source[currentPosition++];
1552                                                                 }
1553                                                                 int heredocStart = currentPosition - 1;
1554                                                                 int heredocLength = 0;
1555                                                                 if (isPHPIdentifierStart(currentCharacter)) {
1556                                                                         currentCharacter = source[currentPosition++];
1557                                                                 } else {
1558                                                                         return TokenNameERROR;
1559                                                                 }
1560                                                                 while (isPHPIdentifierPart(currentCharacter)) {
1561                                                                         currentCharacter = source[currentPosition++];
1562                                                                 }
1563                                                                 heredocLength = currentPosition - heredocStart - 1;
1564                                                                 // heredoc end-tag determination
1565                                                                 boolean endTag = true;
1566                                                                 char ch;
1567                                                                 do {
1568                                                                         ch = source[currentPosition++];
1569                                                                         if (ch == '\r' || ch == '\n') {
1570                                                                                 if (recordLineSeparator) {
1571                                                                                         pushLineSeparator();
1572                                                                                 } else {
1573                                                                                         currentLine = null;
1574                                                                                 }
1575                                                                                 for (int i = 0; i < heredocLength; i++) {
1576                                                                                         if (source[currentPosition + i] != source[heredocStart + i]) {
1577                                                                                                 endTag = false;
1578                                                                                                 break;
1579                                                                                         }
1580                                                                                 }
1581                                                                                 if (endTag) {
1582                                                                                         currentPosition += heredocLength - 1;
1583                                                                                         currentCharacter = source[currentPosition++];
1584                                                                                         break; // do...while loop
1585                                                                                 } else {
1586                                                                                         endTag = true;
1587                                                                                 }
1588                                                                         }
1589                                                                 } while (true);
1590                                                                 return TokenNameHEREDOC;
1591                                                         }
1592                                                         return TokenNameLEFT_SHIFT;
1593                                                 }
1594                                                 currentPosition = oldPosition;
1595                                                 return TokenNameLESS;
1596                                         }
1597                                         case '>': {
1598                                                 int test;
1599                                                 if ((test = getNextChar('=', '>')) == 0)
1600                                                         return TokenNameGREATER_EQUAL;
1601                                                 if (test > 0) {
1602                                                         if ((test = getNextChar('=', '>')) == 0)
1603                                                                 return TokenNameRIGHT_SHIFT_EQUAL;
1604                                                         return TokenNameRIGHT_SHIFT;
1605                                                 }
1606                                                 return TokenNameGREATER;
1607                                         }
1608                                         case '=':
1609                                                 if (getNextChar('=')) {
1610                                                         if (getNextChar('=')) {
1611                                                                 return TokenNameEQUAL_EQUAL_EQUAL;
1612                                                         }
1613                                                         return TokenNameEQUAL_EQUAL;
1614                                                 }
1615                                                 if (getNextChar('>'))
1616                                                         return TokenNameEQUAL_GREATER;
1617                                                 return TokenNameEQUAL;
1618                                         case '&': {
1619                                                 int test;
1620                                                 if ((test = getNextChar('&', '=')) == 0)
1621                                                         return TokenNameAND_AND;
1622                                                 if (test > 0)
1623                                                         return TokenNameAND_EQUAL;
1624                                                 return TokenNameAND;
1625                                         }
1626                                         case '|': {
1627                                                 int test;
1628                                                 if ((test = getNextChar('|', '=')) == 0)
1629                                                         return TokenNameOR_OR;
1630                                                 if (test > 0)
1631                                                         return TokenNameOR_EQUAL;
1632                                                 return TokenNameOR;
1633                                         }
1634                                         case '^':
1635                                                 if (getNextChar('='))
1636                                                         return TokenNameXOR_EQUAL;
1637                                                 return TokenNameXOR;
1638                                         case '?':
1639                                                 if (getNextChar('>')) {
1640                                                         phpMode = false;
1641                                                         if (currentPosition == source.length) {
1642                                                                 phpMode = true;
1643                                                                 return TokenNameINLINE_HTML;
1644                                                         }
1645                                                         return getInlinedHTMLToken(currentPosition - 2);
1646                                                 }
1647                                                 return TokenNameQUESTION;
1648                                         case ':':
1649                                                 if (getNextChar(':'))
1650                                                         return TokenNamePAAMAYIM_NEKUDOTAYIM;
1651                                                 return TokenNameCOLON;
1652                                         case '@':
1653                                                 return TokenNameAT;
1654                                         case '\'':
1655                                                 consumeStringConstant();
1656                                                 return TokenNameStringSingleQuote;
1657                                         case '"':
1658                                                 // if (tokenizeStrings) {
1659                                                 consumeStringLiteral();
1660                                                 return TokenNameStringDoubleQuote;
1661                                         // }
1662                                         // return TokenNameEncapsedString2;
1663                                         case '`':
1664                                                 // if (tokenizeStrings) {
1665                                                 consumeStringInterpolated();
1666                                                 return TokenNameStringInterpolated;
1667                                         // }
1668                                         // return TokenNameEncapsedString0;
1669                                         case '#':
1670                                         case '/': {
1671                                                 char startChar = currentCharacter;
1672                                                 if (getNextChar('=') && startChar == '/') {
1673                                                         return TokenNameDIVIDE_EQUAL;
1674                                                 }
1675                                                 int test;
1676                                                 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1677                                                         // line comment
1678                                                         this.lastCommentLinePosition = this.currentPosition;
1679                                                         int endPositionForLineComment = 0;
1680                                                         try { // get the next char
1681                                                                 currentCharacter = source[currentPosition++];
1682                                                                 // if (((currentCharacter = source[currentPosition++])
1683                                                                 // == '\\')
1684                                                                 // && (source[currentPosition] == 'u')) {
1685                                                                 // //-------------unicode traitement ------------
1686                                                                 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1687                                                                 // currentPosition++;
1688                                                                 // while (source[currentPosition] == 'u') {
1689                                                                 // currentPosition++;
1690                                                                 // }
1691                                                                 // if ((c1 =
1692                                                                 // Character.getNumericValue(source[currentPosition++]))
1693                                                                 // > 15
1694                                                                 // || c1 < 0
1695                                                                 // || (c2 =
1696                                                                 // Character.getNumericValue(source[currentPosition++]))
1697                                                                 // > 15
1698                                                                 // || c2 < 0
1699                                                                 // || (c3 =
1700                                                                 // Character.getNumericValue(source[currentPosition++]))
1701                                                                 // > 15
1702                                                                 // || c3 < 0
1703                                                                 // || (c4 =
1704                                                                 // Character.getNumericValue(source[currentPosition++]))
1705                                                                 // > 15
1706                                                                 // || c4 < 0) {
1707                                                                 // throw new
1708                                                                 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1709                                                                 // } else {
1710                                                                 // currentCharacter =
1711                                                                 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1712                                                                 // }
1713                                                                 // }
1714                                                                 // handle the \\u case manually into comment
1715                                                                 // if (currentCharacter == '\\') {
1716                                                                 // if (source[currentPosition] == '\\')
1717                                                                 // currentPosition++;
1718                                                                 // } //jump over the \\
1719                                                                 boolean isUnicode = false;
1720                                                                 while (currentCharacter != '\r' && currentCharacter != '\n') {
1721                                                                         this.lastCommentLinePosition = this.currentPosition;
1722                                                                         if (currentCharacter == '?') {
1723                                                                                 if (getNextChar('>')) {
1724                                                                                         // ?> breaks line comments
1725                                                                                         startPosition = currentPosition - 2;
1726                                                                                         phpMode = false;
1727                                                                                         return TokenNameINLINE_HTML;
1728                                                                                 }
1729                                                                         }
1730                                                                         // get the next char
1731                                                                         isUnicode = false;
1732                                                                         currentCharacter = source[currentPosition++];
1733                                                                         // if (((currentCharacter = source[currentPosition++])
1734                                                                         // == '\\')
1735                                                                         // && (source[currentPosition] == 'u')) {
1736                                                                         // isUnicode = true;
1737                                                                         // //-------------unicode traitement ------------
1738                                                                         // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1739                                                                         // currentPosition++;
1740                                                                         // while (source[currentPosition] == 'u') {
1741                                                                         // currentPosition++;
1742                                                                         // }
1743                                                                         // if ((c1 =
1744                                                                         // Character.getNumericValue(source[currentPosition++]))
1745                                                                         // > 15
1746                                                                         // || c1 < 0
1747                                                                         // || (c2 =
1748                                                                         // Character.getNumericValue(
1749                                                                         // source[currentPosition++]))
1750                                                                         // > 15
1751                                                                         // || c2 < 0
1752                                                                         // || (c3 =
1753                                                                         // Character.getNumericValue(
1754                                                                         // source[currentPosition++]))
1755                                                                         // > 15
1756                                                                         // || c3 < 0
1757                                                                         // || (c4 =
1758                                                                         // Character.getNumericValue(
1759                                                                         // source[currentPosition++]))
1760                                                                         // > 15
1761                                                                         // || c4 < 0) {
1762                                                                         // throw new
1763                                                                         // InvalidInputException(INVALID_UNICODE_ESCAPE);
1764                                                                         // } else {
1765                                                                         // currentCharacter =
1766                                                                         // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1767                                                                         // }
1768                                                                         // }
1769                                                                         // handle the \\u case manually into comment
1770                                                                         // if (currentCharacter == '\\') {
1771                                                                         // if (source[currentPosition] == '\\')
1772                                                                         // currentPosition++;
1773                                                                         // } //jump over the \\
1774                                                                 }
1775                                                                 if (isUnicode) {
1776                                                                         endPositionForLineComment = currentPosition - 6;
1777                                                                 } else {
1778                                                                         endPositionForLineComment = currentPosition - 1;
1779                                                                 }
1780                                                                 // recordComment(false);
1781                                                                 recordComment(TokenNameCOMMENT_LINE);
1782                                                                 if (this.taskTags != null)
1783                                                                         checkTaskTag(this.startPosition, this.currentPosition);
1784                                                                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1785                                                                         checkNonExternalizeString();
1786                                                                         if (recordLineSeparator) {
1787                                                                                 if (isUnicode) {
1788                                                                                         pushUnicodeLineSeparator();
1789                                                                                 } else {
1790                                                                                         pushLineSeparator();
1791                                                                                 }
1792                                                                         } else {
1793                                                                                 currentLine = null;
1794                                                                         }
1795                                                                 }
1796                                                                 if (tokenizeComments) {
1797                                                                         if (!isUnicode) {
1798                                                                                 currentPosition = endPositionForLineComment;
1799                                                                                 // reset one character behind
1800                                                                         }
1801                                                                         return TokenNameCOMMENT_LINE;
1802                                                                 }
1803                                                         } catch (IndexOutOfBoundsException e) { // an eof will them
1804                                                                 // be generated
1805                                                                 if (tokenizeComments) {
1806                                                                         currentPosition--;
1807                                                                         // reset one character behind
1808                                                                         return TokenNameCOMMENT_LINE;
1809                                                                 }
1810                                                         }
1811                                                         break;
1812                                                 }
1813                                                 if (test > 0) {
1814                                                         // traditional and annotation comment
1815                                                         boolean isJavadoc = false, star = false;
1816                                                         // consume next character
1817                                                         unicodeAsBackSlash = false;
1818                                                         currentCharacter = source[currentPosition++];
1819                                                         // if (((currentCharacter = source[currentPosition++]) ==
1820                                                         // '\\')
1821                                                         // && (source[currentPosition] == 'u')) {
1822                                                         // getNextUnicodeChar();
1823                                                         // } else {
1824                                                         // if (withoutUnicodePtr != 0) {
1825                                                         // withoutUnicodeBuffer[++withoutUnicodePtr] =
1826                                                         // currentCharacter;
1827                                                         // }
1828                                                         // }
1829                                                         if (currentCharacter == '*') {
1830                                                                 isJavadoc = true;
1831                                                                 star = true;
1832                                                         }
1833                                                         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1834                                                                 checkNonExternalizeString();
1835                                                                 if (recordLineSeparator) {
1836                                                                         pushLineSeparator();
1837                                                                 } else {
1838                                                                         currentLine = null;
1839                                                                 }
1840                                                         }
1841                                                         try { // get the next char
1842                                                                 currentCharacter = source[currentPosition++];
1843                                                                 // if (((currentCharacter = source[currentPosition++])
1844                                                                 // == '\\')
1845                                                                 // && (source[currentPosition] == 'u')) {
1846                                                                 // //-------------unicode traitement ------------
1847                                                                 // getNextUnicodeChar();
1848                                                                 // }
1849                                                                 // handle the \\u case manually into comment
1850                                                                 // if (currentCharacter == '\\') {
1851                                                                 // if (source[currentPosition] == '\\')
1852                                                                 // currentPosition++;
1853                                                                 // //jump over the \\
1854                                                                 // }
1855                                                                 // empty comment is not a javadoc /**/
1856                                                                 if (currentCharacter == '/') {
1857                                                                         isJavadoc = false;
1858                                                                 }
1859                                                                 // loop until end of comment */
1860                                                                 while ((currentCharacter != '/') || (!star)) {
1861                                                                         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1862                                                                                 checkNonExternalizeString();
1863                                                                                 if (recordLineSeparator) {
1864                                                                                         pushLineSeparator();
1865                                                                                 } else {
1866                                                                                         currentLine = null;
1867                                                                                 }
1868                                                                         }
1869                                                                         star = currentCharacter == '*';
1870                                                                         // get next char
1871                                                                         currentCharacter = source[currentPosition++];
1872                                                                         // if (((currentCharacter = source[currentPosition++])
1873                                                                         // == '\\')
1874                                                                         // && (source[currentPosition] == 'u')) {
1875                                                                         // //-------------unicode traitement ------------
1876                                                                         // getNextUnicodeChar();
1877                                                                         // }
1878                                                                         // handle the \\u case manually into comment
1879                                                                         // if (currentCharacter == '\\') {
1880                                                                         // if (source[currentPosition] == '\\')
1881                                                                         // currentPosition++;
1882                                                                         // } //jump over the \\
1883                                                                 }
1884                                                                 // recordComment(isJavadoc);
1885                                                                 if (isJavadoc) {
1886                                                                         recordComment(TokenNameCOMMENT_PHPDOC);
1887                                                                 } else {
1888                                                                         recordComment(TokenNameCOMMENT_BLOCK);
1889                                                                 }
1890
1891                                                                 if (tokenizeComments) {
1892                                                                         if (isJavadoc)
1893                                                                                 return TokenNameCOMMENT_PHPDOC;
1894                                                                         return TokenNameCOMMENT_BLOCK;
1895                                                                 }
1896
1897                                                                 if (this.taskTags != null) {
1898                                                                         checkTaskTag(this.startPosition, this.currentPosition);
1899                                                                 }
1900                                                         } catch (IndexOutOfBoundsException e) {
1901                                                                 // reset end position for error reporting
1902                                                                 currentPosition -= 2;
1903                                                                 throw new InvalidInputException(UNTERMINATED_COMMENT);
1904                                                         }
1905                                                         break;
1906                                                 }
1907                                                 return TokenNameDIVIDE;
1908                                         }
1909                                         case '\u001a':
1910                                                 if (atEnd())
1911                                                         return TokenNameEOF;
1912                                                 // the atEnd may not be <currentPosition == source.length> if
1913                                                 // source is only some part of a real (external) stream
1914                                                 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1915                                         default:
1916                                                 if (currentCharacter == '$') {
1917                                                         int oldPosition = currentPosition;
1918                                                         try {
1919                                                                 currentCharacter = source[currentPosition++];
1920                                                                 if (isPHPIdentifierStart(currentCharacter)) {
1921                                                                         return scanIdentifierOrKeyword(true);
1922                                                                 } else {
1923                                                                         currentPosition = oldPosition;
1924                                                                         return TokenNameDOLLAR;
1925                                                                 }
1926                                                         } catch (IndexOutOfBoundsException e) {
1927                                                                 currentPosition = oldPosition;
1928                                                                 return TokenNameDOLLAR;
1929                                                         }
1930                                                 }
1931                                                 if (isPHPIdentifierStart(currentCharacter))
1932                                                         return scanIdentifierOrKeyword(false);
1933                                                 if (Character.isDigit(currentCharacter))
1934                                                         return scanNumber(false);
1935                                                 return TokenNameERROR;
1936                                         }
1937                                 }
1938                         } // -----------------end switch while try--------------------
1939                         catch (IndexOutOfBoundsException e) {
1940                         }
1941                 }
1942                 return TokenNameEOF;
1943         }
1944
1945         /**
1946          * @return
1947          * @throws InvalidInputException
1948          */
1949         private int getInlinedHTMLToken(int start) throws InvalidInputException {
1950                 if (currentPosition > source.length) {
1951                         currentPosition = source.length;
1952                         return TokenNameEOF;
1953                 }
1954                 startPosition = start;
1955                 try {
1956                         while (!phpMode) {
1957                                 currentCharacter = source[currentPosition++];
1958                                 if (currentCharacter == '<') {
1959                                         if (getNextChar('?')) {
1960                                                 currentCharacter = source[currentPosition++];
1961                                                 if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
1962                                                         if (currentCharacter != '=') { // <?=
1963                                                                 currentPosition--;
1964                                                         } else {
1965                                                                 phpExpressionTag = true;
1966                                                         }
1967                                                         // <?
1968                                                         if (ignorePHPOneLiner) { // for CodeFormatter
1969                                                                 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1970                                                                         phpMode = true;
1971                                                                         return TokenNameINLINE_HTML;
1972                                                                 }
1973                                                         } else {
1974                                                                 phpMode = true;
1975                                                                 return TokenNameINLINE_HTML;
1976                                                         }
1977                                                 } else {
1978                                                         // boolean phpStart = (currentCharacter == 'P') ||
1979                                                         // (currentCharacter == 'p');
1980                                                         // if (phpStart) {
1981                                                         int test = getNextChar('H', 'h');
1982                                                         if (test >= 0) {
1983                                                                 test = getNextChar('P', 'p');
1984                                                                 if (test >= 0) {
1985                                                                         // <?PHP <?php
1986                                                                         if (ignorePHPOneLiner) {
1987                                                                                 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1988                                                                                         phpMode = true;
1989                                                                                         return TokenNameINLINE_HTML;
1990                                                                                 }
1991                                                                         } else {
1992                                                                                 phpMode = true;
1993                                                                                 return TokenNameINLINE_HTML;
1994                                                                         }
1995                                                                 }
1996                                                         }
1997                                                         // }
1998                                                 }
1999                                         }
2000                                 }
2001                                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
2002                                         if (recordLineSeparator) {
2003                                                 pushLineSeparator();
2004                                         } else {
2005                                                 currentLine = null;
2006                                         }
2007                                 }
2008                         } // -----------------while--------------------
2009                         phpMode = true;
2010                         return TokenNameINLINE_HTML;
2011                 } // -----------------try--------------------
2012                 catch (IndexOutOfBoundsException e) {
2013                         startPosition = start;
2014                         currentPosition--;
2015                 }
2016                 phpMode = true;
2017                 return TokenNameINLINE_HTML;
2018         }
2019
2020         /**
2021          * @return
2022          */
2023         private int lookAheadLinePHPTag() {
2024                 // check if the PHP is only in this line (for CodeFormatter)
2025                 int currentPositionInLine = currentPosition;
2026                 char previousCharInLine = ' ';
2027                 char currentCharInLine = ' ';
2028                 boolean singleQuotedStringActive = false;
2029                 boolean doubleQuotedStringActive = false;
2030
2031                 try {
2032                         // look ahead in this line
2033                         while (true) {
2034                                 previousCharInLine = currentCharInLine;
2035                                 currentCharInLine = source[currentPositionInLine++];
2036                                 switch (currentCharInLine) {
2037                                 case '>':
2038                                         if (previousCharInLine == '?') {
2039                                                 // update the scanner's current Position in the source
2040                                                 currentPosition = currentPositionInLine;
2041                                                 // use as "dummy" token
2042                                                 return TokenNameEOF;
2043                                         }
2044                                         break;
2045                                 case '\\':
2046                                         if (doubleQuotedStringActive) {
2047                                                 // ignore escaped characters in double quoted strings
2048                                                 previousCharInLine = currentCharInLine;
2049                                                 currentCharInLine = source[currentPositionInLine++];
2050                                         }
2051                                 case '\"':
2052                                         if (doubleQuotedStringActive) {
2053                                                 doubleQuotedStringActive = false;
2054                                         } else {
2055                                                 if (!singleQuotedStringActive) {
2056                                                         doubleQuotedStringActive = true;
2057                                                 }
2058                                         }
2059                                         break;
2060                                 case '\'':
2061                                         if (singleQuotedStringActive) {
2062                                                 if (previousCharInLine != '\\') {
2063                                                         singleQuotedStringActive = false;
2064                                                 }
2065                                         } else {
2066                                                 if (!doubleQuotedStringActive) {
2067                                                         singleQuotedStringActive = true;
2068                                                 }
2069                                         }
2070                                         break;
2071                                 case '\n':
2072                                         phpMode = true;
2073                                         return TokenNameINLINE_HTML;
2074                                 case '#':
2075                                         if (!singleQuotedStringActive && !doubleQuotedStringActive) {
2076                                                 phpMode = true;
2077                                                 return TokenNameINLINE_HTML;
2078                                         }
2079                                         break;
2080                                 case '/':
2081                                         if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2082                                                 phpMode = true;
2083                                                 return TokenNameINLINE_HTML;
2084                                         }
2085                                         break;
2086                                 case '*':
2087                                         if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2088                                                 phpMode = true;
2089                                                 return TokenNameINLINE_HTML;
2090                                         }
2091                                         break;
2092                                 }
2093                         }
2094                 } catch (IndexOutOfBoundsException e) {
2095                         phpMode = true;
2096                         currentPosition = currentPositionInLine;
2097                         return TokenNameINLINE_HTML;
2098                 }
2099         }
2100
2101         // public final void getNextUnicodeChar()
2102         // throws IndexOutOfBoundsException, InvalidInputException {
2103         // //VOID
2104         // //handle the case of unicode.
2105         // //when a unicode appears then we must use a buffer that holds char
2106         // internal values
2107         // //At the end of this method currentCharacter holds the new visited char
2108         // //and currentPosition points right next after it
2109         //
2110         // //ALL getNextChar.... ARE OPTIMIZED COPIES
2111         //
2112         // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2113         // currentPosition++;
2114         // while (source[currentPosition] == 'u') {
2115         // currentPosition++;
2116         // unicodeSize++;
2117         // }
2118         //
2119         // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2120         // || c1 < 0
2121         // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2122         // || c2 < 0
2123         // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2124         // || c3 < 0
2125         // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2126         // || c4 < 0) {
2127         // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2128         // } else {
2129         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2130         // //need the unicode buffer
2131         // if (withoutUnicodePtr == 0) {
2132         // //buffer all the entries that have been left aside....
2133         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2134         // System.arraycopy(
2135         // source,
2136         // startPosition,
2137         // withoutUnicodeBuffer,
2138         // 1,
2139         // withoutUnicodePtr);
2140         // }
2141         // //fill the buffer with the char
2142         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2143         // }
2144         // unicodeAsBackSlash = currentCharacter == '\\';
2145         // }
2146         /*
2147          * Tokenize a method body, assuming that curly brackets are properly balanced.
2148          */
2149         public final void jumpOverMethodBody() {
2150                 this.wasAcr = false;
2151                 int found = 1;
2152                 try {
2153                         while (true) { // loop for jumping over comments
2154                                 // ---------Consume white space and handles startPosition---------
2155                                 boolean isWhiteSpace;
2156                                 do {
2157                                         startPosition = currentPosition;
2158                                         currentCharacter = source[currentPosition++];
2159                                         // if (((currentCharacter = source[currentPosition++]) == '\\')
2160                                         // && (source[currentPosition] == 'u')) {
2161                                         // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2162                                         // } else {
2163                                         if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2164                                                 pushLineSeparator();
2165                                         isWhiteSpace = Character.isWhitespace(currentCharacter);
2166                                         // }
2167                                 } while (isWhiteSpace);
2168                                 // -------consume token until } is found---------
2169                                 switch (currentCharacter) {
2170                                 case '{':
2171                                         found++;
2172                                         break;
2173                                 case '}':
2174                                         found--;
2175                                         if (found == 0)
2176                                                 return;
2177                                         break;
2178                                 case '\'': {
2179                                         boolean test;
2180                                         test = getNextChar('\\');
2181                                         if (test) {
2182                                                 try {
2183                                                         scanDoubleQuotedEscapeCharacter();
2184                                                 } catch (InvalidInputException ex) {
2185                                                 }
2186                                                 ;
2187                                         } else {
2188                                                 // try { // consume next character
2189                                                 unicodeAsBackSlash = false;
2190                                                 currentCharacter = source[currentPosition++];
2191                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
2192                                                 // && (source[currentPosition] == 'u')) {
2193                                                 // getNextUnicodeChar();
2194                                                 // } else {
2195                                                 if (withoutUnicodePtr != 0) {
2196                                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2197                                                 }
2198                                                 // }
2199                                                 // } catch (InvalidInputException ex) {
2200                                                 // };
2201                                         }
2202                                         getNextChar('\'');
2203                                         break;
2204                                 }
2205                                 case '"':
2206                                         try {
2207                                                 // try { // consume next character
2208                                                 unicodeAsBackSlash = false;
2209                                                 currentCharacter = source[currentPosition++];
2210                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
2211                                                 // && (source[currentPosition] == 'u')) {
2212                                                 // getNextUnicodeChar();
2213                                                 // } else {
2214                                                 if (withoutUnicodePtr != 0) {
2215                                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2216                                                 }
2217                                                 // }
2218                                                 // } catch (InvalidInputException ex) {
2219                                                 // };
2220                                                 while (currentCharacter != '"') {
2221                                                         if (currentCharacter == '\r') {
2222                                                                 if (source[currentPosition] == '\n')
2223                                                                         currentPosition++;
2224                                                                 break;
2225                                                                 // the string cannot go further that the line
2226                                                         }
2227                                                         if (currentCharacter == '\n') {
2228                                                                 break;
2229                                                                 // the string cannot go further that the line
2230                                                         }
2231                                                         if (currentCharacter == '\\') {
2232                                                                 try {
2233                                                                         scanDoubleQuotedEscapeCharacter();
2234                                                                 } catch (InvalidInputException ex) {
2235                                                                 }
2236                                                                 ;
2237                                                         }
2238                                                         // try { // consume next character
2239                                                         unicodeAsBackSlash = false;
2240                                                         currentCharacter = source[currentPosition++];
2241                                                         // if (((currentCharacter = source[currentPosition++]) == '\\')
2242                                                         // && (source[currentPosition] == 'u')) {
2243                                                         // getNextUnicodeChar();
2244                                                         // } else {
2245                                                         if (withoutUnicodePtr != 0) {
2246                                                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2247                                                         }
2248                                                         // }
2249                                                         // } catch (InvalidInputException ex) {
2250                                                         // };
2251                                                 }
2252                                         } catch (IndexOutOfBoundsException e) {
2253                                                 return;
2254                                         }
2255                                         break;
2256                                 case '/': {
2257                                         int test;
2258                                         if ((test = getNextChar('/', '*')) == 0) {
2259                                                 // line comment
2260                                                 try {
2261                                                         // get the next char
2262                                                         currentCharacter = source[currentPosition++];
2263                                                         // if (((currentCharacter = source[currentPosition++]) ==
2264                                                         // '\\')
2265                                                         // && (source[currentPosition] == 'u')) {
2266                                                         // //-------------unicode traitement ------------
2267                                                         // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2268                                                         // currentPosition++;
2269                                                         // while (source[currentPosition] == 'u') {
2270                                                         // currentPosition++;
2271                                                         // }
2272                                                         // if ((c1 =
2273                                                         // Character.getNumericValue(source[currentPosition++]))
2274                                                         // > 15
2275                                                         // || c1 < 0
2276                                                         // || (c2 =
2277                                                         // Character.getNumericValue(source[currentPosition++]))
2278                                                         // > 15
2279                                                         // || c2 < 0
2280                                                         // || (c3 =
2281                                                         // Character.getNumericValue(source[currentPosition++]))
2282                                                         // > 15
2283                                                         // || c3 < 0
2284                                                         // || (c4 =
2285                                                         // Character.getNumericValue(source[currentPosition++]))
2286                                                         // > 15
2287                                                         // || c4 < 0) {
2288                                                         // //error don't care of the value
2289                                                         // currentCharacter = 'A';
2290                                                         // } //something different from \n and \r
2291                                                         // else {
2292                                                         // currentCharacter =
2293                                                         // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2294                                                         // }
2295                                                         // }
2296                                                         while (currentCharacter != '\r' && currentCharacter != '\n') {
2297                                                                 // get the next char
2298                                                                 currentCharacter = source[currentPosition++];
2299                                                                 // if (((currentCharacter = source[currentPosition++])
2300                                                                 // == '\\')
2301                                                                 // && (source[currentPosition] == 'u')) {
2302                                                                 // //-------------unicode traitement ------------
2303                                                                 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2304                                                                 // currentPosition++;
2305                                                                 // while (source[currentPosition] == 'u') {
2306                                                                 // currentPosition++;
2307                                                                 // }
2308                                                                 // if ((c1 =
2309                                                                 // Character.getNumericValue(source[currentPosition++]))
2310                                                                 // > 15
2311                                                                 // || c1 < 0
2312                                                                 // || (c2 =
2313                                                                 // Character.getNumericValue(source[currentPosition++]))
2314                                                                 // > 15
2315                                                                 // || c2 < 0
2316                                                                 // || (c3 =
2317                                                                 // Character.getNumericValue(source[currentPosition++]))
2318                                                                 // > 15
2319                                                                 // || c3 < 0
2320                                                                 // || (c4 =
2321                                                                 // Character.getNumericValue(source[currentPosition++]))
2322                                                                 // > 15
2323                                                                 // || c4 < 0) {
2324                                                                 // //error don't care of the value
2325                                                                 // currentCharacter = 'A';
2326                                                                 // } //something different from \n and \r
2327                                                                 // else {
2328                                                                 // currentCharacter =
2329                                                                 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2330                                                                 // }
2331                                                                 // }
2332                                                         }
2333                                                         if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2334                                                                 pushLineSeparator();
2335                                                 } catch (IndexOutOfBoundsException e) {
2336                                                 } // an eof will them be generated
2337                                                 break;
2338                                         }
2339                                         if (test > 0) {
2340                                                 // traditional and annotation comment
2341                                                 boolean star = false;
2342                                                 // try { // consume next character
2343                                                 unicodeAsBackSlash = false;
2344                                                 currentCharacter = source[currentPosition++];
2345                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
2346                                                 // && (source[currentPosition] == 'u')) {
2347                                                 // getNextUnicodeChar();
2348                                                 // } else {
2349                                                 if (withoutUnicodePtr != 0) {
2350                                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2351                                                 }
2352                                                 // };
2353                                                 // } catch (InvalidInputException ex) {
2354                                                 // };
2355                                                 if (currentCharacter == '*') {
2356                                                         star = true;
2357                                                 }
2358                                                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2359                                                         pushLineSeparator();
2360                                                 try { // get the next char
2361                                                         currentCharacter = source[currentPosition++];
2362                                                         // if (((currentCharacter = source[currentPosition++]) ==
2363                                                         // '\\')
2364                                                         // && (source[currentPosition] == 'u')) {
2365                                                         // //-------------unicode traitement ------------
2366                                                         // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2367                                                         // currentPosition++;
2368                                                         // while (source[currentPosition] == 'u') {
2369                                                         // currentPosition++;
2370                                                         // }
2371                                                         // if ((c1 =
2372                                                         // Character.getNumericValue(source[currentPosition++]))
2373                                                         // > 15
2374                                                         // || c1 < 0
2375                                                         // || (c2 =
2376                                                         // Character.getNumericValue(source[currentPosition++]))
2377                                                         // > 15
2378                                                         // || c2 < 0
2379                                                         // || (c3 =
2380                                                         // Character.getNumericValue(source[currentPosition++]))
2381                                                         // > 15
2382                                                         // || c3 < 0
2383                                                         // || (c4 =
2384                                                         // Character.getNumericValue(source[currentPosition++]))
2385                                                         // > 15
2386                                                         // || c4 < 0) {
2387                                                         // //error don't care of the value
2388                                                         // currentCharacter = 'A';
2389                                                         // } //something different from * and /
2390                                                         // else {
2391                                                         // currentCharacter =
2392                                                         // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2393                                                         // }
2394                                                         // }
2395                                                         // loop until end of comment */
2396                                                         while ((currentCharacter != '/') || (!star)) {
2397                                                                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2398                                                                         pushLineSeparator();
2399                                                                 star = currentCharacter == '*';
2400                                                                 // get next char
2401                                                                 currentCharacter = source[currentPosition++];
2402                                                                 // if (((currentCharacter = source[currentPosition++])
2403                                                                 // == '\\')
2404                                                                 // && (source[currentPosition] == 'u')) {
2405                                                                 // //-------------unicode traitement ------------
2406                                                                 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2407                                                                 // currentPosition++;
2408                                                                 // while (source[currentPosition] == 'u') {
2409                                                                 // currentPosition++;
2410                                                                 // }
2411                                                                 // if ((c1 =
2412                                                                 // Character.getNumericValue(source[currentPosition++]))
2413                                                                 // > 15
2414                                                                 // || c1 < 0
2415                                                                 // || (c2 =
2416                                                                 // Character.getNumericValue(source[currentPosition++]))
2417                                                                 // > 15
2418                                                                 // || c2 < 0
2419                                                                 // || (c3 =
2420                                                                 // Character.getNumericValue(source[currentPosition++]))
2421                                                                 // > 15
2422                                                                 // || c3 < 0
2423                                                                 // || (c4 =
2424                                                                 // Character.getNumericValue(source[currentPosition++]))
2425                                                                 // > 15
2426                                                                 // || c4 < 0) {
2427                                                                 // //error don't care of the value
2428                                                                 // currentCharacter = 'A';
2429                                                                 // } //something different from * and /
2430                                                                 // else {
2431                                                                 // currentCharacter =
2432                                                                 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2433                                                                 // }
2434                                                                 // }
2435                                                         }
2436                                                 } catch (IndexOutOfBoundsException e) {
2437                                                         return;
2438                                                 }
2439                                                 break;
2440                                         }
2441                                         break;
2442                                 }
2443                                 default:
2444                                         if (isPHPIdentOrVarStart(currentCharacter) ) {
2445                                                 try {
2446                                                         scanIdentifierOrKeyword((currentCharacter == '$'));
2447                                                 } catch (InvalidInputException ex) {
2448                                                 }
2449                                                 ;
2450                                                 break;
2451                                         }
2452                                 if ( ObviousIdentCharNatures[currentCharacter]==C_DIGIT) {
2453 //                                      if (Character.isDigit(currentCharacter)) {
2454                                                 try {
2455                                                         scanNumber(false);
2456                                                 } catch (InvalidInputException ex) {
2457                                                 }
2458                                                 ;
2459                                                 break;
2460                                         }
2461                                 }
2462                         }
2463                         // -----------------end switch while try--------------------
2464                 } catch (IndexOutOfBoundsException e) {
2465                 } catch (InvalidInputException e) {
2466                 }
2467                 return;
2468         }
2469
2470         // public final boolean jumpOverUnicodeWhiteSpace()
2471         // throws InvalidInputException {
2472         // //BOOLEAN
2473         // //handle the case of unicode. Jump over the next whiteSpace
2474         // //making startPosition pointing on the next available char
2475         // //On false, the currentCharacter is filled up with a potential
2476         // //correct char
2477         //
2478         // try {
2479         // this.wasAcr = false;
2480         // int c1, c2, c3, c4;
2481         // int unicodeSize = 6;
2482         // currentPosition++;
2483         // while (source[currentPosition] == 'u') {
2484         // currentPosition++;
2485         // unicodeSize++;
2486         // }
2487         //
2488         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2489         // || c1 < 0)
2490         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2491         // || c2 < 0)
2492         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2493         // || c3 < 0)
2494         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2495         // || c4 < 0)) {
2496         // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2497         // }
2498         //
2499         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2500         // if (recordLineSeparator
2501         // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2502         // pushLineSeparator();
2503         // if (Character.isWhitespace(currentCharacter))
2504         // return true;
2505         //
2506         // //buffer the new char which is not a white space
2507         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2508         // //withoutUnicodePtr == 1 is true here
2509         // return false;
2510         // } catch (IndexOutOfBoundsException e) {
2511         // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2512         // }
2513         // }
2514         public final int[] getLineEnds() {
2515                 // return a bounded copy of this.lineEnds
2516                 int[] copy;
2517                 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2518                 return copy;
2519         }
2520
2521         public char[] getSource() {
2522                 return this.source;
2523         }
2524
2525         public static boolean isIdentifierOrKeyword(int token) {
2526                 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
2527         }
2528
2529         final char[] optimizedCurrentTokenSource1() {
2530                 // return always the same char[] build only once
2531                 // optimization at no speed cost of 99.5 % of the singleCharIdentifier
2532                 char charOne = source[startPosition];
2533                 switch (charOne) {
2534                 case 'a':
2535                         return charArray_a;
2536                 case 'b':
2537                         return charArray_b;
2538                 case 'c':
2539                         return charArray_c;
2540                 case 'd':
2541                         return charArray_d;
2542                 case 'e':
2543                         return charArray_e;
2544                 case 'f':
2545                         return charArray_f;
2546                 case 'g':
2547                         return charArray_g;
2548                 case 'h':
2549                         return charArray_h;
2550                 case 'i':
2551                         return charArray_i;
2552                 case 'j':
2553                         return charArray_j;
2554                 case 'k':
2555                         return charArray_k;
2556                 case 'l':
2557                         return charArray_l;
2558                 case 'm':
2559                         return charArray_m;
2560                 case 'n':
2561                         return charArray_n;
2562                 case 'o':
2563                         return charArray_o;
2564                 case 'p':
2565                         return charArray_p;
2566                 case 'q':
2567                         return charArray_q;
2568                 case 'r':
2569                         return charArray_r;
2570                 case 's':
2571                         return charArray_s;
2572                 case 't':
2573                         return charArray_t;
2574                 case 'u':
2575                         return charArray_u;
2576                 case 'v':
2577                         return charArray_v;
2578                 case 'w':
2579                         return charArray_w;
2580                 case 'x':
2581                         return charArray_x;
2582                 case 'y':
2583                         return charArray_y;
2584                 case 'z':
2585                         return charArray_z;
2586                 default:
2587                         return new char[] { charOne };
2588                 }
2589         }
2590
2591         final char[] optimizedCurrentTokenSource2() {
2592                 char c0, c1;
2593                 c0 = source[startPosition];
2594                 c1 = source[startPosition + 1];
2595                 if (c0 == '$') {
2596                         // return always the same char[] build only once
2597                         // optimization at no speed cost of 99.5 % of the singleCharIdentifier
2598                         switch (c1) {
2599                         case 'a':
2600                                 return charArray_va;
2601                         case 'b':
2602                                 return charArray_vb;
2603                         case 'c':
2604                                 return charArray_vc;
2605                         case 'd':
2606                                 return charArray_vd;
2607                         case 'e':
2608                                 return charArray_ve;
2609                         case 'f':
2610                                 return charArray_vf;
2611                         case 'g':
2612                                 return charArray_vg;
2613                         case 'h':
2614                                 return charArray_vh;
2615                         case 'i':
2616                                 return charArray_vi;
2617                         case 'j':
2618                                 return charArray_vj;
2619                         case 'k':
2620                                 return charArray_vk;
2621                         case 'l':
2622                                 return charArray_vl;
2623                         case 'm':
2624                                 return charArray_vm;
2625                         case 'n':
2626                                 return charArray_vn;
2627                         case 'o':
2628                                 return charArray_vo;
2629                         case 'p':
2630                                 return charArray_vp;
2631                         case 'q':
2632                                 return charArray_vq;
2633                         case 'r':
2634                                 return charArray_vr;
2635                         case 's':
2636                                 return charArray_vs;
2637                         case 't':
2638                                 return charArray_vt;
2639                         case 'u':
2640                                 return charArray_vu;
2641                         case 'v':
2642                                 return charArray_vv;
2643                         case 'w':
2644                                 return charArray_vw;
2645                         case 'x':
2646                                 return charArray_vx;
2647                         case 'y':
2648                                 return charArray_vy;
2649                         case 'z':
2650                                 return charArray_vz;
2651                         }
2652                 }
2653                 // try to return the same char[] build only once
2654                 int hash = ((c0 << 6) + c1) % TableSize;
2655                 char[][] table = charArray_length[0][hash];
2656                 int i = newEntry2;
2657                 while (++i < InternalTableSize) {
2658                         char[] charArray = table[i];
2659                         if ((c0 == charArray[0]) && (c1 == charArray[1]))
2660                                 return charArray;
2661                 }
2662                 // ---------other side---------
2663                 i = -1;
2664                 int max = newEntry2;
2665                 while (++i <= max) {
2666                         char[] charArray = table[i];
2667                         if ((c0 == charArray[0]) && (c1 == charArray[1]))
2668                                 return charArray;
2669                 }
2670                 // --------add the entry-------
2671                 if (++max >= InternalTableSize)
2672                         max = 0;
2673                 char[] r;
2674                 table[max] = (r = new char[] { c0, c1 });
2675                 newEntry2 = max;
2676                 return r;
2677         }
2678
2679         final char[] optimizedCurrentTokenSource3() {
2680                 // try to return the same char[] build only once
2681                 char c0, c1, c2;
2682                 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2683                                 % TableSize;
2684                 char[][] table = charArray_length[1][hash];
2685                 int i = newEntry3;
2686                 while (++i < InternalTableSize) {
2687                         char[] charArray = table[i];
2688                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2689                                 return charArray;
2690                 }
2691                 // ---------other side---------
2692                 i = -1;
2693                 int max = newEntry3;
2694                 while (++i <= max) {
2695                         char[] charArray = table[i];
2696                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2697                                 return charArray;
2698                 }
2699                 // --------add the entry-------
2700                 if (++max >= InternalTableSize)
2701                         max = 0;
2702                 char[] r;
2703                 table[max] = (r = new char[] { c0, c1, c2 });
2704                 newEntry3 = max;
2705                 return r;
2706         }
2707
2708         final char[] optimizedCurrentTokenSource4() {
2709                 // try to return the same char[] build only once
2710                 char c0, c1, c2, c3;
2711                 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2712                                 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2713                                 % TableSize;
2714                 char[][] table = charArray_length[2][(int) hash];
2715                 int i = newEntry4;
2716                 while (++i < InternalTableSize) {
2717                         char[] charArray = table[i];
2718                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2719                                 return charArray;
2720                 }
2721                 // ---------other side---------
2722                 i = -1;
2723                 int max = newEntry4;
2724                 while (++i <= max) {
2725                         char[] charArray = table[i];
2726                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2727                                 return charArray;
2728                 }
2729                 // --------add the entry-------
2730                 if (++max >= InternalTableSize)
2731                         max = 0;
2732                 char[] r;
2733                 table[max] = (r = new char[] { c0, c1, c2, c3 });
2734                 newEntry4 = max;
2735                 return r;
2736         }
2737
2738         final char[] optimizedCurrentTokenSource5() {
2739                 // try to return the same char[] build only once
2740                 char c0, c1, c2, c3, c4;
2741                 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2742                                 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2743                                 % TableSize;
2744                 char[][] table = charArray_length[3][(int) hash];
2745                 int i = newEntry5;
2746                 while (++i < InternalTableSize) {
2747                         char[] charArray = table[i];
2748                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2749                                 return charArray;
2750                 }
2751                 // ---------other side---------
2752                 i = -1;
2753                 int max = newEntry5;
2754                 while (++i <= max) {
2755                         char[] charArray = table[i];
2756                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2757                                 return charArray;
2758                 }
2759                 // --------add the entry-------
2760                 if (++max >= InternalTableSize)
2761                         max = 0;
2762                 char[] r;
2763                 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2764                 newEntry5 = max;
2765                 return r;
2766         }
2767
2768         final char[] optimizedCurrentTokenSource6() {
2769                 // try to return the same char[] build only once
2770                 char c0, c1, c2, c3, c4, c5;
2771                 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2772                                 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2773                                 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2774                                 % TableSize;
2775                 char[][] table = charArray_length[4][(int) hash];
2776                 int i = newEntry6;
2777                 while (++i < InternalTableSize) {
2778                         char[] charArray = table[i];
2779                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2780                                         && (c5 == charArray[5]))
2781                                 return charArray;
2782                 }
2783                 // ---------other side---------
2784                 i = -1;
2785                 int max = newEntry6;
2786                 while (++i <= max) {
2787                         char[] charArray = table[i];
2788                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2789                                         && (c5 == charArray[5]))
2790                                 return charArray;
2791                 }
2792                 // --------add the entry-------
2793                 if (++max >= InternalTableSize)
2794                         max = 0;
2795                 char[] r;
2796                 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2797                 newEntry6 = max;
2798                 return r;
2799         }
2800
2801         public final void pushLineSeparator() throws InvalidInputException {
2802                 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2803                 final int INCREMENT = 250;
2804                 if (this.checkNonExternalizedStringLiterals) {
2805                         // reinitialize the current line for non externalize strings purpose
2806                         currentLine = null;
2807                 }
2808                 // currentCharacter is at position currentPosition-1
2809                 // cr 000D
2810                 if (currentCharacter == '\r') {
2811                         int separatorPos = currentPosition - 1;
2812                         if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2813                                 return;
2814                         // System.out.println("CR-" + separatorPos);
2815                         try {
2816                                 lineEnds[++linePtr] = separatorPos;
2817                         } catch (IndexOutOfBoundsException e) {
2818                                 // linePtr value is correct
2819                                 int oldLength = lineEnds.length;
2820                                 int[] old = lineEnds;
2821                                 lineEnds = new int[oldLength + INCREMENT];
2822                                 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2823                                 lineEnds[linePtr] = separatorPos;
2824                         }
2825                         // look-ahead for merged cr+lf
2826                         try {
2827                                 if (source[currentPosition] == '\n') {
2828                                         // System.out.println("look-ahead LF-" + currentPosition);
2829                                         lineEnds[linePtr] = currentPosition;
2830                                         currentPosition++;
2831                                         wasAcr = false;
2832                                 } else {
2833                                         wasAcr = true;
2834                                 }
2835                         } catch (IndexOutOfBoundsException e) {
2836                                 wasAcr = true;
2837                         }
2838                 } else {
2839                         // lf 000A
2840                         if (currentCharacter == '\n') {
2841                                 // must merge eventual cr followed by lf
2842                                 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2843                                         // System.out.println("merge LF-" + (currentPosition - 1));
2844                                         lineEnds[linePtr] = currentPosition - 1;
2845                                 } else {
2846                                         int separatorPos = currentPosition - 1;
2847                                         if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2848                                                 return;
2849                                         // System.out.println("LF-" + separatorPos);
2850                                         try {
2851                                                 lineEnds[++linePtr] = separatorPos;
2852                                         } catch (IndexOutOfBoundsException e) {
2853                                                 // linePtr value is correct
2854                                                 int oldLength = lineEnds.length;
2855                                                 int[] old = lineEnds;
2856                                                 lineEnds = new int[oldLength + INCREMENT];
2857                                                 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2858                                                 lineEnds[linePtr] = separatorPos;
2859                                         }
2860                                 }
2861                                 wasAcr = false;
2862                         }
2863                 }
2864         }
2865
2866         public final void pushUnicodeLineSeparator() {
2867                 // isUnicode means that the \r or \n has been read as a unicode character
2868                 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2869                 final int INCREMENT = 250;
2870                 // currentCharacter is at position currentPosition-1
2871                 if (this.checkNonExternalizedStringLiterals) {
2872                         // reinitialize the current line for non externalize strings purpose
2873                         currentLine = null;
2874                 }
2875                 // cr 000D
2876                 if (currentCharacter == '\r') {
2877                         int separatorPos = currentPosition - 6;
2878                         if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2879                                 return;
2880                         // System.out.println("CR-" + separatorPos);
2881                         try {
2882                                 lineEnds[++linePtr] = separatorPos;
2883                         } catch (IndexOutOfBoundsException e) {
2884                                 // linePtr value is correct
2885                                 int oldLength = lineEnds.length;
2886                                 int[] old = lineEnds;
2887                                 lineEnds = new int[oldLength + INCREMENT];
2888                                 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2889                                 lineEnds[linePtr] = separatorPos;
2890                         }
2891                         // look-ahead for merged cr+lf
2892                         if (source[currentPosition] == '\n') {
2893                                 // System.out.println("look-ahead LF-" + currentPosition);
2894                                 lineEnds[linePtr] = currentPosition;
2895                                 currentPosition++;
2896                                 wasAcr = false;
2897                         } else {
2898                                 wasAcr = true;
2899                         }
2900                 } else {
2901                         // lf 000A
2902                         if (currentCharacter == '\n') {
2903                                 // must merge eventual cr followed by lf
2904                                 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2905                                         // System.out.println("merge LF-" + (currentPosition - 1));
2906                                         lineEnds[linePtr] = currentPosition - 6;
2907                                 } else {
2908                                         int separatorPos = currentPosition - 6;
2909                                         if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2910                                                 return;
2911                                         // System.out.println("LF-" + separatorPos);
2912                                         try {
2913                                                 lineEnds[++linePtr] = separatorPos;
2914                                         } catch (IndexOutOfBoundsException e) {
2915                                                 // linePtr value is correct
2916                                                 int oldLength = lineEnds.length;
2917                                                 int[] old = lineEnds;
2918                                                 lineEnds = new int[oldLength + INCREMENT];
2919                                                 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2920                                                 lineEnds[linePtr] = separatorPos;
2921                                         }
2922                                 }
2923                                 wasAcr = false;
2924                         }
2925                 }
2926         }
2927
2928         public void recordComment(int token) {
2929                 // compute position
2930                 int stopPosition = this.currentPosition;
2931                 switch (token) {
2932                 case TokenNameCOMMENT_LINE:
2933                         stopPosition = -this.lastCommentLinePosition;
2934                         break;
2935                 case TokenNameCOMMENT_BLOCK:
2936                         stopPosition = -this.currentPosition;
2937                         break;
2938                 }
2939
2940                 // a new comment is recorded
2941                 int length = this.commentStops.length;
2942                 if (++this.commentPtr >= length) {
2943                         System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2944                         // grows the positions buffers too
2945                         System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2946                 }
2947                 this.commentStops[this.commentPtr] = stopPosition;
2948                 this.commentStarts[this.commentPtr] = this.startPosition;
2949         }
2950
2951         // public final void recordComment(boolean isJavadoc) {
2952         // // a new annotation comment is recorded
2953         // try {
2954         // commentStops[++commentPtr] = isJavadoc
2955         // ? currentPosition
2956         // : -currentPosition;
2957         // } catch (IndexOutOfBoundsException e) {
2958         // int oldStackLength = commentStops.length;
2959         // int[] oldStack = commentStops;
2960         // commentStops = new int[oldStackLength + 30];
2961         // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2962         // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2963         // //grows the positions buffers too
2964         // int[] old = commentStarts;
2965         // commentStarts = new int[oldStackLength + 30];
2966         // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2967         // }
2968         // //the buffer is of a correct size here
2969         // commentStarts[commentPtr] = startPosition;
2970         // }
2971         public void resetTo(int begin, int end) {
2972                 // reset the scanner to a given position where it may rescan again
2973                 diet = false;
2974                 initialPosition = startPosition = currentPosition = begin;
2975                 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2976                 commentPtr = -1; // reset comment stack
2977         }
2978
2979         public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2980                 // the string with "\\u" is a legal string of two chars \ and u
2981                 // thus we use a direct access to the source (for regular cases).
2982                 // if (unicodeAsBackSlash) {
2983                 // // consume next character
2984                 // unicodeAsBackSlash = false;
2985                 // if (((currentCharacter = source[currentPosition++]) == '\\')
2986                 // && (source[currentPosition] == 'u')) {
2987                 // getNextUnicodeChar();
2988                 // } else {
2989                 // if (withoutUnicodePtr != 0) {
2990                 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2991                 // }
2992                 // }
2993                 // } else
2994                 currentCharacter = source[currentPosition++];
2995                 switch (currentCharacter) {
2996                 case '\'':
2997                         currentCharacter = '\'';
2998                         break;
2999                 case '\\':
3000                         currentCharacter = '\\';
3001                         break;
3002                 default:
3003                         currentCharacter = '\\';
3004                         currentPosition--;
3005                 }
3006         }
3007
3008         public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
3009                 currentCharacter = source[currentPosition++];
3010                 switch (currentCharacter) {
3011                 // case 'b' :
3012                 // currentCharacter = '\b';
3013                 // break;
3014                 case 't':
3015                         currentCharacter = '\t';
3016                         break;
3017                 case 'n':
3018                         currentCharacter = '\n';
3019                         break;
3020                 // case 'f' :
3021                 // currentCharacter = '\f';
3022                 // break;
3023                 case 'r':
3024                         currentCharacter = '\r';
3025                         break;
3026                 case '\"':
3027                         currentCharacter = '\"';
3028                         break;
3029                 case '\'':
3030                         currentCharacter = '\'';
3031                         break;
3032                 case '\\':
3033                         currentCharacter = '\\';
3034                         break;
3035                 case '$':
3036                         currentCharacter = '$';
3037                         break;
3038                 default:
3039                         // -----------octal escape--------------
3040                         // OctalDigit
3041                         // OctalDigit OctalDigit
3042                         // ZeroToThree OctalDigit OctalDigit
3043                         int number = Character.getNumericValue(currentCharacter);
3044                         if (number >= 0 && number <= 7) {
3045                                 boolean zeroToThreeNot = number > 3;
3046                                 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3047                                         int digit = Character.getNumericValue(currentCharacter);
3048                                         if (digit >= 0 && digit <= 7) {
3049                                                 number = (number * 8) + digit;
3050                                                 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3051                                                         if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3052                                                                 // Digit --> ignore last character
3053                                                                 currentPosition--;
3054                                                         } else {
3055                                                                 digit = Character.getNumericValue(currentCharacter);
3056                                                                 if (digit >= 0 && digit <= 7) {
3057                                                                         // has read \ZeroToThree OctalDigit OctalDigit
3058                                                                         number = (number * 8) + digit;
3059                                                                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3060                                                                         // --> ignore last character
3061                                                                         currentPosition--;
3062                                                                 }
3063                                                         }
3064                                                 } else { // has read \OctalDigit NonDigit--> ignore last
3065                                                         // character
3066                                                         currentPosition--;
3067                                                 }
3068                                         } else { // has read \OctalDigit NonOctalDigit--> ignore last
3069                                                 // character
3070                                                 currentPosition--;
3071                                         }
3072                                 } else { // has read \OctalDigit --> ignore last character
3073                                         currentPosition--;
3074                                 }
3075                                 if (number > 255)
3076                                         throw new InvalidInputException(INVALID_ESCAPE);
3077                                 currentCharacter = (char) number;
3078                         }
3079                 // else
3080                 // throw new InvalidInputException(INVALID_ESCAPE);
3081                 }
3082         }
3083
3084         // public int scanIdentifierOrKeyword() throws InvalidInputException {
3085         // return scanIdentifierOrKeyword( false );
3086         // }
3087         public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
3088                 // test keywords
3089                 // first dispatch on the first char.
3090                 // then the length. If there are several
3091                 // keywors with the same length AND the same first char, then do another
3092                 // disptach on the second char :-)...cool....but fast !
3093                 useAssertAsAnIndentifier = false;
3094                 while (getNextCharAsJavaIdentifierPart()) {
3095                 }
3096                 ;
3097                 if (isVariable) {
3098                         // if (new String(getCurrentTokenSource()).equals("$this")) {
3099                         // return TokenNamethis;
3100                         // }
3101                         return TokenNameVariable;
3102                 }
3103                 int index, length;
3104                 char[] data;
3105                 char firstLetter;
3106                 // if (withoutUnicodePtr == 0)
3107                 // quick test on length == 1 but not on length > 12 while most identifier
3108                 // have a length which is <= 12...but there are lots of identifier with
3109                 // only one char....
3110                 // {
3111                 if ((length = currentPosition - startPosition) == 1)
3112                         return TokenNameIdentifier;
3113                 // data = source;
3114                 data = new char[length];
3115                 index = startPosition;
3116                 for (int i = 0; i < length; i++) {
3117                         data[i] = Character.toLowerCase(source[index + i]);
3118                 }
3119                 index = 0;
3120                 // } else {
3121                 // if ((length = withoutUnicodePtr) == 1)
3122                 // return TokenNameIdentifier;
3123                 // // data = withoutUnicodeBuffer;
3124                 // data = new char[withoutUnicodeBuffer.length];
3125                 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3126                 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3127                 // }
3128                 // index = 1;
3129                 // }
3130                 firstLetter = data[index];
3131                 switch (firstLetter) {
3132                 case '_':
3133                         switch (length) {
3134                         case 8:
3135                                 // __FILE__
3136                                 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3137                                                 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3138                                         return TokenNameFILE;
3139                                 index = 0; // __LINE__
3140                                 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3141                                                 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3142                                         return TokenNameLINE;
3143                                 break;
3144                         case 9:
3145                                 // __CLASS__
3146                                 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3147                                                 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3148                                         return TokenNameCLASS_C;
3149                                 break;
3150                         case 11:
3151                                 // __METHOD__
3152                                 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3153                                                 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3154                                                 && (data[++index] == '_'))
3155                                         return TokenNameMETHOD_C;
3156                                 break;
3157                         case 12:
3158                                 // __FUNCTION__
3159                                 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3160                                                 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3161                                                 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3162                                         return TokenNameFUNC_C;
3163                                 break;
3164                         }
3165                         return TokenNameIdentifier;
3166                 case 'a':
3167                         // as and array abstract
3168                         switch (length) {
3169                         case 2:
3170                                 // as
3171                                 if ((data[++index] == 's')) {
3172                                         return TokenNameas;
3173                                 } else {
3174                                         return TokenNameIdentifier;
3175                                 }
3176                         case 3:
3177                                 // and
3178                                 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3179                                         return TokenNameand;
3180                                 } else {
3181                                         return TokenNameIdentifier;
3182                                 }
3183                         case 5:
3184                                 // array
3185                                 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3186                                         return TokenNamearray;
3187                                 else
3188                                         return TokenNameIdentifier;
3189                         case 8:
3190                                 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3191                                                 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3192                                         return TokenNameabstract;
3193                                 else
3194                                         return TokenNameIdentifier;
3195                         default:
3196                                 return TokenNameIdentifier;
3197                         }
3198                 case 'b':
3199                         // break
3200                         switch (length) {
3201                         case 5:
3202                                 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3203                                         return TokenNamebreak;
3204                                 else
3205                                         return TokenNameIdentifier;
3206                         default:
3207                                 return TokenNameIdentifier;
3208                         }
3209                 case 'c':
3210                         // case catch class clone const continue
3211                         switch (length) {
3212                         case 4:
3213                                 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3214                                         return TokenNamecase;
3215                                 else
3216                                         return TokenNameIdentifier;
3217                         case 5:
3218                                 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3219                                         return TokenNamecatch;
3220                                 index = 0;
3221                                 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3222                                         return TokenNameclass;
3223                                 index = 0;
3224                                 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3225                                         return TokenNameclone;
3226                                 index = 0;
3227                                 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3228                                         return TokenNameconst;
3229                                 else
3230                                         return TokenNameIdentifier;
3231                         case 8:
3232                                 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3233                                                 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3234                                         return TokenNamecontinue;
3235                                 else
3236                                         return TokenNameIdentifier;
3237                         default:
3238                                 return TokenNameIdentifier;
3239                         }
3240                 case 'd':
3241                         // declare default do die
3242                         // TODO delete define ==> no keyword !
3243                         switch (length) {
3244                         case 2:
3245                                 if ((data[++index] == 'o'))
3246                                         return TokenNamedo;
3247                                 else
3248                                         return TokenNameIdentifier;
3249                         // case 6 :
3250                         // if ((data[++index] == 'e')
3251                         // && (data[++index] == 'f')
3252                         // && (data[++index] == 'i')
3253                         // && (data[++index] == 'n')
3254                         // && (data[++index] == 'e'))
3255                         // return TokenNamedefine;
3256                         // else
3257                         // return TokenNameIdentifier;
3258                         case 7:
3259                                 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3260                                                 && (data[++index] == 'r') && (data[++index] == 'e'))
3261                                         return TokenNamedeclare;
3262                                 index = 0;
3263                                 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3264                                                 && (data[++index] == 'l') && (data[++index] == 't'))
3265                                         return TokenNamedefault;
3266                                 else
3267                                         return TokenNameIdentifier;
3268                         default:
3269                                 return TokenNameIdentifier;
3270                         }
3271                 case 'e':
3272                         // echo else exit elseif extends eval
3273                         switch (length) {
3274                         case 4:
3275                                 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3276                                         return TokenNameecho;
3277                                 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3278                                         return TokenNameelse;
3279                                 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3280                                         return TokenNameexit;
3281                                 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3282                                         return TokenNameeval;
3283                                 else
3284                                         return TokenNameIdentifier;
3285                         case 5:
3286                                 // endif empty
3287                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3288                                         return TokenNameendif;
3289                                 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3290                                         return TokenNameempty;
3291                                 else
3292                                         return TokenNameIdentifier;
3293                         case 6:
3294                                 // endfor
3295                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3296                                                 && (data[++index] == 'r'))
3297                                         return TokenNameendfor;
3298                                 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3299                                                 && (data[++index] == 'f'))
3300                                         return TokenNameelseif;
3301                                 else
3302                                         return TokenNameIdentifier;
3303                         case 7:
3304                                 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3305                                                 && (data[++index] == 'd') && (data[++index] == 's'))
3306                                         return TokenNameextends;
3307                                 else
3308                                         return TokenNameIdentifier;
3309                         case 8:
3310                                 // endwhile
3311                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3312                                                 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3313                                         return TokenNameendwhile;
3314                                 else
3315                                         return TokenNameIdentifier;
3316                         case 9:
3317                                 // endswitch
3318                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3319                                                 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3320                                         return TokenNameendswitch;
3321                                 else
3322                                         return TokenNameIdentifier;
3323                         case 10:
3324                                 // enddeclare
3325                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3326                                                 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3327                                                 && (data[++index] == 'e'))
3328                                         return TokenNameenddeclare;
3329                                 index = 0;
3330                                 if ((data[++index] == 'n') // endforeach
3331                                                 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3332                                                 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3333                                         return TokenNameendforeach;
3334                                 else
3335                                         return TokenNameIdentifier;
3336                         default:
3337                                 return TokenNameIdentifier;
3338                         }
3339                 case 'f':
3340                         // for false final function
3341                         switch (length) {
3342                         case 3:
3343                                 if ((data[++index] == 'o') && (data[++index] == 'r'))
3344                                         return TokenNamefor;
3345                                 else
3346                                         return TokenNameIdentifier;
3347                         case 5:
3348                                 // if ((data[++index] == 'a') && (data[++index] == 'l')
3349                                 // && (data[++index] == 's') && (data[++index] == 'e'))
3350                                 // return TokenNamefalse;
3351                                 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3352                                         return TokenNamefinal;
3353                                 else
3354                                         return TokenNameIdentifier;
3355                         case 7:
3356                                 // foreach
3357                                 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3358                                                 && (data[++index] == 'c') && (data[++index] == 'h'))
3359                                         return TokenNameforeach;
3360                                 else
3361                                         return TokenNameIdentifier;
3362                         case 8:
3363                                 // function
3364                                 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3365                                                 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3366                                         return TokenNamefunction;
3367                                 else
3368                                         return TokenNameIdentifier;
3369                         default:
3370                                 return TokenNameIdentifier;
3371                         }
3372                 case 'g':
3373                         // global
3374                         if (length == 6) {
3375                                 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3376                                                 && (data[++index] == 'l')) {
3377                                         return TokenNameglobal;
3378                                 }
3379                         }
3380                         return TokenNameIdentifier;
3381                 case 'i':
3382                         // if int isset include include_once instanceof interface implements
3383                         switch (length) {
3384                         case 2:
3385                                 if (data[++index] == 'f')
3386                                         return TokenNameif;
3387                                 else
3388                                         return TokenNameIdentifier;
3389                         // case 3 :
3390                         // if ((data[++index] == 'n') && (data[++index] == 't'))
3391                         // return TokenNameint;
3392                         // else
3393                         // return TokenNameIdentifier;
3394                         case 5:
3395                                 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3396                                         return TokenNameisset;
3397                                 else
3398                                         return TokenNameIdentifier;
3399                         case 7:
3400                                 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3401                                                 && (data[++index] == 'd') && (data[++index] == 'e'))
3402                                         return TokenNameinclude;
3403                                 else
3404                                         return TokenNameIdentifier;
3405                         case 9:
3406                                 // interface
3407                                 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3408                                                 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3409                                         return TokenNameinterface;
3410                                 else
3411                                         return TokenNameIdentifier;
3412                         case 10:
3413                                 // instanceof
3414                                 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3415                                                 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3416                                                 && (data[++index] == 'f'))
3417                                         return TokenNameinstanceof;
3418                                 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3419                                                 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3420                                                 && (data[++index] == 's'))
3421                                         return TokenNameimplements;
3422                                 else
3423                                         return TokenNameIdentifier;
3424                         case 12:
3425                                 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3426                                                 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3427                                                 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3428                                         return TokenNameinclude_once;
3429                                 else
3430                                         return TokenNameIdentifier;
3431                         default:
3432                                 return TokenNameIdentifier;
3433                         }
3434                 case 'l':
3435                         // list
3436                         if (length == 4) {
3437                                 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3438                                         return TokenNamelist;
3439                                 }
3440                         }
3441                         return TokenNameIdentifier;
3442                 case 'n':
3443                         // new null
3444                         switch (length) {
3445                         case 3:
3446                                 if ((data[++index] == 'e') && (data[++index] == 'w'))
3447                                         return TokenNamenew;
3448                                 else
3449                                         return TokenNameIdentifier;
3450                         // case 4 :
3451                         // if ((data[++index] == 'u') && (data[++index] == 'l')
3452                         // && (data[++index] == 'l'))
3453                         // return TokenNamenull;
3454                         // else
3455                         // return TokenNameIdentifier;
3456                         default:
3457                                 return TokenNameIdentifier;
3458                         }
3459                 case 'o':
3460                         // or old_function
3461                         if (length == 2) {
3462                                 if (data[++index] == 'r') {
3463                                         return TokenNameor;
3464                                 }
3465                         }
3466                         // if (length == 12) {
3467                         // if ((data[++index] == 'l')
3468                         // && (data[++index] == 'd')
3469                         // && (data[++index] == '_')
3470                         // && (data[++index] == 'f')
3471                         // && (data[++index] == 'u')
3472                         // && (data[++index] == 'n')
3473                         // && (data[++index] == 'c')
3474                         // && (data[++index] == 't')
3475                         // && (data[++index] == 'i')
3476                         // && (data[++index] == 'o')
3477                         // && (data[++index] == 'n')) {
3478                         // return TokenNameold_function;
3479                         // }
3480                         // }
3481                         return TokenNameIdentifier;
3482                 case 'p':
3483                         // print public private protected
3484                         switch (length) {
3485                         case 5:
3486                                 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3487                                         return TokenNameprint;
3488                                 } else
3489                                         return TokenNameIdentifier;
3490                         case 6:
3491                                 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3492                                                 && (data[++index] == 'c')) {
3493                                         return TokenNamepublic;
3494                                 } else
3495                                         return TokenNameIdentifier;
3496                         case 7:
3497                                 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3498                                                 && (data[++index] == 't') && (data[++index] == 'e')) {
3499                                         return TokenNameprivate;
3500                                 } else
3501                                         return TokenNameIdentifier;
3502                         case 9:
3503                                 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3504                                                 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3505                                         return TokenNameprotected;
3506                                 } else
3507                                         return TokenNameIdentifier;
3508                         }
3509                         return TokenNameIdentifier;
3510                 case 'r':
3511                         // return require require_once
3512                         if (length == 6) {
3513                                 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3514                                                 && (data[++index] == 'n')) {
3515                                         return TokenNamereturn;
3516                                 }
3517                         } else if (length == 7) {
3518                                 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3519                                                 && (data[++index] == 'r') && (data[++index] == 'e')) {
3520                                         return TokenNamerequire;
3521                                 }
3522                         } else if (length == 12) {
3523                                 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3524                                                 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3525                                                 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3526                                         return TokenNamerequire_once;
3527                                 }
3528                         } else
3529                                 return TokenNameIdentifier;
3530                 case 's':
3531                         // static switch
3532                         switch (length) {
3533                         case 6:
3534                                 if (data[++index] == 't')
3535                                         if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3536                                                 return TokenNamestatic;
3537                                         } else
3538                                                 return TokenNameIdentifier;
3539                                 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3540                                                 && (data[++index] == 'h'))
3541                                         return TokenNameswitch;
3542                                 else
3543                                         return TokenNameIdentifier;
3544                         default:
3545                                 return TokenNameIdentifier;
3546                         }
3547                 case 't':
3548                         // try true throw
3549                         switch (length) {
3550                         case 3:
3551                                 if ((data[++index] == 'r') && (data[++index] == 'y'))
3552                                         return TokenNametry;
3553                                 else
3554                                         return TokenNameIdentifier;
3555                         // case 4 :
3556                         // if ((data[++index] == 'r') && (data[++index] == 'u')
3557                         // && (data[++index] == 'e'))
3558                         // return TokenNametrue;
3559                         // else
3560                         // return TokenNameIdentifier;
3561                         case 5:
3562                                 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3563                                         return TokenNamethrow;
3564                                 else
3565                                         return TokenNameIdentifier;
3566                         default:
3567                                 return TokenNameIdentifier;
3568                         }
3569                 case 'u':
3570                         // use unset
3571                         switch (length) {
3572                         case 3:
3573                                 if ((data[++index] == 's') && (data[++index] == 'e'))
3574                                         return TokenNameuse;
3575                                 else
3576                                         return TokenNameIdentifier;
3577                         case 5:
3578                                 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3579                                         return TokenNameunset;
3580                                 else
3581                                         return TokenNameIdentifier;
3582                         default:
3583                                 return TokenNameIdentifier;
3584                         }
3585                 case 'v':
3586                         // var
3587                         switch (length) {
3588                         case 3:
3589                                 if ((data[++index] == 'a') && (data[++index] == 'r'))
3590                                         return TokenNamevar;
3591                                 else
3592                                         return TokenNameIdentifier;
3593                         default:
3594                                 return TokenNameIdentifier;
3595                         }
3596                 case 'w':
3597                         // while
3598                         switch (length) {
3599                         case 5:
3600                                 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3601                                         return TokenNamewhile;
3602                                 else
3603                                         return TokenNameIdentifier;
3604                         // case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3605                         // (data[++index]=='e') && (data[++index]=='f')&&
3606                         // (data[++index]=='p'))
3607                         // return TokenNamewidefp ;
3608                         // else
3609                         // return TokenNameIdentifier;
3610                         default:
3611                                 return TokenNameIdentifier;
3612                         }
3613                 case 'x':
3614                         // xor
3615                         switch (length) {
3616                         case 3:
3617                                 if ((data[++index] == 'o') && (data[++index] == 'r'))
3618                                         return TokenNamexor;
3619                                 else
3620                                         return TokenNameIdentifier;
3621                         default:
3622                                 return TokenNameIdentifier;
3623                         }
3624                 default:
3625                         return TokenNameIdentifier;
3626                 }
3627         }
3628
3629         public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3630                 // when entering this method the currentCharacter is the firt
3631                 // digit of the number , i.e. it may be preceeded by a . when
3632                 // dotPrefix is true
3633                 boolean floating = dotPrefix;
3634                 if ((!dotPrefix) && (currentCharacter == '0')) {
3635                         if (getNextChar('x', 'X') >= 0) { // ----------hexa-----------------
3636                                 // force the first char of the hexa number do exist...
3637                                 // consume next character
3638                                 unicodeAsBackSlash = false;
3639                                 currentCharacter = source[currentPosition++];
3640                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
3641                                 // && (source[currentPosition] == 'u')) {
3642                                 // getNextUnicodeChar();
3643                                 // } else {
3644                                 // if (withoutUnicodePtr != 0) {
3645                                 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3646                                 // }
3647                                 // }
3648                                 if (Character.digit(currentCharacter, 16) == -1)
3649                                         throw new InvalidInputException(INVALID_HEXA);
3650                                 // ---end forcing--
3651                                 while (getNextCharAsDigit(16)) {
3652                                 }
3653                                 ;
3654                                 // if (getNextChar('l', 'L') >= 0)
3655                                 // return TokenNameLongLiteral;
3656                                 // else
3657                                 return TokenNameIntegerLiteral;
3658                         }
3659                         // there is x or X in the number
3660                         // potential octal ! ... some one may write 000099.0 ! thus 00100 <
3661                         // 00078.0 is true !!!!! crazy language
3662                         if (getNextCharAsDigit()) {
3663                                 // -------------potential octal-----------------
3664                                 while (getNextCharAsDigit()) {
3665                                 }
3666                                 ;
3667                                 // if (getNextChar('l', 'L') >= 0) {
3668                                 // return TokenNameLongLiteral;
3669                                 // }
3670                                 //
3671                                 // if (getNextChar('f', 'F') >= 0) {
3672                                 // return TokenNameFloatingPointLiteral;
3673                                 // }
3674                                 if (getNextChar('d', 'D') >= 0) {
3675                                         return TokenNameDoubleLiteral;
3676                                 } else { // make the distinction between octal and float ....
3677                                         if (getNextChar('.')) { // bingo ! ....
3678                                                 while (getNextCharAsDigit()) {
3679                                                 }
3680                                                 ;
3681                                                 if (getNextChar('e', 'E') >= 0) {
3682                                                         // consume next character
3683                                                         unicodeAsBackSlash = false;
3684                                                         currentCharacter = source[currentPosition++];
3685                                                         // if (((currentCharacter = source[currentPosition++]) == '\\')
3686                                                         // && (source[currentPosition] == 'u')) {
3687                                                         // getNextUnicodeChar();
3688                                                         // } else {
3689                                                         // if (withoutUnicodePtr != 0) {
3690                                                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3691                                                         // }
3692                                                         // }
3693                                                         if ((currentCharacter == '-') || (currentCharacter == '+')) {
3694                                                                 // consume next character
3695                                                                 unicodeAsBackSlash = false;
3696                                                                 currentCharacter = source[currentPosition++];
3697                                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
3698                                                                 // && (source[currentPosition] == 'u')) {
3699                                                                 // getNextUnicodeChar();
3700                                                                 // } else {
3701                                                                 // if (withoutUnicodePtr != 0) {
3702                                                                 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3703                                                                 // currentCharacter;
3704                                                                 // }
3705                                                                 // }
3706                                                         }
3707                                                         if (!Character.isDigit(currentCharacter))
3708                                                                 throw new InvalidInputException(INVALID_FLOAT);
3709                                                         while (getNextCharAsDigit()) {
3710                                                         }
3711                                                         ;
3712                                                 }
3713                                                 // if (getNextChar('f', 'F') >= 0)
3714                                                 // return TokenNameFloatingPointLiteral;
3715                                                 getNextChar('d', 'D'); // jump over potential d or D
3716                                                 return TokenNameDoubleLiteral;
3717                                         } else {
3718                                                 return TokenNameIntegerLiteral;
3719                                         }
3720                                 }
3721                         } else {
3722                                 /* carry on */
3723                         }
3724                 }
3725                 while (getNextCharAsDigit()) {
3726                 }
3727                 ;
3728                 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3729                 // return TokenNameLongLiteral;
3730                 if ((!dotPrefix) && (getNextChar('.'))) { // decimal part that can be empty
3731                         while (getNextCharAsDigit()) {
3732                         }
3733                         ;
3734                         floating = true;
3735                 }
3736                 // if floating is true both exponant and suffix may be optional
3737                 if (getNextChar('e', 'E') >= 0) {
3738                         floating = true;
3739                         // consume next character
3740                         unicodeAsBackSlash = false;
3741                         currentCharacter = source[currentPosition++];
3742                         // if (((currentCharacter = source[currentPosition++]) == '\\')
3743                         // && (source[currentPosition] == 'u')) {
3744                         // getNextUnicodeChar();
3745                         // } else {
3746                         // if (withoutUnicodePtr != 0) {
3747                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3748                         // }
3749                         // }
3750                         if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3751                                 // next
3752                                 // character
3753                                 unicodeAsBackSlash = false;
3754                                 currentCharacter = source[currentPosition++];
3755                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
3756                                 // && (source[currentPosition] == 'u')) {
3757                                 // getNextUnicodeChar();
3758                                 // } else {
3759                                 // if (withoutUnicodePtr != 0) {
3760                                 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3761                                 // }
3762                                 // }
3763                         }
3764                         if (!Character.isDigit(currentCharacter))
3765                                 throw new InvalidInputException(INVALID_FLOAT);
3766                         while (getNextCharAsDigit()) {
3767                         }
3768                         ;
3769                 }
3770                 if (getNextChar('d', 'D') >= 0)
3771                         return TokenNameDoubleLiteral;
3772                 // if (getNextChar('f', 'F') >= 0)
3773                 // return TokenNameFloatingPointLiteral;
3774                 // the long flag has been tested before
3775                 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3776         }
3777
3778         /**
3779          * Search the line number corresponding to a specific position
3780          *
3781          */
3782         public final int getLineNumber(int position) {
3783                 if (lineEnds == null)
3784                         return 1;
3785                 int length = linePtr + 1;
3786                 if (length == 0)
3787                         return 1;
3788                 int g = 0, d = length - 1;
3789                 int m = 0;
3790                 while (g <= d) {
3791                         m = (g + d) / 2;
3792                         if (position < lineEnds[m]) {
3793                                 d = m - 1;
3794                         } else if (position > lineEnds[m]) {
3795                                 g = m + 1;
3796                         } else {
3797                                 return m + 1;
3798                         }
3799                 }
3800                 if (position < lineEnds[m]) {
3801                         return m + 1;
3802                 }
3803                 return m + 2;
3804         }
3805
3806         public void setPHPMode(boolean mode) {
3807                 phpMode = mode;
3808         }
3809
3810         public final void setSource(char[] source) {
3811                 setSource(null, source);
3812         }
3813
3814         public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3815                 // the source-buffer is set to sourceString
3816                 this.compilationUnit = compilationUnit;
3817                 if (source == null) {
3818                         this.source = new char[0];
3819                 } else {
3820                         this.source = source;
3821                 }
3822                 startPosition = -1;
3823                 initialPosition = currentPosition = 0;
3824                 containsAssertKeyword = false;
3825                 withoutUnicodeBuffer = new char[this.source.length];
3826                 // encapsedStringStack = new Stack();
3827         }
3828
3829         public String toString() {
3830                 if (startPosition == source.length)
3831                         return "EOF\n\n" + new String(source); //$NON-NLS-1$
3832                 if (currentPosition > source.length)
3833                         return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3834                 char front[] = new char[startPosition];
3835                 System.arraycopy(source, 0, front, 0, startPosition);
3836                 int middleLength = (currentPosition - 1) - startPosition + 1;
3837                 char middle[];
3838                 if (middleLength > -1) {
3839                         middle = new char[middleLength];
3840                         System.arraycopy(source, startPosition, middle, 0, middleLength);
3841                 } else {
3842                         middle = new char[0];
3843                 }
3844                 char end[] = new char[source.length - (currentPosition - 1)];
3845                 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3846                 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3847                                 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3848                                 + new String(end);
3849         }
3850
3851         public final String toStringAction(int act) {
3852                 switch (act) {
3853                 case TokenNameERROR:
3854                         return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3855                 // //$NON-NLS-1$
3856                 case TokenNameINLINE_HTML:
3857                         return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3858                 case TokenNameIdentifier:
3859                         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3860                 case TokenNameVariable:
3861                         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3862                 case TokenNameabstract:
3863                         return "abstract"; //$NON-NLS-1$
3864                 case TokenNameand:
3865                         return "AND"; //$NON-NLS-1$
3866                 case TokenNamearray:
3867                         return "array"; //$NON-NLS-1$
3868                 case TokenNameas:
3869                         return "as"; //$NON-NLS-1$
3870                 case TokenNamebreak:
3871                         return "break"; //$NON-NLS-1$
3872                 case TokenNamecase:
3873                         return "case"; //$NON-NLS-1$
3874                 case TokenNameclass:
3875                         return "class"; //$NON-NLS-1$
3876                 case TokenNamecatch:
3877                         return "catch"; //$NON-NLS-1$
3878                 case TokenNameclone:
3879                         //$NON-NLS-1$
3880                         return "clone";
3881                 case TokenNameconst:
3882                         //$NON-NLS-1$
3883                         return "const";
3884                 case TokenNamecontinue:
3885                         return "continue"; //$NON-NLS-1$
3886                 case TokenNamedefault:
3887                         return "default"; //$NON-NLS-1$
3888                 // case TokenNamedefine :
3889                 // return "define"; //$NON-NLS-1$
3890                 case TokenNamedo:
3891                         return "do"; //$NON-NLS-1$
3892                 case TokenNameecho:
3893                         return "echo"; //$NON-NLS-1$
3894                 case TokenNameelse:
3895                         return "else"; //$NON-NLS-1$
3896                 case TokenNameelseif:
3897                         return "elseif"; //$NON-NLS-1$
3898                 case TokenNameendfor:
3899                         return "endfor"; //$NON-NLS-1$
3900                 case TokenNameendforeach:
3901                         return "endforeach"; //$NON-NLS-1$
3902                 case TokenNameendif:
3903                         return "endif"; //$NON-NLS-1$
3904                 case TokenNameendswitch:
3905                         return "endswitch"; //$NON-NLS-1$
3906                 case TokenNameendwhile:
3907                         return "endwhile"; //$NON-NLS-1$
3908                 case TokenNameexit:
3909                         return "exit";
3910                 case TokenNameextends:
3911                         return "extends"; //$NON-NLS-1$
3912                 // case TokenNamefalse :
3913                 // return "false"; //$NON-NLS-1$
3914                 case TokenNamefinal:
3915                         return "final"; //$NON-NLS-1$
3916                 case TokenNamefor:
3917                         return "for"; //$NON-NLS-1$
3918                 case TokenNameforeach:
3919                         return "foreach"; //$NON-NLS-1$
3920                 case TokenNamefunction:
3921                         return "function"; //$NON-NLS-1$
3922                 case TokenNameglobal:
3923                         return "global"; //$NON-NLS-1$
3924                 case TokenNameif:
3925                         return "if"; //$NON-NLS-1$
3926                 case TokenNameimplements:
3927                         return "implements"; //$NON-NLS-1$
3928                 case TokenNameinclude:
3929                         return "include"; //$NON-NLS-1$
3930                 case TokenNameinclude_once:
3931                         return "include_once"; //$NON-NLS-1$
3932                 case TokenNameinstanceof:
3933                         return "instanceof"; //$NON-NLS-1$
3934                 case TokenNameinterface:
3935                         return "interface"; //$NON-NLS-1$
3936                 case TokenNameisset:
3937                         return "isset"; //$NON-NLS-1$
3938                 case TokenNamelist:
3939                         return "list"; //$NON-NLS-1$
3940                 case TokenNamenew:
3941                         return "new"; //$NON-NLS-1$
3942                 // case TokenNamenull :
3943                 // return "null"; //$NON-NLS-1$
3944                 case TokenNameor:
3945                         return "OR"; //$NON-NLS-1$
3946                 case TokenNameprint:
3947                         return "print"; //$NON-NLS-1$
3948                 case TokenNameprivate:
3949                         return "private"; //$NON-NLS-1$
3950                 case TokenNameprotected:
3951                         return "protected"; //$NON-NLS-1$
3952                 case TokenNamepublic:
3953                         return "public"; //$NON-NLS-1$
3954                 case TokenNamerequire:
3955                         return "require"; //$NON-NLS-1$
3956                 case TokenNamerequire_once:
3957                         return "require_once"; //$NON-NLS-1$
3958                 case TokenNamereturn:
3959                         return "return"; //$NON-NLS-1$
3960                 case TokenNamestatic:
3961                         return "static"; //$NON-NLS-1$
3962                 case TokenNameswitch:
3963                         return "switch"; //$NON-NLS-1$
3964                 // case TokenNametrue :
3965                 // return "true"; //$NON-NLS-1$
3966                 case TokenNameunset:
3967                         return "unset"; //$NON-NLS-1$
3968                 case TokenNamevar:
3969                         return "var"; //$NON-NLS-1$
3970                 case TokenNamewhile:
3971                         return "while"; //$NON-NLS-1$
3972                 case TokenNamexor:
3973                         return "XOR"; //$NON-NLS-1$
3974                 // case TokenNamethis :
3975                 // return "$this"; //$NON-NLS-1$
3976                 case TokenNameIntegerLiteral:
3977                         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3978                 case TokenNameDoubleLiteral:
3979                         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3980                 case TokenNameStringDoubleQuote:
3981                         return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3982                 case TokenNameStringSingleQuote:
3983                         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3984                 case TokenNameStringInterpolated:
3985                         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3986                 case TokenNameEncapsedString0:
3987                         return "`"; //$NON-NLS-1$
3988                         // case TokenNameEncapsedString1:
3989                         // return "\'"; //$NON-NLS-1$
3990                         // case TokenNameEncapsedString2:
3991                         // return "\""; //$NON-NLS-1$
3992                 case TokenNameSTRING:
3993                         return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3994                 case TokenNameHEREDOC:
3995                         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3996                 case TokenNamePLUS_PLUS:
3997                         return "++"; //$NON-NLS-1$
3998                 case TokenNameMINUS_MINUS:
3999                         return "--"; //$NON-NLS-1$
4000                 case TokenNameEQUAL_EQUAL:
4001                         return "=="; //$NON-NLS-1$
4002                 case TokenNameEQUAL_EQUAL_EQUAL:
4003                         return "==="; //$NON-NLS-1$
4004                 case TokenNameEQUAL_GREATER:
4005                         return "=>"; //$NON-NLS-1$
4006                 case TokenNameLESS_EQUAL:
4007                         return "<="; //$NON-NLS-1$
4008                 case TokenNameGREATER_EQUAL:
4009                         return ">="; //$NON-NLS-1$
4010                 case TokenNameNOT_EQUAL:
4011                         return "!="; //$NON-NLS-1$
4012                 case TokenNameNOT_EQUAL_EQUAL:
4013                         return "!=="; //$NON-NLS-1$
4014                 case TokenNameLEFT_SHIFT:
4015                         return "<<"; //$NON-NLS-1$
4016                 case TokenNameRIGHT_SHIFT:
4017                         return ">>"; //$NON-NLS-1$
4018                 case TokenNamePLUS_EQUAL:
4019                         return "+="; //$NON-NLS-1$
4020                 case TokenNameMINUS_EQUAL:
4021                         return "-="; //$NON-NLS-1$
4022                 case TokenNameMULTIPLY_EQUAL:
4023                         return "*="; //$NON-NLS-1$
4024                 case TokenNameDIVIDE_EQUAL:
4025                         return "/="; //$NON-NLS-1$
4026                 case TokenNameAND_EQUAL:
4027                         return "&="; //$NON-NLS-1$
4028                 case TokenNameOR_EQUAL:
4029                         return "|="; //$NON-NLS-1$
4030                 case TokenNameXOR_EQUAL:
4031                         return "^="; //$NON-NLS-1$
4032                 case TokenNameREMAINDER_EQUAL:
4033                         return "%="; //$NON-NLS-1$
4034                 case TokenNameDOT_EQUAL:
4035                         return ".="; //$NON-NLS-1$
4036                 case TokenNameLEFT_SHIFT_EQUAL:
4037                         return "<<="; //$NON-NLS-1$
4038                 case TokenNameRIGHT_SHIFT_EQUAL:
4039                         return ">>="; //$NON-NLS-1$
4040                 case TokenNameOR_OR:
4041                         return "||"; //$NON-NLS-1$
4042                 case TokenNameAND_AND:
4043                         return "&&"; //$NON-NLS-1$
4044                 case TokenNamePLUS:
4045                         return "+"; //$NON-NLS-1$
4046                 case TokenNameMINUS:
4047                         return "-"; //$NON-NLS-1$
4048                 case TokenNameMINUS_GREATER:
4049                         return "->";
4050                 case TokenNameNOT:
4051                         return "!"; //$NON-NLS-1$
4052                 case TokenNameREMAINDER:
4053                         return "%"; //$NON-NLS-1$
4054                 case TokenNameXOR:
4055                         return "^"; //$NON-NLS-1$
4056                 case TokenNameAND:
4057                         return "&"; //$NON-NLS-1$
4058                 case TokenNameMULTIPLY:
4059                         return "*"; //$NON-NLS-1$
4060                 case TokenNameOR:
4061                         return "|"; //$NON-NLS-1$
4062                 case TokenNameTWIDDLE:
4063                         return "~"; //$NON-NLS-1$
4064                 case TokenNameTWIDDLE_EQUAL:
4065                         return "~="; //$NON-NLS-1$
4066                 case TokenNameDIVIDE:
4067                         return "/"; //$NON-NLS-1$
4068                 case TokenNameGREATER:
4069                         return ">"; //$NON-NLS-1$
4070                 case TokenNameLESS:
4071                         return "<"; //$NON-NLS-1$
4072                 case TokenNameLPAREN:
4073                         return "("; //$NON-NLS-1$
4074                 case TokenNameRPAREN:
4075                         return ")"; //$NON-NLS-1$
4076                 case TokenNameLBRACE:
4077                         return "{"; //$NON-NLS-1$
4078                 case TokenNameRBRACE:
4079                         return "}"; //$NON-NLS-1$
4080                 case TokenNameLBRACKET:
4081                         return "["; //$NON-NLS-1$
4082                 case TokenNameRBRACKET:
4083                         return "]"; //$NON-NLS-1$
4084                 case TokenNameSEMICOLON:
4085                         return ";"; //$NON-NLS-1$
4086                 case TokenNameQUESTION:
4087                         return "?"; //$NON-NLS-1$
4088                 case TokenNameCOLON:
4089                         return ":"; //$NON-NLS-1$
4090                 case TokenNameCOMMA:
4091                         return ","; //$NON-NLS-1$
4092                 case TokenNameDOT:
4093                         return "."; //$NON-NLS-1$
4094                 case TokenNameEQUAL:
4095                         return "="; //$NON-NLS-1$
4096                 case TokenNameAT:
4097                         return "@";
4098                 case TokenNameDOLLAR:
4099                         return "$";
4100                 case TokenNameDOLLAR_LBRACE:
4101                         return "${";
4102                 case TokenNameLBRACE_DOLLAR:
4103                         return "{$";
4104                 case TokenNameEOF:
4105                         return "EOF"; //$NON-NLS-1$
4106                 case TokenNameWHITESPACE:
4107                         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4108                 case TokenNameCOMMENT_LINE:
4109                         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4110                 case TokenNameCOMMENT_BLOCK:
4111                         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4112                 case TokenNameCOMMENT_PHPDOC:
4113                         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4114                 // case TokenNameHTML :
4115                 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
4116                 // //$NON-NLS-1$
4117                 case TokenNameFILE:
4118                         return "__FILE__"; //$NON-NLS-1$
4119                 case TokenNameLINE:
4120                         return "__LINE__"; //$NON-NLS-1$
4121                 case TokenNameCLASS_C:
4122                         return "__CLASS__"; //$NON-NLS-1$
4123                 case TokenNameMETHOD_C:
4124                         return "__METHOD__"; //$NON-NLS-1$
4125                 case TokenNameFUNC_C:
4126                         return "__FUNCTION__"; //$NON-NLS-1
4127                 case TokenNameboolCAST:
4128                         return "( bool )"; //$NON-NLS-1$
4129                 case TokenNameintCAST:
4130                         return "( int )"; //$NON-NLS-1$
4131                 case TokenNamedoubleCAST:
4132                         return "( double )"; //$NON-NLS-1$
4133                 case TokenNameobjectCAST:
4134                         return "( object )"; //$NON-NLS-1$
4135                 case TokenNamestringCAST:
4136                         return "( string )"; //$NON-NLS-1$
4137                 default:
4138                         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
4139                 }
4140         }
4141
4142         public Scanner() {
4143                 this(false, false);
4144         }
4145
4146         public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
4147                 this(tokenizeComments, tokenizeWhiteSpace, false);
4148         }
4149
4150         public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
4151                 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
4152         }
4153
4154         public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
4155                         boolean assertMode) {
4156                 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null, true);
4157         }
4158
4159         public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
4160                         boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
4161                 this.eofPosition = Integer.MAX_VALUE;
4162                 this.tokenizeComments = tokenizeComments;
4163                 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
4164                 this.tokenizeStrings = tokenizeStrings;
4165                 this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
4166 //              this.assertMode = assertMode;
4167                 // this.encapsedStringStack = null;
4168                 this.taskTags = taskTags;
4169                 this.taskPriorities = taskPriorities;
4170         }
4171
4172         private void checkNonExternalizeString() throws InvalidInputException {
4173                 if (currentLine == null)
4174                         return;
4175                 parseTags(currentLine);
4176         }
4177
4178         private void parseTags(NLSLine line) throws InvalidInputException {
4179                 String s = new String(getCurrentTokenSource());
4180                 int pos = s.indexOf(TAG_PREFIX);
4181                 int lineLength = line.size();
4182                 while (pos != -1) {
4183                         int start = pos + TAG_PREFIX_LENGTH;
4184                         int end = s.indexOf(TAG_POSTFIX, start);
4185                         String index = s.substring(start, end);
4186                         int i = 0;
4187                         try {
4188                                 i = Integer.parseInt(index) - 1;
4189                                 // Tags are one based not zero based.
4190                         } catch (NumberFormatException e) {
4191                                 i = -1; // we don't want to consider this as a valid NLS tag
4192                         }
4193                         if (line.exists(i)) {
4194                                 line.set(i, null);
4195                         }
4196                         pos = s.indexOf(TAG_PREFIX, start);
4197                 }
4198                 this.nonNLSStrings = new StringLiteral[lineLength];
4199                 int nonNLSCounter = 0;
4200                 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4201                         StringLiteral literal = (StringLiteral) iterator.next();
4202                         if (literal != null) {
4203                                 this.nonNLSStrings[nonNLSCounter++] = literal;
4204                         }
4205                 }
4206                 if (nonNLSCounter == 0) {
4207                         this.nonNLSStrings = null;
4208                         currentLine = null;
4209                         return;
4210                 }
4211                 this.wasNonExternalizedStringLiteral = true;
4212                 if (nonNLSCounter != lineLength) {
4213                         System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
4214                 }
4215                 currentLine = null;
4216         }
4217
4218         public final void scanEscapeCharacter() throws InvalidInputException {
4219                 // the string with "\\u" is a legal string of two chars \ and u
4220                 // thus we use a direct access to the source (for regular cases).
4221                 if (unicodeAsBackSlash) {
4222                         // consume next character
4223                         unicodeAsBackSlash = false;
4224                         // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4225                         // (source[currentPosition] == 'u')) {
4226                         // getNextUnicodeChar();
4227                         // } else {
4228                         if (withoutUnicodePtr != 0) {
4229                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
4230                                 // }
4231                         }
4232                 } else
4233                         currentCharacter = source[currentPosition++];
4234                 switch (currentCharacter) {
4235                 case 'b':
4236                         currentCharacter = '\b';
4237                         break;
4238                 case 't':
4239                         currentCharacter = '\t';
4240                         break;
4241                 case 'n':
4242                         currentCharacter = '\n';
4243                         break;
4244                 case 'f':
4245                         currentCharacter = '\f';
4246                         break;
4247                 case 'r':
4248                         currentCharacter = '\r';
4249                         break;
4250                 case '\"':
4251                         currentCharacter = '\"';
4252                         break;
4253                 case '\'':
4254                         currentCharacter = '\'';
4255                         break;
4256                 case '\\':
4257                         currentCharacter = '\\';
4258                         break;
4259                 default:
4260                         // -----------octal escape--------------
4261                         // OctalDigit
4262                         // OctalDigit OctalDigit
4263                         // ZeroToThree OctalDigit OctalDigit
4264                         int number = Character.getNumericValue(currentCharacter);
4265                         if (number >= 0 && number <= 7) {
4266                                 boolean zeroToThreeNot = number > 3;
4267                                 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4268                                         int digit = Character.getNumericValue(currentCharacter);
4269                                         if (digit >= 0 && digit <= 7) {
4270                                                 number = (number * 8) + digit;
4271                                                 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4272                                                         if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4273                                                                 // Digit --> ignore last character
4274                                                                 currentPosition--;
4275                                                         } else {
4276                                                                 digit = Character.getNumericValue(currentCharacter);
4277                                                                 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4278                                                                         // OctalDigit OctalDigit
4279                                                                         number = (number * 8) + digit;
4280                                                                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4281                                                                         // --> ignore last character
4282                                                                         currentPosition--;
4283                                                                 }
4284                                                         }
4285                                                 } else { // has read \OctalDigit NonDigit--> ignore last
4286                                                         // character
4287                                                         currentPosition--;
4288                                                 }
4289                                         } else { // has read \OctalDigit NonOctalDigit--> ignore last
4290                                                 // character
4291                                                 currentPosition--;
4292                                         }
4293                                 } else { // has read \OctalDigit --> ignore last character
4294                                         currentPosition--;
4295                                 }
4296                                 if (number > 255)
4297                                         throw new InvalidInputException(INVALID_ESCAPE);
4298                                 currentCharacter = (char) number;
4299                         } else
4300                                 throw new InvalidInputException(INVALID_ESCAPE);
4301                 }
4302         }
4303
4304         // chech presence of task: tags
4305         // TODO (frederic) see if we need to take unicode characters into account...
4306         public void checkTaskTag(int commentStart, int commentEnd) {
4307                 char[] src = this.source;
4308
4309                 // only look for newer task: tags
4310                 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4311                         return;
4312                 }
4313                 int foundTaskIndex = this.foundTaskCount;
4314                 char previous = src[commentStart + 1]; // should be '*' or '/'
4315                 nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4316                         char[] tag = null;
4317                         char[] priority = null;
4318                         // check for tag occurrence only if not ambiguous with javadoc tag
4319                         if (previous != '@') {
4320                                 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4321                                         tag = this.taskTags[itag];
4322                                         int tagLength = tag.length;
4323                                         if (tagLength == 0)
4324                                                 continue nextTag;
4325
4326                                         // ensure tag is not leaded with letter if tag starts with a letter
4327                                         if (Scanner.isPHPIdentifierStart(tag[0])) {
4328                                                 if (Scanner.isPHPIdentifierPart(previous)) {
4329                                                         continue nextTag;
4330                                                 }
4331                                         }
4332
4333                                         for (int t = 0; t < tagLength; t++) {
4334                                                 char sc, tc;
4335                                                 int x = i + t;
4336                                                 if (x >= this.eofPosition || x >= commentEnd)
4337                                                         continue nextTag;
4338                                                 if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4339                                                         if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case
4340                                                                                                                                                                                                                                                                                                                                                                                                                                         // insensitive
4341                                                                                                                                                                                                                                                                                                                                                                                                                                         // check
4342                                                                 continue nextTag;
4343                                                         }
4344                                                 }
4345                                         }
4346                                         // ensure tag is not followed with letter if tag finishes with a
4347                                         // letter
4348                                         if (i + tagLength < commentEnd && Scanner.isPHPIdentifierPart(src[i + tagLength - 1])) {
4349                                                 if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
4350                                                         continue nextTag;
4351                                         }
4352                                         if (this.foundTaskTags == null) {
4353                                                 this.foundTaskTags = new char[5][];
4354                                                 this.foundTaskMessages = new char[5][];
4355                                                 this.foundTaskPriorities = new char[5][];
4356                                                 this.foundTaskPositions = new int[5][];
4357                                         } else if (this.foundTaskCount == this.foundTaskTags.length) {
4358                                                 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4359                                                                 this.foundTaskCount);
4360                                                 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4361                                                                 this.foundTaskCount);
4362                                                 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4363                                                                 this.foundTaskCount);
4364                                                 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4365                                                                 this.foundTaskCount);
4366                                         }
4367
4368                                         priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4369
4370                                         this.foundTaskTags[this.foundTaskCount] = tag;
4371                                         this.foundTaskPriorities[this.foundTaskCount] = priority;
4372                                         this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4373                                         this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4374                                         this.foundTaskCount++;
4375                                         i += tagLength - 1; // will be incremented when looping
4376                                         break nextTag;
4377                                 }
4378                         }
4379                         previous = src[i];
4380                 }
4381                 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4382                         // retrieve message start and end positions
4383                         int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4384                         int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4385                         // at most beginning of next task
4386                         if (max_value < msgStart) {
4387                                 max_value = msgStart; // would only occur if tag is before EOF.
4388                         }
4389                         int end = -1;
4390                         char c;
4391                         for (int j = msgStart; j < max_value; j++) {
4392                                 if ((c = src[j]) == '\n' || c == '\r') {
4393                                         end = j - 1;
4394                                         break;
4395                                 }
4396                         }
4397                         if (end == -1) {
4398                                 for (int j = max_value; j > msgStart; j--) {
4399                                         if ((c = src[j]) == '*') {
4400                                                 end = j - 1;
4401                                                 break;
4402                                         }
4403                                 }
4404                                 if (end == -1)
4405                                         end = max_value;
4406                         }
4407                         if (msgStart == end)
4408                                 continue; // empty
4409                         // trim the message
4410                         while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4411                                 end--;
4412                         while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4413                                 msgStart++;
4414                         // update the end position of the task
4415                         this.foundTaskPositions[i][1] = end;
4416                         // get the message source
4417                         final int messageLength = end - msgStart + 1;
4418                         char[] message = new char[messageLength];
4419                         System.arraycopy(src, msgStart, message, 0, messageLength);
4420                         this.foundTaskMessages[i] = message;
4421                 }
4422         }
4423
4424         // check presence of task: tags
4425         // public void checkTaskTag(int commentStart, int commentEnd) {
4426         // // only look for newer task: tags
4427         // if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount
4428         // - 1][0] >= commentStart) {
4429         // return;
4430         // }
4431         // int foundTaskIndex = this.foundTaskCount;
4432         // nextChar: for (int i = commentStart; i < commentEnd && i <
4433         // this.eofPosition; i++) {
4434         // char[] tag = null;
4435         // char[] priority = null;
4436         // // check for tag occurrence
4437         // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4438         // tag = this.taskTags[itag];
4439         // priority = this.taskPriorities != null && itag < this.taskPriorities.length
4440         // ? this.taskPriorities[itag] : null;
4441         // int tagLength = tag.length;
4442         // for (int t = 0; t < tagLength; t++) {
4443         // if (this.source[i + t] != tag[t])
4444         // continue nextTag;
4445         // }
4446         // if (this.foundTaskTags == null) {
4447         // this.foundTaskTags = new char[5][];
4448         // this.foundTaskMessages = new char[5][];
4449         // this.foundTaskPriorities = new char[5][];
4450         // this.foundTaskPositions = new int[5][];
4451         // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4452         // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new
4453         // char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4454         // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new
4455         // char[this.foundTaskCount * 2][], 0,
4456         // this.foundTaskCount);
4457         // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities =
4458         // new char[this.foundTaskCount * 2][], 0,
4459         // this.foundTaskCount);
4460         // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new
4461         // int[this.foundTaskCount * 2][], 0,
4462         // this.foundTaskCount);
4463         // }
4464         // this.foundTaskTags[this.foundTaskCount] = tag;
4465         // this.foundTaskPriorities[this.foundTaskCount] = priority;
4466         // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength
4467         // - 1 };
4468         // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4469         // this.foundTaskCount++;
4470         // i += tagLength - 1; // will be incremented when looping
4471         // }
4472         // }
4473         // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4474         // // retrieve message start and end positions
4475         // int msgStart = this.foundTaskPositions[i][0] +
4476         // this.foundTaskTags[i].length;
4477         // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i +
4478         // 1][0] - 1 : commentEnd - 1;
4479         // // at most beginning of next task
4480         // if (max_value < msgStart)
4481         // max_value = msgStart; // would only occur if tag is before EOF.
4482         // int end = -1;
4483         // char c;
4484         // for (int j = msgStart; j < max_value; j++) {
4485         // if ((c = this.source[j]) == '\n' || c == '\r') {
4486         // end = j - 1;
4487         // break;
4488         // }
4489         // }
4490         // if (end == -1) {
4491         // for (int j = max_value; j > msgStart; j--) {
4492         // if ((c = this.source[j]) == '*') {
4493         // end = j - 1;
4494         // break;
4495         // }
4496         // }
4497         // if (end == -1)
4498         // end = max_value;
4499         // }
4500         // if (msgStart == end)
4501         // continue; // empty
4502         // // trim the message
4503         // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4504         // end--;
4505         // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4506         // msgStart++;
4507         // // update the end position of the task
4508         // this.foundTaskPositions[i][1] = end;
4509         // // get the message source
4510         // final int messageLength = end - msgStart + 1;
4511         // char[] message = new char[messageLength];
4512         // System.arraycopy(source, msgStart, message, 0, messageLength);
4513         // this.foundTaskMessages[i] = message;
4514         // }
4515         // }
4516 }