Improved support for comment folding
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
1 /***********************************************************************************************************************************
2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3  * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4  * and is available at http://www.eclipse.org/legal/cpl-v05.html
5  *
6  * Contributors: IBM Corporation - initial API and implementation
7  **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
9
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
13
14 import net.sourceforge.phpdt.core.compiler.CharOperation;
15 import net.sourceforge.phpdt.core.compiler.IScanner;
16 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
17 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
20
21 public class Scanner implements IScanner, ITerminalSymbols {
22         /*
23          * The API consists of: - getNextToken(), which returns the type of the current token (this
24          * value is not memorized by the scanner) - getCurrentTokenSource(), which
25          * provides the token's "REAL" source (i.e. all unicode escapes have been
26          * transformed into the corresponding char) - sourceStart, which gives the position in the
27          * stream - currentPosition-1, which gives the sourceEnd position in the stream
28          */
29         // 1.4 feature
30         // private boolean assertMode;
31         public boolean useAssertAsAnIndentifier = false;
32
33         // flag indicating if processed source contains occurrences of keyword assert
34         public boolean containsAssertKeyword = false;
35
36         public boolean recordLineSeparator;
37
38         public boolean ignorePHPOneLiner = false;
39
40         public boolean phpMode = false;
41
42         public boolean phpExpressionTag = false;
43
44         // public Stack encapsedStringStack = null;
45
46         public char currentCharacter;
47
48         public int startPosition;
49
50         public int currentPosition;
51
52         public int initialPosition, eofPosition;
53
54         // after this position eof are generated instead of real token from the
55         // source
56         public boolean tokenizeComments;
57
58         public boolean tokenizeWhiteSpace;
59
60         public boolean tokenizeStrings;
61
62         // source should be viewed as a window (aka a part)
63         // of a entire very large stream
64         public char source[];
65
66         // unicode support
67         public char[] withoutUnicodeBuffer;
68
69         public int withoutUnicodePtr;
70
71         // when == 0 ==> no unicode in the current token
72         public boolean unicodeAsBackSlash = false;
73
74         public boolean scanningFloatLiteral = false;
75
76         // support for /** comments
77         public int[] commentStops = new int[10];
78
79         public int[] commentStarts = new int[10];
80
81         public int commentPtr = -1; // no comment test with commentPtr value -1
82
83         protected int lastCommentLinePosition = -1;
84
85         // diet parsing support - jump over some method body when requested
86         public boolean diet = false;
87
88         // support for the poor-line-debuggers ....
89         // remember the position of the cr/lf
90         public int[] lineEnds = new int[250];
91
92         public int linePtr = -1;
93
94         public boolean wasAcr = false;
95
96         public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
97
98         public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
99
100         public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
101
102         public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
103
104         public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
105
106         public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
107
108         public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
109
110         public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
111
112         public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
113
114         public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
115
116         public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
117
118         public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
119
120         // ----------------optimized identifier management------------------
121         static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
122                         charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
123                         charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
124                         charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
125                         charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
126                         charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
127                         charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
128                         charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
129                         charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
130
131         static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
132                         '$', 'c' }, charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' }, charArray_vf = new char[] { '$',
133                         'f' }, charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' },
134                         charArray_vi = new char[] { '$', 'i' }, charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' },
135                         charArray_vl = new char[] { '$', 'l' }, charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' },
136                         charArray_vo = new char[] { '$', 'o' }, charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' },
137                         charArray_vr = new char[] { '$', 'r' }, charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' },
138                         charArray_vu = new char[] { '$', 'u' }, charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' },
139                         charArray_vx = new char[] { '$', 'x' }, charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
140
141         public final static int MAX_OBVIOUS = 256;
142
143         static final int[] ObviousIdentCharNatures = new int[MAX_OBVIOUS];
144
145         public final static int C_DOLLAR = 8;
146
147         public final static int C_LETTER = 4;
148
149         public final static int C_DIGIT = 3;
150
151         public final static int C_SEPARATOR = 2;
152
153         public final static int C_SPACE = 1;
154         static {
155                 for (int i = '0'; i <= '9'; i++)
156                         ObviousIdentCharNatures[i] = C_DIGIT;
157
158                 for (int i = 'a'; i <= 'z'; i++)
159                         ObviousIdentCharNatures[i] = C_LETTER;
160                 for (int i = 'A'; i <= 'Z'; i++)
161                         ObviousIdentCharNatures[i] = C_LETTER;
162                 ObviousIdentCharNatures['_'] = C_LETTER;
163                 for (int i = 127; i <= 255; i++)
164                         ObviousIdentCharNatures[i] = C_LETTER;
165
166                 ObviousIdentCharNatures['$'] = C_DOLLAR;
167
168                 ObviousIdentCharNatures[10] = C_SPACE; // \ u000a: LINE FEED
169                 ObviousIdentCharNatures[12] = C_SPACE; // \ u000c: FORM FEED
170                 ObviousIdentCharNatures[13] = C_SPACE; // \ u000d: CARRIAGE RETURN
171                 ObviousIdentCharNatures[32] = C_SPACE; // \ u0020: SPACE
172                 ObviousIdentCharNatures[9] = C_SPACE; // \ u0009: HORIZONTAL TABULATION
173
174                 ObviousIdentCharNatures['.'] = C_SEPARATOR;
175                 ObviousIdentCharNatures[':'] = C_SEPARATOR;
176                 ObviousIdentCharNatures[';'] = C_SEPARATOR;
177                 ObviousIdentCharNatures[','] = C_SEPARATOR;
178                 ObviousIdentCharNatures['['] = C_SEPARATOR;
179                 ObviousIdentCharNatures[']'] = C_SEPARATOR;
180                 ObviousIdentCharNatures['('] = C_SEPARATOR;
181                 ObviousIdentCharNatures[')'] = C_SEPARATOR;
182                 ObviousIdentCharNatures['{'] = C_SEPARATOR;
183                 ObviousIdentCharNatures['}'] = C_SEPARATOR;
184                 ObviousIdentCharNatures['+'] = C_SEPARATOR;
185                 ObviousIdentCharNatures['-'] = C_SEPARATOR;
186                 ObviousIdentCharNatures['*'] = C_SEPARATOR;
187                 ObviousIdentCharNatures['/'] = C_SEPARATOR;
188                 ObviousIdentCharNatures['='] = C_SEPARATOR;
189                 ObviousIdentCharNatures['&'] = C_SEPARATOR;
190                 ObviousIdentCharNatures['|'] = C_SEPARATOR;
191                 ObviousIdentCharNatures['?'] = C_SEPARATOR;
192                 ObviousIdentCharNatures['<'] = C_SEPARATOR;
193                 ObviousIdentCharNatures['>'] = C_SEPARATOR;
194                 ObviousIdentCharNatures['!'] = C_SEPARATOR;
195                 ObviousIdentCharNatures['%'] = C_SEPARATOR;
196                 ObviousIdentCharNatures['^'] = C_SEPARATOR;
197                 ObviousIdentCharNatures['~'] = C_SEPARATOR;
198                 ObviousIdentCharNatures['"'] = C_SEPARATOR;
199                 ObviousIdentCharNatures['\''] = C_SEPARATOR;
200         }
201
202         static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
203
204         static final int TableSize = 30, InternalTableSize = 6;
205
206         // 30*6 = 180 entries
207         public static final int OptimizedLength = 6;
208
209         public/* static */
210         final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
211
212         // support for detecting non-externalized string literals
213         int currentLineNr = -1;
214
215         int previousLineNr = -1;
216
217         NLSLine currentLine = null;
218
219         List lines = new ArrayList();
220
221         public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
222
223         public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
224
225         public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
226
227         public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
228
229         public StringLiteral[] nonNLSStrings = null;
230
231         public boolean checkNonExternalizedStringLiterals = true;
232
233         public boolean wasNonExternalizedStringLiteral = false;
234
235         /* static */{
236                 for (int i = 0; i < 6; i++) {
237                         for (int j = 0; j < TableSize; j++) {
238                                 for (int k = 0; k < InternalTableSize; k++) {
239                                         charArray_length[i][j][k] = initCharArray;
240                                 }
241                         }
242                 }
243         }
244
245         static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
246
247         public static final int RoundBracket = 0;
248
249         public static final int SquareBracket = 1;
250
251         public static final int CurlyBracket = 2;
252
253         public static final int BracketKinds = 3;
254
255         // task tag support
256         public char[][] foundTaskTags = null;
257
258         public char[][] foundTaskMessages;
259
260         public char[][] foundTaskPriorities = null;
261
262         public int[][] foundTaskPositions;
263
264         public int foundTaskCount = 0;
265
266         public char[][] taskTags = null;
267
268         public char[][] taskPriorities = null;
269
270         public boolean isTaskCaseSensitive = true;
271
272         public static final boolean DEBUG = false;
273
274         public static final boolean TRACE = false;
275
276         public ICompilationUnit compilationUnit = null;
277
278         /**
279          * Determines if the specified character is permissible as the first character
280          * in a PHP identifier or variable
281          *
282          * The '$' character for PHP variables is regarded as a correct first
283          * character !
284          *
285          */
286         public static boolean isPHPIdentOrVarStart(char ch) {
287                 if (ch < MAX_OBVIOUS) {
288                         return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DOLLAR;
289                 }
290                 return false;
291                 // return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <=
292                 // ch && ch <= 0xFF);
293         }
294
295         /**
296          * Determines if the specified character is permissible as the first character
297          * in a PHP identifier.
298          *
299          * The '$' character for PHP variables isn't regarded as the first character !
300          */
301         public static boolean isPHPIdentifierStart(char ch) {
302                 if (ch < MAX_OBVIOUS) {
303                         return ObviousIdentCharNatures[ch] == C_LETTER;
304                 }
305                 return false;
306                 // return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <=
307                 // 0xFF);
308         }
309
310         /**
311          * Determines if the specified character may be part of a PHP identifier as
312          * other than the first character
313          */
314         public static boolean isPHPIdentifierPart(char ch) {
315                 if (ch < MAX_OBVIOUS) {
316                         return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DIGIT;
317                 }
318                 return false;
319                 // return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch
320                 // <= 0xFF);
321         }
322
323         public static boolean isSQLIdentifierPart(char ch) {
324                 if (ch < MAX_OBVIOUS) {
325                         return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DIGIT;
326                 }
327                 return false;
328         }
329
330         public final boolean atEnd() {
331                 // This code is not relevant if source is
332                 // Only a part of the real stream input
333                 return source.length == currentPosition;
334         }
335
336         public char[] getCurrentIdentifierSource() {
337                 // return the token REAL source (aka unicodes are precomputed)
338                 char[] result;
339                 // if (withoutUnicodePtr != 0)
340                 // //0 is used as a fast test flag so the real first char is in position 1
341                 // System.arraycopy(
342                 // withoutUnicodeBuffer,
343                 // 1,
344                 // result = new char[withoutUnicodePtr],
345                 // 0,
346                 // withoutUnicodePtr);
347                 // else {
348                 int length = currentPosition - startPosition;
349                 switch (length) { // see OptimizedLength
350                 case 1:
351                         return optimizedCurrentTokenSource1();
352                 case 2:
353                         return optimizedCurrentTokenSource2();
354                 case 3:
355                         return optimizedCurrentTokenSource3();
356                 case 4:
357                         return optimizedCurrentTokenSource4();
358                 case 5:
359                         return optimizedCurrentTokenSource5();
360                 case 6:
361                         return optimizedCurrentTokenSource6();
362                 }
363                 // no optimization
364                 System.arraycopy(source, startPosition, result = new char[length], 0, length);
365                 // }
366                 return result;
367         }
368
369         public int getCurrentTokenEndPosition() {
370                 return this.currentPosition - 1;
371         }
372
373         public final char[] getCurrentTokenSource() {
374                 // Return the token REAL source (aka unicodes are precomputed)
375                 char[] result;
376                 // if (withoutUnicodePtr != 0)
377                 // // 0 is used as a fast test flag so the real first char is in position 1
378                 // System.arraycopy(
379                 // withoutUnicodeBuffer,
380                 // 1,
381                 // result = new char[withoutUnicodePtr],
382                 // 0,
383                 // withoutUnicodePtr);
384                 // else {
385                 int length;
386                 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
387                 // }
388                 return result;
389         }
390
391         public final char[] getCurrentTokenSource(int startPos) {
392                 // Return the token REAL source (aka unicodes are precomputed)
393                 char[] result;
394                 // if (withoutUnicodePtr != 0)
395                 // // 0 is used as a fast test flag so the real first char is in position 1
396                 // System.arraycopy(
397                 // withoutUnicodeBuffer,
398                 // 1,
399                 // result = new char[withoutUnicodePtr],
400                 // 0,
401                 // withoutUnicodePtr);
402                 // else {
403                 int length;
404                 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
405                 // }
406                 return result;
407         }
408
409         public final char[] getCurrentTokenSourceString() {
410                 // return the token REAL source (aka unicodes are precomputed).
411                 // REMOVE the two " that are at the beginning and the end.
412                 char[] result;
413                 if (withoutUnicodePtr != 0)
414                         // 0 is used as a fast test flag so the real first char is in position 1
415                         System.arraycopy(withoutUnicodeBuffer, 2,
416                         // 2 is 1 (real start) + 1 (to jump over the ")
417                                         result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
418                 else {
419                         int length;
420                         System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
421                 }
422                 return result;
423         }
424
425         public final boolean equalsCurrentTokenSource(char[] word) {
426                 if (word.length != currentPosition - startPosition) {
427                         return false;
428                 }
429                 for (int i = 0; i < word.length; i++) {
430                         if (word[i] != source[startPosition + i]) {
431                                 return false;
432                         }
433                 }
434                 return true;
435         }
436
437         public final char[] getRawTokenSourceEnd() {
438                 int length = this.eofPosition - this.currentPosition - 1;
439                 char[] sourceEnd = new char[length];
440                 System.arraycopy(this.source, this.currentPosition, sourceEnd, 0, length);
441                 return sourceEnd;
442         }
443
444         public int getCurrentTokenStartPosition() {
445                 return this.startPosition;
446         }
447
448         public final String getCurrentStringLiteral() {
449                 char[] result = getCurrentStringLiteralSource();
450                 return new String(result);
451         }
452
453         public final char[] getCurrentStringLiteralSource() {
454                 // Return the token REAL source (aka unicodes are precomputed)
455                 if (startPosition + 1 >= currentPosition) {
456                         return new char[0];
457                 }
458                 char[] result;
459                 int length;
460                 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
461                 // }
462                 return result;
463         }
464
465         public final char[] getCurrentStringLiteralSource(int startPos) {
466                 // Return the token REAL source (aka unicodes are precomputed)
467                 char[] result;
468                 int length;
469                 System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
470                 // }
471                 return result;
472         }
473
474         /*
475          * Search the source position corresponding to the end of a given line number
476          *
477          * Line numbers are 1-based, and relative to the scanner initialPosition.
478          * Character positions are 0-based.
479          *
480          * In case the given line number is inconsistent, answers -1.
481          */
482         public final int getLineEnd(int lineNumber) {
483                 if (lineEnds == null)
484                         return -1;
485                 if (lineNumber >= lineEnds.length)
486                         return -1;
487                 if (lineNumber <= 0)
488                         return -1;
489                 if (lineNumber == lineEnds.length - 1)
490                         return eofPosition;
491                 return lineEnds[lineNumber - 1];
492                 // next line start one character behind the lineEnd of the previous line
493         }
494
495         /**
496          * Search the source position corresponding to the beginning of a given line
497          * number
498          *
499          * Line numbers are 1-based, and relative to the scanner initialPosition.
500          * Character positions are 0-based.
501          *
502          * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
503          *
504          * In case the given line number is inconsistent, answers -1.
505          */
506         public final int getLineStart(int lineNumber) {
507                 if (lineEnds == null)
508                         return -1;
509                 if (lineNumber >= lineEnds.length)
510                         return -1;
511                 if (lineNumber <= 0)
512                         return -1;
513                 if (lineNumber == 1)
514                         return initialPosition;
515                 return lineEnds[lineNumber - 2] + 1;
516                 // next line start one character behind the lineEnd of the previous line
517         }
518
519         public final boolean getNextChar(char testedChar) {
520                 // BOOLEAN
521                 // handle the case of unicode.
522                 // when a unicode appears then we must use a buffer that holds char
523                 // internal values
524                 // At the end of this method currentCharacter holds the new visited char
525                 // and currentPosition points right next after it
526                 // Both previous lines are true if the currentCharacter is == to the
527                 // testedChar
528                 // On false, no side effect has occured.
529                 // ALL getNextChar.... ARE OPTIMIZED COPIES
530                 int temp = currentPosition;
531                 try {
532                         currentCharacter = source[currentPosition++];
533                         // if (((currentCharacter = source[currentPosition++]) == '\\')
534                         // && (source[currentPosition] == 'u')) {
535                         // //-------------unicode traitement ------------
536                         // int c1, c2, c3, c4;
537                         // int unicodeSize = 6;
538                         // currentPosition++;
539                         // while (source[currentPosition] == 'u') {
540                         // currentPosition++;
541                         // unicodeSize++;
542                         // }
543                         //
544                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
545                         // || c1 < 0)
546                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
547                         // || c2 < 0)
548                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
549                         // || c3 < 0)
550                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
551                         // || c4 < 0)) {
552                         // currentPosition = temp;
553                         // return false;
554                         // }
555                         //
556                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
557                         // if (currentCharacter != testedChar) {
558                         // currentPosition = temp;
559                         // return false;
560                         // }
561                         // unicodeAsBackSlash = currentCharacter == '\\';
562                         //
563                         // //need the unicode buffer
564                         // if (withoutUnicodePtr == 0) {
565                         // //buffer all the entries that have been left aside....
566                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
567                         // System.arraycopy(
568                         // source,
569                         // startPosition,
570                         // withoutUnicodeBuffer,
571                         // 1,
572                         // withoutUnicodePtr);
573                         // }
574                         // //fill the buffer with the char
575                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
576                         // return true;
577                         //
578                         // } //-------------end unicode traitement--------------
579                         // else {
580                         if (currentCharacter != testedChar) {
581                                 currentPosition = temp;
582                                 return false;
583                         }
584                         unicodeAsBackSlash = false;
585                         // if (withoutUnicodePtr != 0)
586                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
587                         return true;
588                         // }
589                 } catch (IndexOutOfBoundsException e) {
590                         unicodeAsBackSlash = false;
591                         currentPosition = temp;
592                         return false;
593                 }
594         }
595
596         public final int getNextChar(char testedChar1, char testedChar2) {
597                 // INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
598                 // test can be done with (x==0) for the first and (x>0) for the second
599                 // handle the case of unicode.
600                 // when a unicode appears then we must use a buffer that holds char
601                 // internal values
602                 // At the end of this method currentCharacter holds the new visited char
603                 // and currentPosition points right next after it
604                 // Both previous lines are true if the currentCharacter is == to the
605                 // testedChar1/2
606                 // On false, no side effect has occured.
607                 // ALL getNextChar.... ARE OPTIMIZED COPIES
608                 int temp = currentPosition;
609                 try {
610                         int result;
611                         currentCharacter = source[currentPosition++];
612                         // if (((currentCharacter = source[currentPosition++]) == '\\')
613                         // && (source[currentPosition] == 'u')) {
614                         // //-------------unicode traitement ------------
615                         // int c1, c2, c3, c4;
616                         // int unicodeSize = 6;
617                         // currentPosition++;
618                         // while (source[currentPosition] == 'u') {
619                         // currentPosition++;
620                         // unicodeSize++;
621                         // }
622                         //
623                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
624                         // || c1 < 0)
625                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
626                         // || c2 < 0)
627                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
628                         // || c3 < 0)
629                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
630                         // || c4 < 0)) {
631                         // currentPosition = temp;
632                         // return 2;
633                         // }
634                         //
635                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
636                         // if (currentCharacter == testedChar1)
637                         // result = 0;
638                         // else if (currentCharacter == testedChar2)
639                         // result = 1;
640                         // else {
641                         // currentPosition = temp;
642                         // return -1;
643                         // }
644                         //
645                         // //need the unicode buffer
646                         // if (withoutUnicodePtr == 0) {
647                         // //buffer all the entries that have been left aside....
648                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
649                         // System.arraycopy(
650                         // source,
651                         // startPosition,
652                         // withoutUnicodeBuffer,
653                         // 1,
654                         // withoutUnicodePtr);
655                         // }
656                         // //fill the buffer with the char
657                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
658                         // return result;
659                         // } //-------------end unicode traitement--------------
660                         // else {
661                         if (currentCharacter == testedChar1)
662                                 result = 0;
663                         else if (currentCharacter == testedChar2)
664                                 result = 1;
665                         else {
666                                 currentPosition = temp;
667                                 return -1;
668                         }
669                         // if (withoutUnicodePtr != 0)
670                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
671                         return result;
672                         // }
673                 } catch (IndexOutOfBoundsException e) {
674                         currentPosition = temp;
675                         return -1;
676                 }
677         }
678
679         public final boolean getNextCharAsDigit() {
680                 // BOOLEAN
681                 // handle the case of unicode.
682                 // when a unicode appears then we must use a buffer that holds char
683                 // internal values
684                 // At the end of this method currentCharacter holds the new visited char
685                 // and currentPosition points right next after it
686                 // Both previous lines are true if the currentCharacter is a digit
687                 // On false, no side effect has occured.
688                 // ALL getNextChar.... ARE OPTIMIZED COPIES
689                 int temp = currentPosition;
690                 try {
691                         currentCharacter = source[currentPosition++];
692                         // if (((currentCharacter = source[currentPosition++]) == '\\')
693                         // && (source[currentPosition] == 'u')) {
694                         // //-------------unicode traitement ------------
695                         // int c1, c2, c3, c4;
696                         // int unicodeSize = 6;
697                         // currentPosition++;
698                         // while (source[currentPosition] == 'u') {
699                         // currentPosition++;
700                         // unicodeSize++;
701                         // }
702                         //
703                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
704                         // || c1 < 0)
705                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
706                         // || c2 < 0)
707                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
708                         // || c3 < 0)
709                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
710                         // || c4 < 0)) {
711                         // currentPosition = temp;
712                         // return false;
713                         // }
714                         //
715                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
716                         // if (!Character.isDigit(currentCharacter)) {
717                         // currentPosition = temp;
718                         // return false;
719                         // }
720                         //
721                         // //need the unicode buffer
722                         // if (withoutUnicodePtr == 0) {
723                         // //buffer all the entries that have been left aside....
724                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
725                         // System.arraycopy(
726                         // source,
727                         // startPosition,
728                         // withoutUnicodeBuffer,
729                         // 1,
730                         // withoutUnicodePtr);
731                         // }
732                         // //fill the buffer with the char
733                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
734                         // return true;
735                         // } //-------------end unicode traitement--------------
736                         // else {
737                         if (!Character.isDigit(currentCharacter)) {
738                                 currentPosition = temp;
739                                 return false;
740                         }
741                         // if (withoutUnicodePtr != 0)
742                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
743                         return true;
744                         // }
745                 } catch (IndexOutOfBoundsException e) {
746                         currentPosition = temp;
747                         return false;
748                 }
749         }
750
751         public final boolean getNextCharAsDigit(int radix) {
752                 // BOOLEAN
753                 // handle the case of unicode.
754                 // when a unicode appears then we must use a buffer that holds char
755                 // internal values
756                 // At the end of this method currentCharacter holds the new visited char
757                 // and currentPosition points right next after it
758                 // Both previous lines are true if the currentCharacter is a digit base on
759                 // radix
760                 // On false, no side effect has occured.
761                 // ALL getNextChar.... ARE OPTIMIZED COPIES
762                 int temp = currentPosition;
763                 try {
764                         currentCharacter = source[currentPosition++];
765                         // if (((currentCharacter = source[currentPosition++]) == '\\')
766                         // && (source[currentPosition] == 'u')) {
767                         // //-------------unicode traitement ------------
768                         // int c1, c2, c3, c4;
769                         // int unicodeSize = 6;
770                         // currentPosition++;
771                         // while (source[currentPosition] == 'u') {
772                         // currentPosition++;
773                         // unicodeSize++;
774                         // }
775                         //
776                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
777                         // || c1 < 0)
778                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
779                         // || c2 < 0)
780                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
781                         // || c3 < 0)
782                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
783                         // || c4 < 0)) {
784                         // currentPosition = temp;
785                         // return false;
786                         // }
787                         //
788                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
789                         // if (Character.digit(currentCharacter, radix) == -1) {
790                         // currentPosition = temp;
791                         // return false;
792                         // }
793                         //
794                         // //need the unicode buffer
795                         // if (withoutUnicodePtr == 0) {
796                         // //buffer all the entries that have been left aside....
797                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
798                         // System.arraycopy(
799                         // source,
800                         // startPosition,
801                         // withoutUnicodeBuffer,
802                         // 1,
803                         // withoutUnicodePtr);
804                         // }
805                         // //fill the buffer with the char
806                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
807                         // return true;
808                         // } //-------------end unicode traitement--------------
809                         // else {
810                         if (Character.digit(currentCharacter, radix) == -1) {
811                                 currentPosition = temp;
812                                 return false;
813                         }
814                         // if (withoutUnicodePtr != 0)
815                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
816                         return true;
817                         // }
818                 } catch (IndexOutOfBoundsException e) {
819                         currentPosition = temp;
820                         return false;
821                 }
822         }
823
824         public boolean getNextCharAsJavaIdentifierPart() {
825                 // BOOLEAN
826                 // handle the case of unicode.
827                 // when a unicode appears then we must use a buffer that holds char
828                 // internal values
829                 // At the end of this method currentCharacter holds the new visited char
830                 // and currentPosition points right next after it
831                 // Both previous lines are true if the currentCharacter is a
832                 // JavaIdentifierPart
833                 // On false, no side effect has occured.
834                 // ALL getNextChar.... ARE OPTIMIZED COPIES
835                 int temp = currentPosition;
836                 try {
837                         currentCharacter = source[currentPosition++];
838                         // if (((currentCharacter = source[currentPosition++]) == '\\')
839                         // && (source[currentPosition] == 'u')) {
840                         // //-------------unicode traitement ------------
841                         // int c1, c2, c3, c4;
842                         // int unicodeSize = 6;
843                         // currentPosition++;
844                         // while (source[currentPosition] == 'u') {
845                         // currentPosition++;
846                         // unicodeSize++;
847                         // }
848                         //
849                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
850                         // || c1 < 0)
851                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
852                         // || c2 < 0)
853                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
854                         // || c3 < 0)
855                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
856                         // || c4 < 0)) {
857                         // currentPosition = temp;
858                         // return false;
859                         // }
860                         //
861                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
862                         // if (!isPHPIdentifierPart(currentCharacter)) {
863                         // currentPosition = temp;
864                         // return false;
865                         // }
866                         //
867                         // //need the unicode buffer
868                         // if (withoutUnicodePtr == 0) {
869                         // //buffer all the entries that have been left aside....
870                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
871                         // System.arraycopy(
872                         // source,
873                         // startPosition,
874                         // withoutUnicodeBuffer,
875                         // 1,
876                         // withoutUnicodePtr);
877                         // }
878                         // //fill the buffer with the char
879                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
880                         // return true;
881                         // } //-------------end unicode traitement--------------
882                         // else {
883                         if (!isPHPIdentifierPart(currentCharacter)) {
884                                 currentPosition = temp;
885                                 return false;
886                         }
887                         // if (withoutUnicodePtr != 0)
888                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
889                         return true;
890                         // }
891                 } catch (IndexOutOfBoundsException e) {
892                         currentPosition = temp;
893                         return false;
894                 }
895         }
896
897         public int getCastOrParen() {
898                 int tempPosition = currentPosition;
899                 char tempCharacter = currentCharacter;
900                 int tempToken = TokenNameLPAREN;
901                 boolean found = false;
902                 StringBuffer buf = new StringBuffer();
903                 try {
904                         do {
905                                 currentCharacter = source[currentPosition++];
906                         } while (currentCharacter == ' ' || currentCharacter == '\t');
907                         while (ObviousIdentCharNatures[currentCharacter] == C_LETTER) {
908                                 // while((currentCharacter >= 'a' && currentCharacter <= 'z') ||
909                                 // (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
910                                 buf.append(currentCharacter);
911                                 currentCharacter = source[currentPosition++];
912                         }
913                         if (buf.length() >= 3 && buf.length() <= 7) {
914                                 char[] data = buf.toString().toCharArray();
915                                 int index = 0;
916                                 switch (data.length) {
917                                 case 3:
918                                         // int
919                                         if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
920                                                 found = true;
921                                                 tempToken = TokenNameintCAST;
922                                         }
923                                         break;
924                                 case 4:
925                                         // bool real
926                                         if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
927                                                 found = true;
928                                                 tempToken = TokenNameboolCAST;
929                                         } else {
930                                                 index = 0;
931                                                 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
932                                                         found = true;
933                                                         tempToken = TokenNamedoubleCAST;
934                                                 }
935                                         }
936                                         break;
937                                 case 5:
938                                         // array unset float
939                                         if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
940                                                         && (data[++index] == 'y')) {
941                                                 found = true;
942                                                 tempToken = TokenNamearrayCAST;
943                                         } else {
944                                                 index = 0;
945                                                 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
946                                                                 && (data[++index] == 't')) {
947                                                         found = true;
948                                                         tempToken = TokenNameunsetCAST;
949                                                 } else {
950                                                         index = 0;
951                                                         if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
952                                                                         && (data[++index] == 't')) {
953                                                                 found = true;
954                                                                 tempToken = TokenNamedoubleCAST;
955                                                         }
956                                                 }
957                                         }
958                                         break;
959                                 case 6:
960                                         // object string double
961                                         if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
962                                                         && (data[++index] == 'c') && (data[++index] == 't')) {
963                                                 found = true;
964                                                 tempToken = TokenNameobjectCAST;
965                                         } else {
966                                                 index = 0;
967                                                 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
968                                                                 && (data[++index] == 'n') && (data[++index] == 'g')) {
969                                                         found = true;
970                                                         tempToken = TokenNamestringCAST;
971                                                 } else {
972                                                         index = 0;
973                                                         if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
974                                                                         && (data[++index] == 'l') && (data[++index] == 'e')) {
975                                                                 found = true;
976                                                                 tempToken = TokenNamedoubleCAST;
977                                                         }
978                                                 }
979                                         }
980                                         break;
981                                 case 7:
982                                         // boolean integer
983                                         if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
984                                                         && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
985                                                 found = true;
986                                                 tempToken = TokenNameboolCAST;
987                                         } else {
988                                                 index = 0;
989                                                 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
990                                                                 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
991                                                         found = true;
992                                                         tempToken = TokenNameintCAST;
993                                                 }
994                                         }
995                                         break;
996                                 }
997                                 if (found) {
998                                         while (currentCharacter == ' ' || currentCharacter == '\t') {
999                                                 currentCharacter = source[currentPosition++];
1000                                         }
1001                                         if (currentCharacter == ')') {
1002                                                 return tempToken;
1003                                         }
1004                                 }
1005                         }
1006                 } catch (IndexOutOfBoundsException e) {
1007                 }
1008                 currentCharacter = tempCharacter;
1009                 currentPosition = tempPosition;
1010                 return TokenNameLPAREN;
1011         }
1012
1013         public void consumeStringInterpolated() throws InvalidInputException {
1014                 try {
1015                         // consume next character
1016                         unicodeAsBackSlash = false;
1017                         currentCharacter = source[currentPosition++];
1018                         // if (((currentCharacter = source[currentPosition++]) == '\\')
1019                         // && (source[currentPosition] == 'u')) {
1020                         // getNextUnicodeChar();
1021                         // } else {
1022                         // if (withoutUnicodePtr != 0) {
1023                         // withoutUnicodeBuffer[++withoutUnicodePtr] =
1024                         // currentCharacter;
1025                         // }
1026                         // }
1027                         while (currentCharacter != '`') {
1028                                 /** ** in PHP \r and \n are valid in string literals *** */
1029                                 // if ((currentCharacter == '\n')
1030                                 // || (currentCharacter == '\r')) {
1031                                 // // relocate if finding another quote fairly close: thus unicode
1032                                 // '/u000D' will be fully consumed
1033                                 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1034                                 // if (currentPosition + lookAhead == source.length)
1035                                 // break;
1036                                 // if (source[currentPosition + lookAhead] == '\n')
1037                                 // break;
1038                                 // if (source[currentPosition + lookAhead] == '\"') {
1039                                 // currentPosition += lookAhead + 1;
1040                                 // break;
1041                                 // }
1042                                 // }
1043                                 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1044                                 // }
1045                                 if (currentCharacter == '\\') {
1046                                         int escapeSize = currentPosition;
1047                                         boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1048                                         // scanEscapeCharacter make a side effect on this value and we need
1049                                         // the previous value few lines down this one
1050                                         scanDoubleQuotedEscapeCharacter();
1051                                         escapeSize = currentPosition - escapeSize;
1052                                         if (withoutUnicodePtr == 0) {
1053                                                 // buffer all the entries that have been left aside....
1054                                                 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1055                                                 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1056                                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1057                                         } else { // overwrite the / in the buffer
1058                                                 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1059                                                 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1060                                                         // where only one is correct
1061                                                         withoutUnicodePtr--;
1062                                                 }
1063                                         }
1064                                 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1065                                         if (recordLineSeparator) {
1066                                                 pushLineSeparator();
1067                                         }
1068                                 }
1069                                 // consume next character
1070                                 unicodeAsBackSlash = false;
1071                                 currentCharacter = source[currentPosition++];
1072                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
1073                                 // && (source[currentPosition] == 'u')) {
1074                                 // getNextUnicodeChar();
1075                                 // } else {
1076                                 if (withoutUnicodePtr != 0) {
1077                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1078                                 }
1079                                 // }
1080                         }
1081                 } catch (IndexOutOfBoundsException e) {
1082                         // reset end position for error reporting
1083                         currentPosition -= 2;
1084                         throw new InvalidInputException(UNTERMINATED_STRING);
1085                 } catch (InvalidInputException e) {
1086                         if (e.getMessage().equals(INVALID_ESCAPE)) {
1087                                 // relocate if finding another quote fairly close: thus unicode
1088                                 // '/u000D' will be fully consumed
1089                                 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1090                                         if (currentPosition + lookAhead == source.length)
1091                                                 break;
1092                                         if (source[currentPosition + lookAhead] == '\n')
1093                                                 break;
1094                                         if (source[currentPosition + lookAhead] == '`') {
1095                                                 currentPosition += lookAhead + 1;
1096                                                 break;
1097                                         }
1098                                 }
1099                         }
1100                         throw e; // rethrow
1101                 }
1102                 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1103                         // //$NON-NLS-?$ where ? is an
1104                         // int.
1105                         if (currentLine == null) {
1106                                 currentLine = new NLSLine();
1107                                 lines.add(currentLine);
1108                         }
1109                         currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1110                 }
1111         }
1112
1113         public void consumeStringConstant() throws InvalidInputException {
1114                 try {
1115                         // consume next character
1116                         unicodeAsBackSlash = false;
1117                         currentCharacter = source[currentPosition++];
1118                         // if (((currentCharacter = source[currentPosition++]) == '\\')
1119                         // && (source[currentPosition] == 'u')) {
1120                         // getNextUnicodeChar();
1121                         // } else {
1122                         // if (withoutUnicodePtr != 0) {
1123                         // withoutUnicodeBuffer[++withoutUnicodePtr] =
1124                         // currentCharacter;
1125                         // }
1126                         // }
1127                         while (currentCharacter != '\'') {
1128                                 /** ** in PHP \r and \n are valid in string literals *** */
1129                                 // if ((currentCharacter == '\n')
1130                                 // || (currentCharacter == '\r')) {
1131                                 // // relocate if finding another quote fairly close: thus unicode
1132                                 // '/u000D' will be fully consumed
1133                                 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1134                                 // if (currentPosition + lookAhead == source.length)
1135                                 // break;
1136                                 // if (source[currentPosition + lookAhead] == '\n')
1137                                 // break;
1138                                 // if (source[currentPosition + lookAhead] == '\"') {
1139                                 // currentPosition += lookAhead + 1;
1140                                 // break;
1141                                 // }
1142                                 // }
1143                                 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1144                                 // }
1145                                 if (currentCharacter == '\\') {
1146                                         int escapeSize = currentPosition;
1147                                         boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1148                                         // scanEscapeCharacter make a side effect on this value and we need
1149                                         // the previous value few lines down this one
1150                                         scanSingleQuotedEscapeCharacter();
1151                                         escapeSize = currentPosition - escapeSize;
1152                                         if (withoutUnicodePtr == 0) {
1153                                                 // buffer all the entries that have been left aside....
1154                                                 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1155                                                 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1156                                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1157                                         } else { // overwrite the / in the buffer
1158                                                 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1159                                                 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1160                                                         // where only one is correct
1161                                                         withoutUnicodePtr--;
1162                                                 }
1163                                         }
1164                                 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1165                                         if (recordLineSeparator) {
1166                                                 pushLineSeparator();
1167                                         }
1168                                 }
1169                                 // consume next character
1170                                 unicodeAsBackSlash = false;
1171                                 currentCharacter = source[currentPosition++];
1172                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
1173                                 // && (source[currentPosition] == 'u')) {
1174                                 // getNextUnicodeChar();
1175                                 // } else {
1176                                 if (withoutUnicodePtr != 0) {
1177                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1178                                 }
1179                                 // }
1180                         }
1181                 } catch (IndexOutOfBoundsException e) {
1182                         // reset end position for error reporting
1183                         currentPosition -= 2;
1184                         throw new InvalidInputException(UNTERMINATED_STRING);
1185                 } catch (InvalidInputException e) {
1186                         if (e.getMessage().equals(INVALID_ESCAPE)) {
1187                                 // relocate if finding another quote fairly close: thus unicode
1188                                 // '/u000D' will be fully consumed
1189                                 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1190                                         if (currentPosition + lookAhead == source.length)
1191                                                 break;
1192                                         if (source[currentPosition + lookAhead] == '\n')
1193                                                 break;
1194                                         if (source[currentPosition + lookAhead] == '\'') {
1195                                                 currentPosition += lookAhead + 1;
1196                                                 break;
1197                                         }
1198                                 }
1199                         }
1200                         throw e; // rethrow
1201                 }
1202                 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1203                         // //$NON-NLS-?$ where ? is an
1204                         // int.
1205                         if (currentLine == null) {
1206                                 currentLine = new NLSLine();
1207                                 lines.add(currentLine);
1208                         }
1209                         currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1210                 }
1211         }
1212
1213         public void consumeStringLiteral() throws InvalidInputException {
1214                 try {
1215                         boolean openDollarBrace = false;
1216                         // consume next character
1217                         unicodeAsBackSlash = false;
1218                         currentCharacter = source[currentPosition++];
1219                         while (currentCharacter != '"' || openDollarBrace) {
1220                                 /** ** in PHP \r and \n are valid in string literals *** */
1221                                 if (currentCharacter == '\\') {
1222                                         int escapeSize = currentPosition;
1223                                         boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1224                                         // scanEscapeCharacter make a side effect on this value and we need
1225                                         // the previous value few lines down this one
1226                                         scanDoubleQuotedEscapeCharacter();
1227                                         escapeSize = currentPosition - escapeSize;
1228                                         if (withoutUnicodePtr == 0) {
1229                                                 // buffer all the entries that have been left aside....
1230                                                 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1231                                                 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1232                                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1233                                         } else { // overwrite the / in the buffer
1234                                                 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1235                                                 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1236                                                         // where only one is correct
1237                                                         withoutUnicodePtr--;
1238                                                 }
1239                                         }
1240                                 } else if (currentCharacter == '$' && source[currentPosition] == '{') {
1241                                         openDollarBrace = true;
1242                                 } else if (currentCharacter == '{' && source[currentPosition] == '$') {
1243                                         openDollarBrace = true;
1244                                 } else if (currentCharacter == '}') {
1245                                         openDollarBrace = false;
1246                                 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1247                                         if (recordLineSeparator) {
1248                                                 pushLineSeparator();
1249                                         }
1250                                 }
1251                                 // consume next character
1252                                 unicodeAsBackSlash = false;
1253                                 currentCharacter = source[currentPosition++];
1254                                 if (withoutUnicodePtr != 0) {
1255                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1256                                 }
1257                         }
1258                 } catch (IndexOutOfBoundsException e) {
1259                         // reset end position for error reporting
1260                         currentPosition -= 2;
1261                         throw new InvalidInputException(UNTERMINATED_STRING);
1262                 } catch (InvalidInputException e) {
1263                         if (e.getMessage().equals(INVALID_ESCAPE)) {
1264                                 // relocate if finding another quote fairly close: thus unicode
1265                                 // '/u000D' will be fully consumed
1266                                 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1267                                         if (currentPosition + lookAhead == source.length)
1268                                                 break;
1269                                         if (source[currentPosition + lookAhead] == '\n')
1270                                                 break;
1271                                         if (source[currentPosition + lookAhead] == '\"') {
1272                                                 currentPosition += lookAhead + 1;
1273                                                 break;
1274                                         }
1275                                 }
1276                         }
1277                         throw e; // rethrow
1278                 }
1279                 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1280                         // //$NON-NLS-?$ where ? is an
1281                         // int.
1282                         if (currentLine == null) {
1283                                 currentLine = new NLSLine();
1284                                 lines.add(currentLine);
1285                         }
1286                         currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1287                 }
1288         }
1289
1290         public int getNextToken() throws InvalidInputException {
1291                 phpExpressionTag = false;
1292                 if (!phpMode) {
1293                         return getInlinedHTMLToken(currentPosition);
1294                 }
1295                 if (phpMode) {
1296                         this.wasAcr = false;
1297                         if (diet) {
1298                                 jumpOverMethodBody();
1299                                 diet = false;
1300                                 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1301                         }
1302                         try {
1303                                 while (true) {
1304                                         withoutUnicodePtr = 0;
1305                                         // start with a new token
1306                                         char encapsedChar = ' ';
1307                                         // if (!encapsedStringStack.isEmpty()) {
1308                                         // encapsedChar = ((Character)
1309                                         // encapsedStringStack.peek()).charValue();
1310                                         // }
1311                                         // if (encapsedChar != '$' && encapsedChar != ' ') {
1312                                         // currentCharacter = source[currentPosition++];
1313                                         // if (currentCharacter == encapsedChar) {
1314                                         // switch (currentCharacter) {
1315                                         // case '`':
1316                                         // return TokenNameEncapsedString0;
1317                                         // case '\'':
1318                                         // return TokenNameEncapsedString1;
1319                                         // case '"':
1320                                         // return TokenNameEncapsedString2;
1321                                         // }
1322                                         // }
1323                                         // while (currentCharacter != encapsedChar) {
1324                                         // /** ** in PHP \r and \n are valid in string literals *** */
1325                                         // switch (currentCharacter) {
1326                                         // case '\\':
1327                                         // int escapeSize = currentPosition;
1328                                         // boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1329                                         // //scanEscapeCharacter make a side effect on this value and
1330                                         // // we need the previous value few lines down this one
1331                                         // scanDoubleQuotedEscapeCharacter();
1332                                         // escapeSize = currentPosition - escapeSize;
1333                                         // if (withoutUnicodePtr == 0) {
1334                                         // //buffer all the entries that have been left aside....
1335                                         // withoutUnicodePtr = currentPosition - escapeSize - 1 -
1336                                         // startPosition;
1337                                         // System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1338                                         // withoutUnicodePtr);
1339                                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1340                                         // } else { //overwrite the / in the buffer
1341                                         // withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1342                                         // if (backSlashAsUnicodeInString) { //there are TWO \ in
1343                                         // withoutUnicodePtr--;
1344                                         // }
1345                                         // }
1346                                         // break;
1347                                         // case '\r':
1348                                         // case '\n':
1349                                         // if (recordLineSeparator) {
1350                                         // pushLineSeparator();
1351                                         // }
1352                                         // break;
1353                                         // case '$':
1354                                         // if (isPHPIdentifierStart(source[currentPosition]) ||
1355                                         // source[currentPosition] == '{') {
1356                                         // currentPosition--;
1357                                         // encapsedStringStack.push(new Character('$'));
1358                                         // return TokenNameSTRING;
1359                                         // }
1360                                         // break;
1361                                         // case '{':
1362                                         // if (source[currentPosition] == '$') { // CURLY_OPEN
1363                                         // currentPosition--;
1364                                         // encapsedStringStack.push(new Character('$'));
1365                                         // return TokenNameSTRING;
1366                                         // }
1367                                         // }
1368                                         // // consume next character
1369                                         // unicodeAsBackSlash = false;
1370                                         // currentCharacter = source[currentPosition++];
1371                                         // if (withoutUnicodePtr != 0) {
1372                                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1373                                         // }
1374                                         // // }
1375                                         // } // end while
1376                                         // currentPosition--;
1377                                         // return TokenNameSTRING;
1378                                         // }
1379                                         // ---------Consume white space and handles startPosition---------
1380                                         int whiteStart = currentPosition;
1381                                         startPosition = currentPosition;
1382                                         currentCharacter = source[currentPosition++];
1383                                         // if (encapsedChar == '$') {
1384                                         // switch (currentCharacter) {
1385                                         // case '\\':
1386                                         // currentCharacter = source[currentPosition++];
1387                                         // return TokenNameSTRING;
1388                                         // case '{':
1389                                         // if (encapsedChar == '$') {
1390                                         // if (getNextChar('$'))
1391                                         // return TokenNameLBRACE_DOLLAR;
1392                                         // }
1393                                         // return TokenNameLBRACE;
1394                                         // case '}':
1395                                         // return TokenNameRBRACE;
1396                                         // case '[':
1397                                         // return TokenNameLBRACKET;
1398                                         // case ']':
1399                                         // return TokenNameRBRACKET;
1400                                         // case '\'':
1401                                         // if (tokenizeStrings) {
1402                                         // consumeStringConstant();
1403                                         // return TokenNameStringSingleQuote;
1404                                         // }
1405                                         // return TokenNameEncapsedString1;
1406                                         // case '"':
1407                                         // return TokenNameEncapsedString2;
1408                                         // case '`':
1409                                         // if (tokenizeStrings) {
1410                                         // consumeStringInterpolated();
1411                                         // return TokenNameStringInterpolated;
1412                                         // }
1413                                         // return TokenNameEncapsedString0;
1414                                         // case '-':
1415                                         // if (getNextChar('>'))
1416                                         // return TokenNameMINUS_GREATER;
1417                                         // return TokenNameSTRING;
1418                                         // default:
1419                                         // if (currentCharacter == '$') {
1420                                         // int oldPosition = currentPosition;
1421                                         // try {
1422                                         // currentCharacter = source[currentPosition++];
1423                                         // if (currentCharacter == '{') {
1424                                         // return TokenNameDOLLAR_LBRACE;
1425                                         // }
1426                                         // if (isPHPIdentifierStart(currentCharacter)) {
1427                                         // return scanIdentifierOrKeyword(true);
1428                                         // } else {
1429                                         // currentPosition = oldPosition;
1430                                         // return TokenNameSTRING;
1431                                         // }
1432                                         // } catch (IndexOutOfBoundsException e) {
1433                                         // currentPosition = oldPosition;
1434                                         // return TokenNameSTRING;
1435                                         // }
1436                                         // }
1437                                         // if (isPHPIdentifierStart(currentCharacter))
1438                                         // return scanIdentifierOrKeyword(false);
1439                                         // if (Character.isDigit(currentCharacter))
1440                                         // return scanNumber(false);
1441                                         // return TokenNameERROR;
1442                                         // }
1443                                         // }
1444                                         // boolean isWhiteSpace;
1445
1446                                         while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1447                                                 startPosition = currentPosition;
1448                                                 currentCharacter = source[currentPosition++];
1449                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
1450                                                 // && (source[currentPosition] == 'u')) {
1451                                                 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1452                                                 // } else {
1453                                                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1454                                                         checkNonExternalizeString();
1455                                                         if (recordLineSeparator) {
1456                                                                 pushLineSeparator();
1457                                                         } else {
1458                                                                 currentLine = null;
1459                                                         }
1460                                                 }
1461                                                 // isWhiteSpace = (currentCharacter == ' ')
1462                                                 // || Character.isWhitespace(currentCharacter);
1463                                                 // }
1464                                         }
1465                                         if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1466                                                 // reposition scanner in case we are interested by spaces as tokens
1467                                                 currentPosition--;
1468                                                 startPosition = whiteStart;
1469                                                 return TokenNameWHITESPACE;
1470                                         }
1471                                         // little trick to get out in the middle of a source compuation
1472                                         if (currentPosition > eofPosition)
1473                                                 return TokenNameEOF;
1474                                         // ---------Identify the next token-------------
1475                                         switch (currentCharacter) {
1476                                         case '(':
1477                                                 return getCastOrParen();
1478                                         case ')':
1479                                                 return TokenNameRPAREN;
1480                                         case '{':
1481                                                 return TokenNameLBRACE;
1482                                         case '}':
1483                                                 return TokenNameRBRACE;
1484                                         case '[':
1485                                                 return TokenNameLBRACKET;
1486                                         case ']':
1487                                                 return TokenNameRBRACKET;
1488                                         case ';':
1489                                                 return TokenNameSEMICOLON;
1490                                         case ',':
1491                                                 return TokenNameCOMMA;
1492                                         case '.':
1493                                                 if (getNextChar('='))
1494                                                         return TokenNameDOT_EQUAL;
1495                                                 if (getNextCharAsDigit())
1496                                                         return scanNumber(true);
1497                                                 return TokenNameDOT;
1498                                         case '+': {
1499                                                 int test;
1500                                                 if ((test = getNextChar('+', '=')) == 0)
1501                                                         return TokenNamePLUS_PLUS;
1502                                                 if (test > 0)
1503                                                         return TokenNamePLUS_EQUAL;
1504                                                 return TokenNamePLUS;
1505                                         }
1506                                         case '-': {
1507                                                 int test;
1508                                                 if ((test = getNextChar('-', '=')) == 0)
1509                                                         return TokenNameMINUS_MINUS;
1510                                                 if (test > 0)
1511                                                         return TokenNameMINUS_EQUAL;
1512                                                 if (getNextChar('>'))
1513                                                         return TokenNameMINUS_GREATER;
1514                                                 return TokenNameMINUS;
1515                                         }
1516                                         case '~':
1517                                                 if (getNextChar('='))
1518                                                         return TokenNameTWIDDLE_EQUAL;
1519                                                 return TokenNameTWIDDLE;
1520                                         case '!':
1521                                                 if (getNextChar('=')) {
1522                                                         if (getNextChar('=')) {
1523                                                                 return TokenNameNOT_EQUAL_EQUAL;
1524                                                         }
1525                                                         return TokenNameNOT_EQUAL;
1526                                                 }
1527                                                 return TokenNameNOT;
1528                                         case '*':
1529                                                 if (getNextChar('='))
1530                                                         return TokenNameMULTIPLY_EQUAL;
1531                                                 return TokenNameMULTIPLY;
1532                                         case '%':
1533                                                 if (getNextChar('='))
1534                                                         return TokenNameREMAINDER_EQUAL;
1535                                                 return TokenNameREMAINDER;
1536                                         case '<': {
1537                                                 int oldPosition = currentPosition;
1538                                                 try {
1539                                                         currentCharacter = source[currentPosition++];
1540                                                 } catch (IndexOutOfBoundsException e) {
1541                                                         currentPosition = oldPosition;
1542                                                         return TokenNameLESS;
1543                                                 }
1544                                                 switch (currentCharacter) {
1545                                                 case '=':
1546                                                         return TokenNameLESS_EQUAL;
1547                                                 case '>':
1548                                                         return TokenNameNOT_EQUAL;
1549                                                 case '<':
1550                                                         if (getNextChar('='))
1551                                                                 return TokenNameLEFT_SHIFT_EQUAL;
1552                                                         if (getNextChar('<')) {
1553                                                                 currentCharacter = source[currentPosition++];
1554                                                                 while (Character.isWhitespace(currentCharacter)) {
1555                                                                         currentCharacter = source[currentPosition++];
1556                                                                 }
1557                                                                 int heredocStart = currentPosition - 1;
1558                                                                 int heredocLength = 0;
1559                                                                 if (isPHPIdentifierStart(currentCharacter)) {
1560                                                                         currentCharacter = source[currentPosition++];
1561                                                                 } else {
1562                                                                         return TokenNameERROR;
1563                                                                 }
1564                                                                 while (isPHPIdentifierPart(currentCharacter)) {
1565                                                                         currentCharacter = source[currentPosition++];
1566                                                                 }
1567                                                                 heredocLength = currentPosition - heredocStart - 1;
1568                                                                 // heredoc end-tag determination
1569                                                                 boolean endTag = true;
1570                                                                 char ch;
1571                                                                 do {
1572                                                                         ch = source[currentPosition++];
1573                                                                         if (ch == '\r' || ch == '\n') {
1574                                                                                 if (recordLineSeparator) {
1575                                                                                         pushLineSeparator();
1576                                                                                 } else {
1577                                                                                         currentLine = null;
1578                                                                                 }
1579                                                                                 for (int i = 0; i < heredocLength; i++) {
1580                                                                                         if (source[currentPosition + i] != source[heredocStart + i]) {
1581                                                                                                 endTag = false;
1582                                                                                                 break;
1583                                                                                         }
1584                                                                                 }
1585                                                                                 if (endTag) {
1586                                                                                         currentPosition += heredocLength - 1;
1587                                                                                         currentCharacter = source[currentPosition++];
1588                                                                                         break; // do...while loop
1589                                                                                 } else {
1590                                                                                         endTag = true;
1591                                                                                 }
1592                                                                         }
1593                                                                 } while (true);
1594                                                                 return TokenNameHEREDOC;
1595                                                         }
1596                                                         return TokenNameLEFT_SHIFT;
1597                                                 }
1598                                                 currentPosition = oldPosition;
1599                                                 return TokenNameLESS;
1600                                         }
1601                                         case '>': {
1602                                                 int test;
1603                                                 if ((test = getNextChar('=', '>')) == 0)
1604                                                         return TokenNameGREATER_EQUAL;
1605                                                 if (test > 0) {
1606                                                         if ((test = getNextChar('=', '>')) == 0)
1607                                                                 return TokenNameRIGHT_SHIFT_EQUAL;
1608                                                         return TokenNameRIGHT_SHIFT;
1609                                                 }
1610                                                 return TokenNameGREATER;
1611                                         }
1612                                         case '=':
1613                                                 if (getNextChar('=')) {
1614                                                         if (getNextChar('=')) {
1615                                                                 return TokenNameEQUAL_EQUAL_EQUAL;
1616                                                         }
1617                                                         return TokenNameEQUAL_EQUAL;
1618                                                 }
1619                                                 if (getNextChar('>'))
1620                                                         return TokenNameEQUAL_GREATER;
1621                                                 return TokenNameEQUAL;
1622                                         case '&': {
1623                                                 int test;
1624                                                 if ((test = getNextChar('&', '=')) == 0)
1625                                                         return TokenNameAND_AND;
1626                                                 if (test > 0)
1627                                                         return TokenNameAND_EQUAL;
1628                                                 return TokenNameAND;
1629                                         }
1630                                         case '|': {
1631                                                 int test;
1632                                                 if ((test = getNextChar('|', '=')) == 0)
1633                                                         return TokenNameOR_OR;
1634                                                 if (test > 0)
1635                                                         return TokenNameOR_EQUAL;
1636                                                 return TokenNameOR;
1637                                         }
1638                                         case '^':
1639                                                 if (getNextChar('='))
1640                                                         return TokenNameXOR_EQUAL;
1641                                                 return TokenNameXOR;
1642                                         case '?':
1643                                                 if (getNextChar('>')) {
1644                                                         phpMode = false;
1645                                                         if (currentPosition == source.length) {
1646                                                                 phpMode = true;
1647                                                                 return TokenNameINLINE_HTML;
1648                                                         }
1649                                                         return getInlinedHTMLToken(currentPosition - 2);
1650                                                 }
1651                                                 return TokenNameQUESTION;
1652                                         case ':':
1653                                                 if (getNextChar(':'))
1654                                                         return TokenNamePAAMAYIM_NEKUDOTAYIM;
1655                                                 return TokenNameCOLON;
1656                                         case '@':
1657                                                 return TokenNameAT;
1658                                         case '\'':
1659                                                 consumeStringConstant();
1660                                                 return TokenNameStringSingleQuote;
1661                                         case '"':
1662                                                 // if (tokenizeStrings) {
1663                                                 consumeStringLiteral();
1664                                                 return TokenNameStringDoubleQuote;
1665                                         // }
1666                                         // return TokenNameEncapsedString2;
1667                                         case '`':
1668                                                 // if (tokenizeStrings) {
1669                                                 consumeStringInterpolated();
1670                                                 return TokenNameStringInterpolated;
1671                                         // }
1672                                         // return TokenNameEncapsedString0;
1673                                         case '#':
1674                                         case '/': {
1675                                                 char startChar = currentCharacter;
1676                                                 if (getNextChar('=') && startChar == '/') {
1677                                                         return TokenNameDIVIDE_EQUAL;
1678                                                 }
1679                                                 int test;
1680                                                 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1681                                                         // line comment
1682                                                         this.lastCommentLinePosition = this.currentPosition;
1683                                                         int endPositionForLineComment = 0;
1684                                                         try { // get the next char
1685                                                                 currentCharacter = source[currentPosition++];
1686                                                                 // if (((currentCharacter = source[currentPosition++])
1687                                                                 // == '\\')
1688                                                                 // && (source[currentPosition] == 'u')) {
1689                                                                 // //-------------unicode traitement ------------
1690                                                                 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1691                                                                 // currentPosition++;
1692                                                                 // while (source[currentPosition] == 'u') {
1693                                                                 // currentPosition++;
1694                                                                 // }
1695                                                                 // if ((c1 =
1696                                                                 // Character.getNumericValue(source[currentPosition++]))
1697                                                                 // > 15
1698                                                                 // || c1 < 0
1699                                                                 // || (c2 =
1700                                                                 // Character.getNumericValue(source[currentPosition++]))
1701                                                                 // > 15
1702                                                                 // || c2 < 0
1703                                                                 // || (c3 =
1704                                                                 // Character.getNumericValue(source[currentPosition++]))
1705                                                                 // > 15
1706                                                                 // || c3 < 0
1707                                                                 // || (c4 =
1708                                                                 // Character.getNumericValue(source[currentPosition++]))
1709                                                                 // > 15
1710                                                                 // || c4 < 0) {
1711                                                                 // throw new
1712                                                                 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1713                                                                 // } else {
1714                                                                 // currentCharacter =
1715                                                                 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1716                                                                 // }
1717                                                                 // }
1718                                                                 // handle the \\u case manually into comment
1719                                                                 // if (currentCharacter == '\\') {
1720                                                                 // if (source[currentPosition] == '\\')
1721                                                                 // currentPosition++;
1722                                                                 // } //jump over the \\
1723                                                                 boolean isUnicode = false;
1724                                                                 while (currentCharacter != '\r' && currentCharacter != '\n') {
1725                                                                         this.lastCommentLinePosition = this.currentPosition;
1726                                                                         if (currentCharacter == '?') {
1727                                                                                 if (getNextChar('>')) {
1728                                                                                         // ?> breaks line comments
1729                                                                                         startPosition = currentPosition - 2;
1730                                                                                         phpMode = false;
1731                                                                                         return TokenNameINLINE_HTML;
1732                                                                                 }
1733                                                                         }
1734                                                                         // get the next char
1735                                                                         isUnicode = false;
1736                                                                         currentCharacter = source[currentPosition++];
1737                                                                         // if (((currentCharacter = source[currentPosition++])
1738                                                                         // == '\\')
1739                                                                         // && (source[currentPosition] == 'u')) {
1740                                                                         // isUnicode = true;
1741                                                                         // //-------------unicode traitement ------------
1742                                                                         // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1743                                                                         // currentPosition++;
1744                                                                         // while (source[currentPosition] == 'u') {
1745                                                                         // currentPosition++;
1746                                                                         // }
1747                                                                         // if ((c1 =
1748                                                                         // Character.getNumericValue(source[currentPosition++]))
1749                                                                         // > 15
1750                                                                         // || c1 < 0
1751                                                                         // || (c2 =
1752                                                                         // Character.getNumericValue(
1753                                                                         // source[currentPosition++]))
1754                                                                         // > 15
1755                                                                         // || c2 < 0
1756                                                                         // || (c3 =
1757                                                                         // Character.getNumericValue(
1758                                                                         // source[currentPosition++]))
1759                                                                         // > 15
1760                                                                         // || c3 < 0
1761                                                                         // || (c4 =
1762                                                                         // Character.getNumericValue(
1763                                                                         // source[currentPosition++]))
1764                                                                         // > 15
1765                                                                         // || c4 < 0) {
1766                                                                         // throw new
1767                                                                         // InvalidInputException(INVALID_UNICODE_ESCAPE);
1768                                                                         // } else {
1769                                                                         // currentCharacter =
1770                                                                         // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1771                                                                         // }
1772                                                                         // }
1773                                                                         // handle the \\u case manually into comment
1774                                                                         // if (currentCharacter == '\\') {
1775                                                                         // if (source[currentPosition] == '\\')
1776                                                                         // currentPosition++;
1777                                                                         // } //jump over the \\
1778                                                                 }
1779                                                                 if (isUnicode) {
1780                                                                         endPositionForLineComment = currentPosition - 6;
1781                                                                 } else {
1782                                                                         endPositionForLineComment = currentPosition - 1;
1783                                                                 }
1784                                                                 // recordComment(false);
1785                                                                 recordComment(TokenNameCOMMENT_LINE);
1786                                                                 if (this.taskTags != null)
1787                                                                         checkTaskTag(this.startPosition, this.currentPosition);
1788                                                                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1789                                                                         checkNonExternalizeString();
1790                                                                         if (recordLineSeparator) {
1791                                                                                 if (isUnicode) {
1792                                                                                         pushUnicodeLineSeparator();
1793                                                                                 } else {
1794                                                                                         pushLineSeparator();
1795                                                                                 }
1796                                                                         } else {
1797                                                                                 currentLine = null;
1798                                                                         }
1799                                                                 }
1800                                                                 if (tokenizeComments) {
1801                                                                         if (!isUnicode) {
1802                                                                                 currentPosition = endPositionForLineComment;
1803                                                                                 // reset one character behind
1804                                                                         }
1805                                                                         return TokenNameCOMMENT_LINE;
1806                                                                 }
1807                                                         } catch (IndexOutOfBoundsException e) { // an eof will them
1808                                                                 // be generated
1809                                                                 if (tokenizeComments) {
1810                                                                         currentPosition--;
1811                                                                         // reset one character behind
1812                                                                         return TokenNameCOMMENT_LINE;
1813                                                                 }
1814                                                         }
1815                                                         break;
1816                                                 }
1817                                                 if (test > 0) {
1818                                                         // traditional and annotation comment
1819                                                         boolean isJavadoc = false, star = false;
1820                                                         // consume next character
1821                                                         unicodeAsBackSlash = false;
1822                                                         currentCharacter = source[currentPosition++];
1823                                                         // if (((currentCharacter = source[currentPosition++]) ==
1824                                                         // '\\')
1825                                                         // && (source[currentPosition] == 'u')) {
1826                                                         // getNextUnicodeChar();
1827                                                         // } else {
1828                                                         // if (withoutUnicodePtr != 0) {
1829                                                         // withoutUnicodeBuffer[++withoutUnicodePtr] =
1830                                                         // currentCharacter;
1831                                                         // }
1832                                                         // }
1833                                                         if (currentCharacter == '*') {
1834                                                                 isJavadoc = true;
1835                                                                 star = true;
1836                                                         }
1837                                                         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1838                                                                 checkNonExternalizeString();
1839                                                                 if (recordLineSeparator) {
1840                                                                         pushLineSeparator();
1841                                                                 } else {
1842                                                                         currentLine = null;
1843                                                                 }
1844                                                         }
1845                                                         try { // get the next char
1846                                                                 currentCharacter = source[currentPosition++];
1847                                                                 // if (((currentCharacter = source[currentPosition++])
1848                                                                 // == '\\')
1849                                                                 // && (source[currentPosition] == 'u')) {
1850                                                                 // //-------------unicode traitement ------------
1851                                                                 // getNextUnicodeChar();
1852                                                                 // }
1853                                                                 // handle the \\u case manually into comment
1854                                                                 // if (currentCharacter == '\\') {
1855                                                                 // if (source[currentPosition] == '\\')
1856                                                                 // currentPosition++;
1857                                                                 // //jump over the \\
1858                                                                 // }
1859                                                                 // empty comment is not a javadoc /**/
1860                                                                 if (currentCharacter == '/') {
1861                                                                         isJavadoc = false;
1862                                                                 }
1863                                                                 // loop until end of comment */
1864                                                                 while ((currentCharacter != '/') || (!star)) {
1865                                                                         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1866                                                                                 checkNonExternalizeString();
1867                                                                                 if (recordLineSeparator) {
1868                                                                                         pushLineSeparator();
1869                                                                                 } else {
1870                                                                                         currentLine = null;
1871                                                                                 }
1872                                                                         }
1873                                                                         star = currentCharacter == '*';
1874                                                                         // get next char
1875                                                                         currentCharacter = source[currentPosition++];
1876                                                                         // if (((currentCharacter = source[currentPosition++])
1877                                                                         // == '\\')
1878                                                                         // && (source[currentPosition] == 'u')) {
1879                                                                         // //-------------unicode traitement ------------
1880                                                                         // getNextUnicodeChar();
1881                                                                         // }
1882                                                                         // handle the \\u case manually into comment
1883                                                                         // if (currentCharacter == '\\') {
1884                                                                         // if (source[currentPosition] == '\\')
1885                                                                         // currentPosition++;
1886                                                                         // } //jump over the \\
1887                                                                 }
1888                                                                 // recordComment(isJavadoc);
1889                                                                 if (isJavadoc) {
1890                                                                         recordComment(TokenNameCOMMENT_PHPDOC);
1891                                                                 } else {
1892                                                                         recordComment(TokenNameCOMMENT_BLOCK);
1893                                                                 }
1894
1895                                                                 if (tokenizeComments) {
1896                                                                         if (isJavadoc)
1897                                                                                 return TokenNameCOMMENT_PHPDOC;
1898                                                                         return TokenNameCOMMENT_BLOCK;
1899                                                                 }
1900
1901                                                                 if (this.taskTags != null) {
1902                                                                         checkTaskTag(this.startPosition, this.currentPosition);
1903                                                                 }
1904                                                         } catch (IndexOutOfBoundsException e) {
1905                                                                 // reset end position for error reporting
1906                                                                 currentPosition -= 2;
1907                                                                 throw new InvalidInputException(UNTERMINATED_COMMENT);
1908                                                         }
1909                                                         break;
1910                                                 }
1911                                                 return TokenNameDIVIDE;
1912                                         }
1913                                         case '\u001a':
1914                                                 if (atEnd())
1915                                                         return TokenNameEOF;
1916                                                 // the atEnd may not be <currentPosition == source.length> if
1917                                                 // source is only some part of a real (external) stream
1918                                                 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1919                                         default:
1920                                                 if (currentCharacter == '$') {
1921                                                         int oldPosition = currentPosition;
1922                                                         try {
1923                                                                 currentCharacter = source[currentPosition++];
1924                                                                 if (isPHPIdentifierStart(currentCharacter)) {
1925                                                                         return scanIdentifierOrKeyword(true);
1926                                                                 } else {
1927                                                                         currentPosition = oldPosition;
1928                                                                         return TokenNameDOLLAR;
1929                                                                 }
1930                                                         } catch (IndexOutOfBoundsException e) {
1931                                                                 currentPosition = oldPosition;
1932                                                                 return TokenNameDOLLAR;
1933                                                         }
1934                                                 }
1935                                                 if (isPHPIdentifierStart(currentCharacter))
1936                                                         return scanIdentifierOrKeyword(false);
1937                                                 if (Character.isDigit(currentCharacter))
1938                                                         return scanNumber(false);
1939                                                 return TokenNameERROR;
1940                                         }
1941                                 }
1942                         } // -----------------end switch while try--------------------
1943                         catch (IndexOutOfBoundsException e) {
1944                         }
1945                 }
1946                 return TokenNameEOF;
1947         }
1948
1949         /**
1950          * @return
1951          * @throws InvalidInputException
1952          */
1953         private int getInlinedHTMLToken(int start) throws InvalidInputException {
1954                 if (currentPosition > source.length) {
1955                         currentPosition = source.length;
1956                         return TokenNameEOF;
1957                 }
1958                 startPosition = start;
1959                 try {
1960                         while (!phpMode) {
1961                                 currentCharacter = source[currentPosition++];
1962                                 if (currentCharacter == '<') {
1963                                         if (getNextChar('?')) {
1964                                                 currentCharacter = source[currentPosition++];
1965                                                 if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
1966                                                         if (currentCharacter != '=') { // <?=
1967                                                                 currentPosition--;
1968                                                         } else {
1969                                                                 phpExpressionTag = true;
1970                                                         }
1971                                                         // <?
1972                                                         if (ignorePHPOneLiner) { // for CodeFormatter
1973                                                                 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1974                                                                         phpMode = true;
1975                                                                         return TokenNameINLINE_HTML;
1976                                                                 }
1977                                                         } else {
1978                                                                 phpMode = true;
1979                                                                 return TokenNameINLINE_HTML;
1980                                                         }
1981                                                 } else {
1982                                                         // boolean phpStart = (currentCharacter == 'P') ||
1983                                                         // (currentCharacter == 'p');
1984                                                         // if (phpStart) {
1985                                                         int test = getNextChar('H', 'h');
1986                                                         if (test >= 0) {
1987                                                                 test = getNextChar('P', 'p');
1988                                                                 if (test >= 0) {
1989                                                                         // <?PHP <?php
1990                                                                         if (ignorePHPOneLiner) {
1991                                                                                 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1992                                                                                         phpMode = true;
1993                                                                                         return TokenNameINLINE_HTML;
1994                                                                                 }
1995                                                                         } else {
1996                                                                                 phpMode = true;
1997                                                                                 return TokenNameINLINE_HTML;
1998                                                                         }
1999                                                                 }
2000                                                         }
2001                                                         // }
2002                                                 }
2003                                         }
2004                                 }
2005                                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
2006                                         if (recordLineSeparator) {
2007                                                 pushLineSeparator();
2008                                         } else {
2009                                                 currentLine = null;
2010                                         }
2011                                 }
2012                         } // -----------------while--------------------
2013                         phpMode = true;
2014                         return TokenNameINLINE_HTML;
2015                 } // -----------------try--------------------
2016                 catch (IndexOutOfBoundsException e) {
2017                         startPosition = start;
2018                         currentPosition--;
2019                 }
2020                 phpMode = true;
2021                 return TokenNameINLINE_HTML;
2022         }
2023
2024         /**
2025          * @return
2026          */
2027         private int lookAheadLinePHPTag() {
2028                 // check if the PHP is only in this line (for CodeFormatter)
2029                 int currentPositionInLine = currentPosition;
2030                 char previousCharInLine = ' ';
2031                 char currentCharInLine = ' ';
2032                 boolean singleQuotedStringActive = false;
2033                 boolean doubleQuotedStringActive = false;
2034
2035                 try {
2036                         // look ahead in this line
2037                         while (true) {
2038                                 previousCharInLine = currentCharInLine;
2039                                 currentCharInLine = source[currentPositionInLine++];
2040                                 switch (currentCharInLine) {
2041                                 case '>':
2042                                         if (previousCharInLine == '?') {
2043                                                 // update the scanner's current Position in the source
2044                                                 currentPosition = currentPositionInLine;
2045                                                 // use as "dummy" token
2046                                                 return TokenNameEOF;
2047                                         }
2048                                         break;
2049                                 case '\\':
2050                                         if (doubleQuotedStringActive) {
2051                                                 // ignore escaped characters in double quoted strings
2052                                                 previousCharInLine = currentCharInLine;
2053                                                 currentCharInLine = source[currentPositionInLine++];
2054                                         }
2055                                 case '\"':
2056                                         if (doubleQuotedStringActive) {
2057                                                 doubleQuotedStringActive = false;
2058                                         } else {
2059                                                 if (!singleQuotedStringActive) {
2060                                                         doubleQuotedStringActive = true;
2061                                                 }
2062                                         }
2063                                         break;
2064                                 case '\'':
2065                                         if (singleQuotedStringActive) {
2066                                                 if (previousCharInLine != '\\') {
2067                                                         singleQuotedStringActive = false;
2068                                                 }
2069                                         } else {
2070                                                 if (!doubleQuotedStringActive) {
2071                                                         singleQuotedStringActive = true;
2072                                                 }
2073                                         }
2074                                         break;
2075                                 case '\n':
2076                                         phpMode = true;
2077                                         return TokenNameINLINE_HTML;
2078                                 case '#':
2079                                         if (!singleQuotedStringActive && !doubleQuotedStringActive) {
2080                                                 phpMode = true;
2081                                                 return TokenNameINLINE_HTML;
2082                                         }
2083                                         break;
2084                                 case '/':
2085                                         if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2086                                                 phpMode = true;
2087                                                 return TokenNameINLINE_HTML;
2088                                         }
2089                                         break;
2090                                 case '*':
2091                                         if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2092                                                 phpMode = true;
2093                                                 return TokenNameINLINE_HTML;
2094                                         }
2095                                         break;
2096                                 }
2097                         }
2098                 } catch (IndexOutOfBoundsException e) {
2099                         phpMode = true;
2100                         currentPosition = currentPositionInLine;
2101                         return TokenNameINLINE_HTML;
2102                 }
2103         }
2104
2105         // public final void getNextUnicodeChar()
2106         // throws IndexOutOfBoundsException, InvalidInputException {
2107         // //VOID
2108         // //handle the case of unicode.
2109         // //when a unicode appears then we must use a buffer that holds char
2110         // internal values
2111         // //At the end of this method currentCharacter holds the new visited char
2112         // //and currentPosition points right next after it
2113         //
2114         // //ALL getNextChar.... ARE OPTIMIZED COPIES
2115         //
2116         // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2117         // currentPosition++;
2118         // while (source[currentPosition] == 'u') {
2119         // currentPosition++;
2120         // unicodeSize++;
2121         // }
2122         //
2123         // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2124         // || c1 < 0
2125         // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2126         // || c2 < 0
2127         // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2128         // || c3 < 0
2129         // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2130         // || c4 < 0) {
2131         // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2132         // } else {
2133         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2134         // //need the unicode buffer
2135         // if (withoutUnicodePtr == 0) {
2136         // //buffer all the entries that have been left aside....
2137         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2138         // System.arraycopy(
2139         // source,
2140         // startPosition,
2141         // withoutUnicodeBuffer,
2142         // 1,
2143         // withoutUnicodePtr);
2144         // }
2145         // //fill the buffer with the char
2146         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2147         // }
2148         // unicodeAsBackSlash = currentCharacter == '\\';
2149         // }
2150         /*
2151          * Tokenize a method body, assuming that curly brackets are properly balanced.
2152          */
2153         public final void jumpOverMethodBody() {
2154                 this.wasAcr = false;
2155                 int found = 1;
2156                 try {
2157                         while (true) { // loop for jumping over comments
2158                                 // ---------Consume white space and handles startPosition---------
2159                                 boolean isWhiteSpace;
2160                                 do {
2161                                         startPosition = currentPosition;
2162                                         currentCharacter = source[currentPosition++];
2163                                         // if (((currentCharacter = source[currentPosition++]) == '\\')
2164                                         // && (source[currentPosition] == 'u')) {
2165                                         // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2166                                         // } else {
2167                                         if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2168                                                 pushLineSeparator();
2169                                         isWhiteSpace = Character.isWhitespace(currentCharacter);
2170                                         // }
2171                                 } while (isWhiteSpace);
2172                                 // -------consume token until } is found---------
2173                                 switch (currentCharacter) {
2174                                 case '{':
2175                                         found++;
2176                                         break;
2177                                 case '}':
2178                                         found--;
2179                                         if (found == 0)
2180                                                 return;
2181                                         break;
2182                                 case '\'': {
2183                                         boolean test;
2184                                         test = getNextChar('\\');
2185                                         if (test) {
2186                                                 try {
2187                                                         scanDoubleQuotedEscapeCharacter();
2188                                                 } catch (InvalidInputException ex) {
2189                                                 }
2190                                                 ;
2191                                         } else {
2192                                                 // try { // consume next character
2193                                                 unicodeAsBackSlash = false;
2194                                                 currentCharacter = source[currentPosition++];
2195                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
2196                                                 // && (source[currentPosition] == 'u')) {
2197                                                 // getNextUnicodeChar();
2198                                                 // } else {
2199                                                 if (withoutUnicodePtr != 0) {
2200                                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2201                                                 }
2202                                                 // }
2203                                                 // } catch (InvalidInputException ex) {
2204                                                 // };
2205                                         }
2206                                         getNextChar('\'');
2207                                         break;
2208                                 }
2209                                 case '"':
2210                                         try {
2211                                                 // try { // consume next character
2212                                                 unicodeAsBackSlash = false;
2213                                                 currentCharacter = source[currentPosition++];
2214                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
2215                                                 // && (source[currentPosition] == 'u')) {
2216                                                 // getNextUnicodeChar();
2217                                                 // } else {
2218                                                 if (withoutUnicodePtr != 0) {
2219                                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2220                                                 }
2221                                                 // }
2222                                                 // } catch (InvalidInputException ex) {
2223                                                 // };
2224                                                 while (currentCharacter != '"') {
2225                                                         if (currentCharacter == '\r') {
2226                                                                 if (source[currentPosition] == '\n')
2227                                                                         currentPosition++;
2228                                                                 break;
2229                                                                 // the string cannot go further that the line
2230                                                         }
2231                                                         if (currentCharacter == '\n') {
2232                                                                 break;
2233                                                                 // the string cannot go further that the line
2234                                                         }
2235                                                         if (currentCharacter == '\\') {
2236                                                                 try {
2237                                                                         scanDoubleQuotedEscapeCharacter();
2238                                                                 } catch (InvalidInputException ex) {
2239                                                                 }
2240                                                                 ;
2241                                                         }
2242                                                         // try { // consume next character
2243                                                         unicodeAsBackSlash = false;
2244                                                         currentCharacter = source[currentPosition++];
2245                                                         // if (((currentCharacter = source[currentPosition++]) == '\\')
2246                                                         // && (source[currentPosition] == 'u')) {
2247                                                         // getNextUnicodeChar();
2248                                                         // } else {
2249                                                         if (withoutUnicodePtr != 0) {
2250                                                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2251                                                         }
2252                                                         // }
2253                                                         // } catch (InvalidInputException ex) {
2254                                                         // };
2255                                                 }
2256                                         } catch (IndexOutOfBoundsException e) {
2257                                                 return;
2258                                         }
2259                                         break;
2260                                 case '/': {
2261                                         int test;
2262                                         if ((test = getNextChar('/', '*')) == 0) {
2263                                                 // line comment
2264                                                 try {
2265                                                         // get the next char
2266                                                         currentCharacter = source[currentPosition++];
2267                                                         // if (((currentCharacter = source[currentPosition++]) ==
2268                                                         // '\\')
2269                                                         // && (source[currentPosition] == 'u')) {
2270                                                         // //-------------unicode traitement ------------
2271                                                         // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2272                                                         // currentPosition++;
2273                                                         // while (source[currentPosition] == 'u') {
2274                                                         // currentPosition++;
2275                                                         // }
2276                                                         // if ((c1 =
2277                                                         // Character.getNumericValue(source[currentPosition++]))
2278                                                         // > 15
2279                                                         // || c1 < 0
2280                                                         // || (c2 =
2281                                                         // Character.getNumericValue(source[currentPosition++]))
2282                                                         // > 15
2283                                                         // || c2 < 0
2284                                                         // || (c3 =
2285                                                         // Character.getNumericValue(source[currentPosition++]))
2286                                                         // > 15
2287                                                         // || c3 < 0
2288                                                         // || (c4 =
2289                                                         // Character.getNumericValue(source[currentPosition++]))
2290                                                         // > 15
2291                                                         // || c4 < 0) {
2292                                                         // //error don't care of the value
2293                                                         // currentCharacter = 'A';
2294                                                         // } //something different from \n and \r
2295                                                         // else {
2296                                                         // currentCharacter =
2297                                                         // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2298                                                         // }
2299                                                         // }
2300                                                         while (currentCharacter != '\r' && currentCharacter != '\n') {
2301                                                                 // get the next char
2302                                                                 currentCharacter = source[currentPosition++];
2303                                                                 // if (((currentCharacter = source[currentPosition++])
2304                                                                 // == '\\')
2305                                                                 // && (source[currentPosition] == 'u')) {
2306                                                                 // //-------------unicode traitement ------------
2307                                                                 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2308                                                                 // currentPosition++;
2309                                                                 // while (source[currentPosition] == 'u') {
2310                                                                 // currentPosition++;
2311                                                                 // }
2312                                                                 // if ((c1 =
2313                                                                 // Character.getNumericValue(source[currentPosition++]))
2314                                                                 // > 15
2315                                                                 // || c1 < 0
2316                                                                 // || (c2 =
2317                                                                 // Character.getNumericValue(source[currentPosition++]))
2318                                                                 // > 15
2319                                                                 // || c2 < 0
2320                                                                 // || (c3 =
2321                                                                 // Character.getNumericValue(source[currentPosition++]))
2322                                                                 // > 15
2323                                                                 // || c3 < 0
2324                                                                 // || (c4 =
2325                                                                 // Character.getNumericValue(source[currentPosition++]))
2326                                                                 // > 15
2327                                                                 // || c4 < 0) {
2328                                                                 // //error don't care of the value
2329                                                                 // currentCharacter = 'A';
2330                                                                 // } //something different from \n and \r
2331                                                                 // else {
2332                                                                 // currentCharacter =
2333                                                                 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2334                                                                 // }
2335                                                                 // }
2336                                                         }
2337                                                         if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2338                                                                 pushLineSeparator();
2339                                                 } catch (IndexOutOfBoundsException e) {
2340                                                 } // an eof will them be generated
2341                                                 break;
2342                                         }
2343                                         if (test > 0) {
2344                                                 // traditional and annotation comment
2345                                                 boolean star = false;
2346                                                 // try { // consume next character
2347                                                 unicodeAsBackSlash = false;
2348                                                 currentCharacter = source[currentPosition++];
2349                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
2350                                                 // && (source[currentPosition] == 'u')) {
2351                                                 // getNextUnicodeChar();
2352                                                 // } else {
2353                                                 if (withoutUnicodePtr != 0) {
2354                                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2355                                                 }
2356                                                 // };
2357                                                 // } catch (InvalidInputException ex) {
2358                                                 // };
2359                                                 if (currentCharacter == '*') {
2360                                                         star = true;
2361                                                 }
2362                                                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2363                                                         pushLineSeparator();
2364                                                 try { // get the next char
2365                                                         currentCharacter = source[currentPosition++];
2366                                                         // if (((currentCharacter = source[currentPosition++]) ==
2367                                                         // '\\')
2368                                                         // && (source[currentPosition] == 'u')) {
2369                                                         // //-------------unicode traitement ------------
2370                                                         // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2371                                                         // currentPosition++;
2372                                                         // while (source[currentPosition] == 'u') {
2373                                                         // currentPosition++;
2374                                                         // }
2375                                                         // if ((c1 =
2376                                                         // Character.getNumericValue(source[currentPosition++]))
2377                                                         // > 15
2378                                                         // || c1 < 0
2379                                                         // || (c2 =
2380                                                         // Character.getNumericValue(source[currentPosition++]))
2381                                                         // > 15
2382                                                         // || c2 < 0
2383                                                         // || (c3 =
2384                                                         // Character.getNumericValue(source[currentPosition++]))
2385                                                         // > 15
2386                                                         // || c3 < 0
2387                                                         // || (c4 =
2388                                                         // Character.getNumericValue(source[currentPosition++]))
2389                                                         // > 15
2390                                                         // || c4 < 0) {
2391                                                         // //error don't care of the value
2392                                                         // currentCharacter = 'A';
2393                                                         // } //something different from * and /
2394                                                         // else {
2395                                                         // currentCharacter =
2396                                                         // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2397                                                         // }
2398                                                         // }
2399                                                         // loop until end of comment */
2400                                                         while ((currentCharacter != '/') || (!star)) {
2401                                                                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2402                                                                         pushLineSeparator();
2403                                                                 star = currentCharacter == '*';
2404                                                                 // get next char
2405                                                                 currentCharacter = source[currentPosition++];
2406                                                                 // if (((currentCharacter = source[currentPosition++])
2407                                                                 // == '\\')
2408                                                                 // && (source[currentPosition] == 'u')) {
2409                                                                 // //-------------unicode traitement ------------
2410                                                                 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2411                                                                 // currentPosition++;
2412                                                                 // while (source[currentPosition] == 'u') {
2413                                                                 // currentPosition++;
2414                                                                 // }
2415                                                                 // if ((c1 =
2416                                                                 // Character.getNumericValue(source[currentPosition++]))
2417                                                                 // > 15
2418                                                                 // || c1 < 0
2419                                                                 // || (c2 =
2420                                                                 // Character.getNumericValue(source[currentPosition++]))
2421                                                                 // > 15
2422                                                                 // || c2 < 0
2423                                                                 // || (c3 =
2424                                                                 // Character.getNumericValue(source[currentPosition++]))
2425                                                                 // > 15
2426                                                                 // || c3 < 0
2427                                                                 // || (c4 =
2428                                                                 // Character.getNumericValue(source[currentPosition++]))
2429                                                                 // > 15
2430                                                                 // || c4 < 0) {
2431                                                                 // //error don't care of the value
2432                                                                 // currentCharacter = 'A';
2433                                                                 // } //something different from * and /
2434                                                                 // else {
2435                                                                 // currentCharacter =
2436                                                                 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2437                                                                 // }
2438                                                                 // }
2439                                                         }
2440                                                 } catch (IndexOutOfBoundsException e) {
2441                                                         return;
2442                                                 }
2443                                                 break;
2444                                         }
2445                                         break;
2446                                 }
2447                                 default:
2448                                         if (isPHPIdentOrVarStart(currentCharacter)) {
2449                                                 try {
2450                                                         scanIdentifierOrKeyword((currentCharacter == '$'));
2451                                                 } catch (InvalidInputException ex) {
2452                                                 }
2453                                                 ;
2454                                                 break;
2455                                         }
2456                                         if (ObviousIdentCharNatures[currentCharacter] == C_DIGIT) {
2457                                                 // if (Character.isDigit(currentCharacter)) {
2458                                                 try {
2459                                                         scanNumber(false);
2460                                                 } catch (InvalidInputException ex) {
2461                                                 }
2462                                                 ;
2463                                                 break;
2464                                         }
2465                                 }
2466                         }
2467                         // -----------------end switch while try--------------------
2468                 } catch (IndexOutOfBoundsException e) {
2469                 } catch (InvalidInputException e) {
2470                 }
2471                 return;
2472         }
2473
2474         // public final boolean jumpOverUnicodeWhiteSpace()
2475         // throws InvalidInputException {
2476         // //BOOLEAN
2477         // //handle the case of unicode. Jump over the next whiteSpace
2478         // //making startPosition pointing on the next available char
2479         // //On false, the currentCharacter is filled up with a potential
2480         // //correct char
2481         //
2482         // try {
2483         // this.wasAcr = false;
2484         // int c1, c2, c3, c4;
2485         // int unicodeSize = 6;
2486         // currentPosition++;
2487         // while (source[currentPosition] == 'u') {
2488         // currentPosition++;
2489         // unicodeSize++;
2490         // }
2491         //
2492         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2493         // || c1 < 0)
2494         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2495         // || c2 < 0)
2496         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2497         // || c3 < 0)
2498         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2499         // || c4 < 0)) {
2500         // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2501         // }
2502         //
2503         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2504         // if (recordLineSeparator
2505         // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2506         // pushLineSeparator();
2507         // if (Character.isWhitespace(currentCharacter))
2508         // return true;
2509         //
2510         // //buffer the new char which is not a white space
2511         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2512         // //withoutUnicodePtr == 1 is true here
2513         // return false;
2514         // } catch (IndexOutOfBoundsException e) {
2515         // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2516         // }
2517         // }
2518         public final int[] getLineEnds() {
2519                 // return a bounded copy of this.lineEnds
2520                 int[] copy;
2521                 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2522                 return copy;
2523         }
2524
2525         public char[] getSource() {
2526                 return this.source;
2527         }
2528
2529         public static boolean isIdentifierOrKeyword(int token) {
2530                 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
2531         }
2532
2533         final char[] optimizedCurrentTokenSource1() {
2534                 // return always the same char[] build only once
2535                 // optimization at no speed cost of 99.5 % of the singleCharIdentifier
2536                 char charOne = source[startPosition];
2537                 switch (charOne) {
2538                 case 'a':
2539                         return charArray_a;
2540                 case 'b':
2541                         return charArray_b;
2542                 case 'c':
2543                         return charArray_c;
2544                 case 'd':
2545                         return charArray_d;
2546                 case 'e':
2547                         return charArray_e;
2548                 case 'f':
2549                         return charArray_f;
2550                 case 'g':
2551                         return charArray_g;
2552                 case 'h':
2553                         return charArray_h;
2554                 case 'i':
2555                         return charArray_i;
2556                 case 'j':
2557                         return charArray_j;
2558                 case 'k':
2559                         return charArray_k;
2560                 case 'l':
2561                         return charArray_l;
2562                 case 'm':
2563                         return charArray_m;
2564                 case 'n':
2565                         return charArray_n;
2566                 case 'o':
2567                         return charArray_o;
2568                 case 'p':
2569                         return charArray_p;
2570                 case 'q':
2571                         return charArray_q;
2572                 case 'r':
2573                         return charArray_r;
2574                 case 's':
2575                         return charArray_s;
2576                 case 't':
2577                         return charArray_t;
2578                 case 'u':
2579                         return charArray_u;
2580                 case 'v':
2581                         return charArray_v;
2582                 case 'w':
2583                         return charArray_w;
2584                 case 'x':
2585                         return charArray_x;
2586                 case 'y':
2587                         return charArray_y;
2588                 case 'z':
2589                         return charArray_z;
2590                 default:
2591                         return new char[] { charOne };
2592                 }
2593         }
2594
2595         final char[] optimizedCurrentTokenSource2() {
2596                 char c0, c1;
2597                 c0 = source[startPosition];
2598                 c1 = source[startPosition + 1];
2599                 if (c0 == '$') {
2600                         // return always the same char[] build only once
2601                         // optimization at no speed cost of 99.5 % of the singleCharIdentifier
2602                         switch (c1) {
2603                         case 'a':
2604                                 return charArray_va;
2605                         case 'b':
2606                                 return charArray_vb;
2607                         case 'c':
2608                                 return charArray_vc;
2609                         case 'd':
2610                                 return charArray_vd;
2611                         case 'e':
2612                                 return charArray_ve;
2613                         case 'f':
2614                                 return charArray_vf;
2615                         case 'g':
2616                                 return charArray_vg;
2617                         case 'h':
2618                                 return charArray_vh;
2619                         case 'i':
2620                                 return charArray_vi;
2621                         case 'j':
2622                                 return charArray_vj;
2623                         case 'k':
2624                                 return charArray_vk;
2625                         case 'l':
2626                                 return charArray_vl;
2627                         case 'm':
2628                                 return charArray_vm;
2629                         case 'n':
2630                                 return charArray_vn;
2631                         case 'o':
2632                                 return charArray_vo;
2633                         case 'p':
2634                                 return charArray_vp;
2635                         case 'q':
2636                                 return charArray_vq;
2637                         case 'r':
2638                                 return charArray_vr;
2639                         case 's':
2640                                 return charArray_vs;
2641                         case 't':
2642                                 return charArray_vt;
2643                         case 'u':
2644                                 return charArray_vu;
2645                         case 'v':
2646                                 return charArray_vv;
2647                         case 'w':
2648                                 return charArray_vw;
2649                         case 'x':
2650                                 return charArray_vx;
2651                         case 'y':
2652                                 return charArray_vy;
2653                         case 'z':
2654                                 return charArray_vz;
2655                         }
2656                 }
2657                 // try to return the same char[] build only once
2658                 int hash = ((c0 << 6) + c1) % TableSize;
2659                 char[][] table = charArray_length[0][hash];
2660                 int i = newEntry2;
2661                 while (++i < InternalTableSize) {
2662                         char[] charArray = table[i];
2663                         if ((c0 == charArray[0]) && (c1 == charArray[1]))
2664                                 return charArray;
2665                 }
2666                 // ---------other side---------
2667                 i = -1;
2668                 int max = newEntry2;
2669                 while (++i <= max) {
2670                         char[] charArray = table[i];
2671                         if ((c0 == charArray[0]) && (c1 == charArray[1]))
2672                                 return charArray;
2673                 }
2674                 // --------add the entry-------
2675                 if (++max >= InternalTableSize)
2676                         max = 0;
2677                 char[] r;
2678                 table[max] = (r = new char[] { c0, c1 });
2679                 newEntry2 = max;
2680                 return r;
2681         }
2682
2683         final char[] optimizedCurrentTokenSource3() {
2684                 // try to return the same char[] build only once
2685                 char c0, c1, c2;
2686                 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2687                                 % TableSize;
2688                 char[][] table = charArray_length[1][hash];
2689                 int i = newEntry3;
2690                 while (++i < InternalTableSize) {
2691                         char[] charArray = table[i];
2692                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2693                                 return charArray;
2694                 }
2695                 // ---------other side---------
2696                 i = -1;
2697                 int max = newEntry3;
2698                 while (++i <= max) {
2699                         char[] charArray = table[i];
2700                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2701                                 return charArray;
2702                 }
2703                 // --------add the entry-------
2704                 if (++max >= InternalTableSize)
2705                         max = 0;
2706                 char[] r;
2707                 table[max] = (r = new char[] { c0, c1, c2 });
2708                 newEntry3 = max;
2709                 return r;
2710         }
2711
2712         final char[] optimizedCurrentTokenSource4() {
2713                 // try to return the same char[] build only once
2714                 char c0, c1, c2, c3;
2715                 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2716                                 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2717                                 % TableSize;
2718                 char[][] table = charArray_length[2][(int) hash];
2719                 int i = newEntry4;
2720                 while (++i < InternalTableSize) {
2721                         char[] charArray = table[i];
2722                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2723                                 return charArray;
2724                 }
2725                 // ---------other side---------
2726                 i = -1;
2727                 int max = newEntry4;
2728                 while (++i <= max) {
2729                         char[] charArray = table[i];
2730                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2731                                 return charArray;
2732                 }
2733                 // --------add the entry-------
2734                 if (++max >= InternalTableSize)
2735                         max = 0;
2736                 char[] r;
2737                 table[max] = (r = new char[] { c0, c1, c2, c3 });
2738                 newEntry4 = max;
2739                 return r;
2740         }
2741
2742         final char[] optimizedCurrentTokenSource5() {
2743                 // try to return the same char[] build only once
2744                 char c0, c1, c2, c3, c4;
2745                 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2746                                 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2747                                 % TableSize;
2748                 char[][] table = charArray_length[3][(int) hash];
2749                 int i = newEntry5;
2750                 while (++i < InternalTableSize) {
2751                         char[] charArray = table[i];
2752                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2753                                 return charArray;
2754                 }
2755                 // ---------other side---------
2756                 i = -1;
2757                 int max = newEntry5;
2758                 while (++i <= max) {
2759                         char[] charArray = table[i];
2760                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2761                                 return charArray;
2762                 }
2763                 // --------add the entry-------
2764                 if (++max >= InternalTableSize)
2765                         max = 0;
2766                 char[] r;
2767                 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2768                 newEntry5 = max;
2769                 return r;
2770         }
2771
2772         final char[] optimizedCurrentTokenSource6() {
2773                 // try to return the same char[] build only once
2774                 char c0, c1, c2, c3, c4, c5;
2775                 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2776                                 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2777                                 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2778                                 % TableSize;
2779                 char[][] table = charArray_length[4][(int) hash];
2780                 int i = newEntry6;
2781                 while (++i < InternalTableSize) {
2782                         char[] charArray = table[i];
2783                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2784                                         && (c5 == charArray[5]))
2785                                 return charArray;
2786                 }
2787                 // ---------other side---------
2788                 i = -1;
2789                 int max = newEntry6;
2790                 while (++i <= max) {
2791                         char[] charArray = table[i];
2792                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2793                                         && (c5 == charArray[5]))
2794                                 return charArray;
2795                 }
2796                 // --------add the entry-------
2797                 if (++max >= InternalTableSize)
2798                         max = 0;
2799                 char[] r;
2800                 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2801                 newEntry6 = max;
2802                 return r;
2803         }
2804
2805         public final void pushLineSeparator() throws InvalidInputException {
2806                 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2807                 final int INCREMENT = 250;
2808                 if (this.checkNonExternalizedStringLiterals) {
2809                         // reinitialize the current line for non externalize strings purpose
2810                         currentLine = null;
2811                 }
2812                 // currentCharacter is at position currentPosition-1
2813                 // cr 000D
2814                 if (currentCharacter == '\r') {
2815                         int separatorPos = currentPosition - 1;
2816                         if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2817                                 return;
2818                         // System.out.println("CR-" + separatorPos);
2819                         try {
2820                                 lineEnds[++linePtr] = separatorPos;
2821                         } catch (IndexOutOfBoundsException e) {
2822                                 // linePtr value is correct
2823                                 int oldLength = lineEnds.length;
2824                                 int[] old = lineEnds;
2825                                 lineEnds = new int[oldLength + INCREMENT];
2826                                 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2827                                 lineEnds[linePtr] = separatorPos;
2828                         }
2829                         // look-ahead for merged cr+lf
2830                         try {
2831                                 if (source[currentPosition] == '\n') {
2832                                         // System.out.println("look-ahead LF-" + currentPosition);
2833                                         lineEnds[linePtr] = currentPosition;
2834                                         currentPosition++;
2835                                         wasAcr = false;
2836                                 } else {
2837                                         wasAcr = true;
2838                                 }
2839                         } catch (IndexOutOfBoundsException e) {
2840                                 wasAcr = true;
2841                         }
2842                 } else {
2843                         // lf 000A
2844                         if (currentCharacter == '\n') {
2845                                 // must merge eventual cr followed by lf
2846                                 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2847                                         // System.out.println("merge LF-" + (currentPosition - 1));
2848                                         lineEnds[linePtr] = currentPosition - 1;
2849                                 } else {
2850                                         int separatorPos = currentPosition - 1;
2851                                         if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2852                                                 return;
2853                                         // System.out.println("LF-" + separatorPos);
2854                                         try {
2855                                                 lineEnds[++linePtr] = separatorPos;
2856                                         } catch (IndexOutOfBoundsException e) {
2857                                                 // linePtr value is correct
2858                                                 int oldLength = lineEnds.length;
2859                                                 int[] old = lineEnds;
2860                                                 lineEnds = new int[oldLength + INCREMENT];
2861                                                 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2862                                                 lineEnds[linePtr] = separatorPos;
2863                                         }
2864                                 }
2865                                 wasAcr = false;
2866                         }
2867                 }
2868         }
2869
2870         public final void pushUnicodeLineSeparator() {
2871                 // isUnicode means that the \r or \n has been read as a unicode character
2872                 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2873                 final int INCREMENT = 250;
2874                 // currentCharacter is at position currentPosition-1
2875                 if (this.checkNonExternalizedStringLiterals) {
2876                         // reinitialize the current line for non externalize strings purpose
2877                         currentLine = null;
2878                 }
2879                 // cr 000D
2880                 if (currentCharacter == '\r') {
2881                         int separatorPos = currentPosition - 6;
2882                         if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2883                                 return;
2884                         // System.out.println("CR-" + separatorPos);
2885                         try {
2886                                 lineEnds[++linePtr] = separatorPos;
2887                         } catch (IndexOutOfBoundsException e) {
2888                                 // linePtr value is correct
2889                                 int oldLength = lineEnds.length;
2890                                 int[] old = lineEnds;
2891                                 lineEnds = new int[oldLength + INCREMENT];
2892                                 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2893                                 lineEnds[linePtr] = separatorPos;
2894                         }
2895                         // look-ahead for merged cr+lf
2896                         if (source[currentPosition] == '\n') {
2897                                 // System.out.println("look-ahead LF-" + currentPosition);
2898                                 lineEnds[linePtr] = currentPosition;
2899                                 currentPosition++;
2900                                 wasAcr = false;
2901                         } else {
2902                                 wasAcr = true;
2903                         }
2904                 } else {
2905                         // lf 000A
2906                         if (currentCharacter == '\n') {
2907                                 // must merge eventual cr followed by lf
2908                                 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2909                                         // System.out.println("merge LF-" + (currentPosition - 1));
2910                                         lineEnds[linePtr] = currentPosition - 6;
2911                                 } else {
2912                                         int separatorPos = currentPosition - 6;
2913                                         if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2914                                                 return;
2915                                         // System.out.println("LF-" + separatorPos);
2916                                         try {
2917                                                 lineEnds[++linePtr] = separatorPos;
2918                                         } catch (IndexOutOfBoundsException e) {
2919                                                 // linePtr value is correct
2920                                                 int oldLength = lineEnds.length;
2921                                                 int[] old = lineEnds;
2922                                                 lineEnds = new int[oldLength + INCREMENT];
2923                                                 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2924                                                 lineEnds[linePtr] = separatorPos;
2925                                         }
2926                                 }
2927                                 wasAcr = false;
2928                         }
2929                 }
2930         }
2931
2932         public void recordComment(int token) {
2933                 // compute position
2934                 int stopPosition = this.currentPosition;
2935                 switch (token) {
2936                 case TokenNameCOMMENT_LINE:
2937                         stopPosition = -this.lastCommentLinePosition;
2938                         break;
2939                 case TokenNameCOMMENT_BLOCK:
2940                         stopPosition = -this.currentPosition;
2941                         break;
2942                 }
2943
2944                 // a new comment is recorded
2945                 int length = this.commentStops.length;
2946                 if (++this.commentPtr >= length) {
2947                         System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2948                         // grows the positions buffers too
2949                         System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2950                 }
2951                 this.commentStops[this.commentPtr] = stopPosition;
2952                 this.commentStarts[this.commentPtr] = this.startPosition;
2953         }
2954
2955         // public final void recordComment(boolean isJavadoc) {
2956         // // a new annotation comment is recorded
2957         // try {
2958         // commentStops[++commentPtr] = isJavadoc
2959         // ? currentPosition
2960         // : -currentPosition;
2961         // } catch (IndexOutOfBoundsException e) {
2962         // int oldStackLength = commentStops.length;
2963         // int[] oldStack = commentStops;
2964         // commentStops = new int[oldStackLength + 30];
2965         // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2966         // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2967         // //grows the positions buffers too
2968         // int[] old = commentStarts;
2969         // commentStarts = new int[oldStackLength + 30];
2970         // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2971         // }
2972         // //the buffer is of a correct size here
2973         // commentStarts[commentPtr] = startPosition;
2974         // }
2975         public void resetTo(int begin, int end) {
2976                 // reset the scanner to a given position where it may rescan again
2977                 diet = false;
2978                 initialPosition = startPosition = currentPosition = begin;
2979                 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2980                 commentPtr = -1; // reset comment stack
2981         }
2982
2983         public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2984                 // the string with "\\u" is a legal string of two chars \ and u
2985                 // thus we use a direct access to the source (for regular cases).
2986                 // if (unicodeAsBackSlash) {
2987                 // // consume next character
2988                 // unicodeAsBackSlash = false;
2989                 // if (((currentCharacter = source[currentPosition++]) == '\\')
2990                 // && (source[currentPosition] == 'u')) {
2991                 // getNextUnicodeChar();
2992                 // } else {
2993                 // if (withoutUnicodePtr != 0) {
2994                 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2995                 // }
2996                 // }
2997                 // } else
2998                 currentCharacter = source[currentPosition++];
2999                 switch (currentCharacter) {
3000                 case '\'':
3001                         currentCharacter = '\'';
3002                         break;
3003                 case '\\':
3004                         currentCharacter = '\\';
3005                         break;
3006                 default:
3007                         currentCharacter = '\\';
3008                         currentPosition--;
3009                 }
3010         }
3011
3012         public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
3013                 currentCharacter = source[currentPosition++];
3014                 switch (currentCharacter) {
3015                 // case 'b' :
3016                 // currentCharacter = '\b';
3017                 // break;
3018                 case 't':
3019                         currentCharacter = '\t';
3020                         break;
3021                 case 'n':
3022                         currentCharacter = '\n';
3023                         break;
3024                 // case 'f' :
3025                 // currentCharacter = '\f';
3026                 // break;
3027                 case 'r':
3028                         currentCharacter = '\r';
3029                         break;
3030                 case '\"':
3031                         currentCharacter = '\"';
3032                         break;
3033                 case '\'':
3034                         currentCharacter = '\'';
3035                         break;
3036                 case '\\':
3037                         currentCharacter = '\\';
3038                         break;
3039                 case '$':
3040                         currentCharacter = '$';
3041                         break;
3042                 default:
3043                         // -----------octal escape--------------
3044                         // OctalDigit
3045                         // OctalDigit OctalDigit
3046                         // ZeroToThree OctalDigit OctalDigit
3047                         int number = Character.getNumericValue(currentCharacter);
3048                         if (number >= 0 && number <= 7) {
3049                                 boolean zeroToThreeNot = number > 3;
3050                                 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3051                                         int digit = Character.getNumericValue(currentCharacter);
3052                                         if (digit >= 0 && digit <= 7) {
3053                                                 number = (number * 8) + digit;
3054                                                 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3055                                                         if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3056                                                                 // Digit --> ignore last character
3057                                                                 currentPosition--;
3058                                                         } else {
3059                                                                 digit = Character.getNumericValue(currentCharacter);
3060                                                                 if (digit >= 0 && digit <= 7) {
3061                                                                         // has read \ZeroToThree OctalDigit OctalDigit
3062                                                                         number = (number * 8) + digit;
3063                                                                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3064                                                                         // --> ignore last character
3065                                                                         currentPosition--;
3066                                                                 }
3067                                                         }
3068                                                 } else { // has read \OctalDigit NonDigit--> ignore last
3069                                                         // character
3070                                                         currentPosition--;
3071                                                 }
3072                                         } else { // has read \OctalDigit NonOctalDigit--> ignore last
3073                                                 // character
3074                                                 currentPosition--;
3075                                         }
3076                                 } else { // has read \OctalDigit --> ignore last character
3077                                         currentPosition--;
3078                                 }
3079                                 if (number > 255)
3080                                         throw new InvalidInputException(INVALID_ESCAPE);
3081                                 currentCharacter = (char) number;
3082                         }
3083                 // else
3084                 // throw new InvalidInputException(INVALID_ESCAPE);
3085                 }
3086         }
3087
3088         // public int scanIdentifierOrKeyword() throws InvalidInputException {
3089         // return scanIdentifierOrKeyword( false );
3090         // }
3091         public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
3092                 // test keywords
3093                 // first dispatch on the first char.
3094                 // then the length. If there are several
3095                 // keywors with the same length AND the same first char, then do another
3096                 // disptach on the second char :-)...cool....but fast !
3097                 useAssertAsAnIndentifier = false;
3098                 while (getNextCharAsJavaIdentifierPart()) {
3099                 }
3100                 ;
3101                 if (isVariable) {
3102                         // if (new String(getCurrentTokenSource()).equals("$this")) {
3103                         // return TokenNamethis;
3104                         // }
3105                         return TokenNameVariable;
3106                 }
3107                 int index, length;
3108                 char[] data;
3109                 char firstLetter;
3110                 // if (withoutUnicodePtr == 0)
3111                 // quick test on length == 1 but not on length > 12 while most identifier
3112                 // have a length which is <= 12...but there are lots of identifier with
3113                 // only one char....
3114                 // {
3115                 if ((length = currentPosition - startPosition) == 1)
3116                         return TokenNameIdentifier;
3117                 // data = source;
3118                 data = new char[length];
3119                 index = startPosition;
3120                 for (int i = 0; i < length; i++) {
3121                         data[i] = Character.toLowerCase(source[index + i]);
3122                 }
3123                 index = 0;
3124                 // } else {
3125                 // if ((length = withoutUnicodePtr) == 1)
3126                 // return TokenNameIdentifier;
3127                 // // data = withoutUnicodeBuffer;
3128                 // data = new char[withoutUnicodeBuffer.length];
3129                 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3130                 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3131                 // }
3132                 // index = 1;
3133                 // }
3134                 firstLetter = data[index];
3135                 switch (firstLetter) {
3136                 case '_':
3137                         switch (length) {
3138                         case 8:
3139                                 // __FILE__
3140                                 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3141                                                 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3142                                         return TokenNameFILE;
3143                                 index = 0; // __LINE__
3144                                 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3145                                                 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3146                                         return TokenNameLINE;
3147                                 break;
3148                         case 9:
3149                                 // __CLASS__
3150                                 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3151                                                 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3152                                         return TokenNameCLASS_C;
3153                                 break;
3154                         case 11:
3155                                 // __METHOD__
3156                                 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3157                                                 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3158                                                 && (data[++index] == '_'))
3159                                         return TokenNameMETHOD_C;
3160                                 break;
3161                         case 12:
3162                                 // __FUNCTION__
3163                                 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3164                                                 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3165                                                 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3166                                         return TokenNameFUNC_C;
3167                                 break;
3168                         }
3169                         return TokenNameIdentifier;
3170                 case 'a':
3171                         // as and array abstract
3172                         switch (length) {
3173                         case 2:
3174                                 // as
3175                                 if ((data[++index] == 's')) {
3176                                         return TokenNameas;
3177                                 } else {
3178                                         return TokenNameIdentifier;
3179                                 }
3180                         case 3:
3181                                 // and
3182                                 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3183                                         return TokenNameand;
3184                                 } else {
3185                                         return TokenNameIdentifier;
3186                                 }
3187                         case 5:
3188                                 // array
3189                                 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3190                                         return TokenNamearray;
3191                                 else
3192                                         return TokenNameIdentifier;
3193                         case 8:
3194                                 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3195                                                 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3196                                         return TokenNameabstract;
3197                                 else
3198                                         return TokenNameIdentifier;
3199                         default:
3200                                 return TokenNameIdentifier;
3201                         }
3202                 case 'b':
3203                         // break
3204                         switch (length) {
3205                         case 5:
3206                                 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3207                                         return TokenNamebreak;
3208                                 else
3209                                         return TokenNameIdentifier;
3210                         default:
3211                                 return TokenNameIdentifier;
3212                         }
3213                 case 'c':
3214                         // case catch class clone const continue
3215                         switch (length) {
3216                         case 4:
3217                                 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3218                                         return TokenNamecase;
3219                                 else
3220                                         return TokenNameIdentifier;
3221                         case 5:
3222                                 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3223                                         return TokenNamecatch;
3224                                 index = 0;
3225                                 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3226                                         return TokenNameclass;
3227                                 index = 0;
3228                                 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3229                                         return TokenNameclone;
3230                                 index = 0;
3231                                 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3232                                         return TokenNameconst;
3233                                 else
3234                                         return TokenNameIdentifier;
3235                         case 8:
3236                                 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3237                                                 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3238                                         return TokenNamecontinue;
3239                                 else
3240                                         return TokenNameIdentifier;
3241                         default:
3242                                 return TokenNameIdentifier;
3243                         }
3244                 case 'd':
3245                         // declare default do die
3246                         // TODO delete define ==> no keyword !
3247                         switch (length) {
3248                         case 2:
3249                                 if ((data[++index] == 'o'))
3250                                         return TokenNamedo;
3251                                 else
3252                                         return TokenNameIdentifier;
3253                         // case 6 :
3254                         // if ((data[++index] == 'e')
3255                         // && (data[++index] == 'f')
3256                         // && (data[++index] == 'i')
3257                         // && (data[++index] == 'n')
3258                         // && (data[++index] == 'e'))
3259                         // return TokenNamedefine;
3260                         // else
3261                         // return TokenNameIdentifier;
3262                         case 7:
3263                                 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3264                                                 && (data[++index] == 'r') && (data[++index] == 'e'))
3265                                         return TokenNamedeclare;
3266                                 index = 0;
3267                                 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3268                                                 && (data[++index] == 'l') && (data[++index] == 't'))
3269                                         return TokenNamedefault;
3270                                 else
3271                                         return TokenNameIdentifier;
3272                         default:
3273                                 return TokenNameIdentifier;
3274                         }
3275                 case 'e':
3276                         // echo else exit elseif extends eval
3277                         switch (length) {
3278                         case 4:
3279                                 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3280                                         return TokenNameecho;
3281                                 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3282                                         return TokenNameelse;
3283                                 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3284                                         return TokenNameexit;
3285                                 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3286                                         return TokenNameeval;
3287                                 else
3288                                         return TokenNameIdentifier;
3289                         case 5:
3290                                 // endif empty
3291                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3292                                         return TokenNameendif;
3293                                 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3294                                         return TokenNameempty;
3295                                 else
3296                                         return TokenNameIdentifier;
3297                         case 6:
3298                                 // endfor
3299                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3300                                                 && (data[++index] == 'r'))
3301                                         return TokenNameendfor;
3302                                 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3303                                                 && (data[++index] == 'f'))
3304                                         return TokenNameelseif;
3305                                 else
3306                                         return TokenNameIdentifier;
3307                         case 7:
3308                                 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3309                                                 && (data[++index] == 'd') && (data[++index] == 's'))
3310                                         return TokenNameextends;
3311                                 else
3312                                         return TokenNameIdentifier;
3313                         case 8:
3314                                 // endwhile
3315                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3316                                                 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3317                                         return TokenNameendwhile;
3318                                 else
3319                                         return TokenNameIdentifier;
3320                         case 9:
3321                                 // endswitch
3322                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3323                                                 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3324                                         return TokenNameendswitch;
3325                                 else
3326                                         return TokenNameIdentifier;
3327                         case 10:
3328                                 // enddeclare
3329                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3330                                                 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3331                                                 && (data[++index] == 'e'))
3332                                         return TokenNameenddeclare;
3333                                 index = 0;
3334                                 if ((data[++index] == 'n') // endforeach
3335                                                 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3336                                                 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3337                                         return TokenNameendforeach;
3338                                 else
3339                                         return TokenNameIdentifier;
3340                         default:
3341                                 return TokenNameIdentifier;
3342                         }
3343                 case 'f':
3344                         // for false final function
3345                         switch (length) {
3346                         case 3:
3347                                 if ((data[++index] == 'o') && (data[++index] == 'r'))
3348                                         return TokenNamefor;
3349                                 else
3350                                         return TokenNameIdentifier;
3351                         case 5:
3352                                 // if ((data[++index] == 'a') && (data[++index] == 'l')
3353                                 // && (data[++index] == 's') && (data[++index] == 'e'))
3354                                 // return TokenNamefalse;
3355                                 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3356                                         return TokenNamefinal;
3357                                 else
3358                                         return TokenNameIdentifier;
3359                         case 7:
3360                                 // foreach
3361                                 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3362                                                 && (data[++index] == 'c') && (data[++index] == 'h'))
3363                                         return TokenNameforeach;
3364                                 else
3365                                         return TokenNameIdentifier;
3366                         case 8:
3367                                 // function
3368                                 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3369                                                 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3370                                         return TokenNamefunction;
3371                                 else
3372                                         return TokenNameIdentifier;
3373                         default:
3374                                 return TokenNameIdentifier;
3375                         }
3376                 case 'g':
3377                         // global
3378                         if (length == 6) {
3379                                 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3380                                                 && (data[++index] == 'l')) {
3381                                         return TokenNameglobal;
3382                                 }
3383                         }
3384                         return TokenNameIdentifier;
3385                 case 'i':
3386                         // if int isset include include_once instanceof interface implements
3387                         switch (length) {
3388                         case 2:
3389                                 if (data[++index] == 'f')
3390                                         return TokenNameif;
3391                                 else
3392                                         return TokenNameIdentifier;
3393                         // case 3 :
3394                         // if ((data[++index] == 'n') && (data[++index] == 't'))
3395                         // return TokenNameint;
3396                         // else
3397                         // return TokenNameIdentifier;
3398                         case 5:
3399                                 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3400                                         return TokenNameisset;
3401                                 else
3402                                         return TokenNameIdentifier;
3403                         case 7:
3404                                 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3405                                                 && (data[++index] == 'd') && (data[++index] == 'e'))
3406                                         return TokenNameinclude;
3407                                 else
3408                                         return TokenNameIdentifier;
3409                         case 9:
3410                                 // interface
3411                                 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3412                                                 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3413                                         return TokenNameinterface;
3414                                 else
3415                                         return TokenNameIdentifier;
3416                         case 10:
3417                                 // instanceof
3418                                 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3419                                                 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3420                                                 && (data[++index] == 'f'))
3421                                         return TokenNameinstanceof;
3422                                 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3423                                                 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3424                                                 && (data[++index] == 's'))
3425                                         return TokenNameimplements;
3426                                 else
3427                                         return TokenNameIdentifier;
3428                         case 12:
3429                                 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3430                                                 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3431                                                 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3432                                         return TokenNameinclude_once;
3433                                 else
3434                                         return TokenNameIdentifier;
3435                         default:
3436                                 return TokenNameIdentifier;
3437                         }
3438                 case 'l':
3439                         // list
3440                         if (length == 4) {
3441                                 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3442                                         return TokenNamelist;
3443                                 }
3444                         }
3445                         return TokenNameIdentifier;
3446                 case 'n':
3447                         // new null
3448                         switch (length) {
3449                         case 3:
3450                                 if ((data[++index] == 'e') && (data[++index] == 'w'))
3451                                         return TokenNamenew;
3452                                 else
3453                                         return TokenNameIdentifier;
3454                         // case 4 :
3455                         // if ((data[++index] == 'u') && (data[++index] == 'l')
3456                         // && (data[++index] == 'l'))
3457                         // return TokenNamenull;
3458                         // else
3459                         // return TokenNameIdentifier;
3460                         default:
3461                                 return TokenNameIdentifier;
3462                         }
3463                 case 'o':
3464                         // or old_function
3465                         if (length == 2) {
3466                                 if (data[++index] == 'r') {
3467                                         return TokenNameor;
3468                                 }
3469                         }
3470                         // if (length == 12) {
3471                         // if ((data[++index] == 'l')
3472                         // && (data[++index] == 'd')
3473                         // && (data[++index] == '_')
3474                         // && (data[++index] == 'f')
3475                         // && (data[++index] == 'u')
3476                         // && (data[++index] == 'n')
3477                         // && (data[++index] == 'c')
3478                         // && (data[++index] == 't')
3479                         // && (data[++index] == 'i')
3480                         // && (data[++index] == 'o')
3481                         // && (data[++index] == 'n')) {
3482                         // return TokenNameold_function;
3483                         // }
3484                         // }
3485                         return TokenNameIdentifier;
3486                 case 'p':
3487                         // print public private protected
3488                         switch (length) {
3489                         case 5:
3490                                 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3491                                         return TokenNameprint;
3492                                 } else
3493                                         return TokenNameIdentifier;
3494                         case 6:
3495                                 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3496                                                 && (data[++index] == 'c')) {
3497                                         return TokenNamepublic;
3498                                 } else
3499                                         return TokenNameIdentifier;
3500                         case 7:
3501                                 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3502                                                 && (data[++index] == 't') && (data[++index] == 'e')) {
3503                                         return TokenNameprivate;
3504                                 } else
3505                                         return TokenNameIdentifier;
3506                         case 9:
3507                                 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3508                                                 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3509                                         return TokenNameprotected;
3510                                 } else
3511                                         return TokenNameIdentifier;
3512                         }
3513                         return TokenNameIdentifier;
3514                 case 'r':
3515                         // return require require_once
3516                         if (length == 6) {
3517                                 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3518                                                 && (data[++index] == 'n')) {
3519                                         return TokenNamereturn;
3520                                 }
3521                         } else if (length == 7) {
3522                                 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3523                                                 && (data[++index] == 'r') && (data[++index] == 'e')) {
3524                                         return TokenNamerequire;
3525                                 }
3526                         } else if (length == 12) {
3527                                 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3528                                                 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3529                                                 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3530                                         return TokenNamerequire_once;
3531                                 }
3532                         } else
3533                                 return TokenNameIdentifier;
3534                 case 's':
3535                         // self static switch
3536                         switch (length) {
3537 //                      case 4:
3538 //                              if ((data[++index] == 'e') && (data[++index] == 'l') && (data[++index] == 'f')) {
3539 //                                      return TokenNameself;
3540 //                              }
3541 //                              return TokenNameIdentifier;
3542                         case 6:
3543                                 if (data[++index] == 't')
3544                                         if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3545                                                 return TokenNamestatic;
3546                                         } else
3547                                                 return TokenNameIdentifier;
3548                                 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3549                                                 && (data[++index] == 'h'))
3550                                         return TokenNameswitch;
3551                                 else
3552                                         return TokenNameIdentifier;
3553                         default:
3554                                 return TokenNameIdentifier;
3555                         }
3556                 case 't':
3557                         // try true throw
3558                         switch (length) {
3559                         case 3:
3560                                 if ((data[++index] == 'r') && (data[++index] == 'y'))
3561                                         return TokenNametry;
3562                                 else
3563                                         return TokenNameIdentifier;
3564                         // case 4 :
3565                         // if ((data[++index] == 'r') && (data[++index] == 'u')
3566                         // && (data[++index] == 'e'))
3567                         // return TokenNametrue;
3568                         // else
3569                         // return TokenNameIdentifier;
3570                         case 5:
3571                                 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3572                                         return TokenNamethrow;
3573                                 else
3574                                         return TokenNameIdentifier;
3575                         default:
3576                                 return TokenNameIdentifier;
3577                         }
3578                 case 'u':
3579                         // use unset
3580                         switch (length) {
3581                         case 3:
3582                                 if ((data[++index] == 's') && (data[++index] == 'e'))
3583                                         return TokenNameuse;
3584                                 else
3585                                         return TokenNameIdentifier;
3586                         case 5:
3587                                 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3588                                         return TokenNameunset;
3589                                 else
3590                                         return TokenNameIdentifier;
3591                         default:
3592                                 return TokenNameIdentifier;
3593                         }
3594                 case 'v':
3595                         // var
3596                         switch (length) {
3597                         case 3:
3598                                 if ((data[++index] == 'a') && (data[++index] == 'r'))
3599                                         return TokenNamevar;
3600                                 else
3601                                         return TokenNameIdentifier;
3602                         default:
3603                                 return TokenNameIdentifier;
3604                         }
3605                 case 'w':
3606                         // while
3607                         switch (length) {
3608                         case 5:
3609                                 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3610                                         return TokenNamewhile;
3611                                 else
3612                                         return TokenNameIdentifier;
3613                         // case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3614                         // (data[++index]=='e') && (data[++index]=='f')&&
3615                         // (data[++index]=='p'))
3616                         // return TokenNamewidefp ;
3617                         // else
3618                         // return TokenNameIdentifier;
3619                         default:
3620                                 return TokenNameIdentifier;
3621                         }
3622                 case 'x':
3623                         // xor
3624                         switch (length) {
3625                         case 3:
3626                                 if ((data[++index] == 'o') && (data[++index] == 'r'))
3627                                         return TokenNamexor;
3628                                 else
3629                                         return TokenNameIdentifier;
3630                         default:
3631                                 return TokenNameIdentifier;
3632                         }
3633                 default:
3634                         return TokenNameIdentifier;
3635                 }
3636         }
3637
3638         public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3639                 // when entering this method the currentCharacter is the firt
3640                 // digit of the number , i.e. it may be preceeded by a . when
3641                 // dotPrefix is true
3642                 boolean floating = dotPrefix;
3643                 if ((!dotPrefix) && (currentCharacter == '0')) {
3644                         if (getNextChar('x', 'X') >= 0) { // ----------hexa-----------------
3645                                 // force the first char of the hexa number do exist...
3646                                 // consume next character
3647                                 unicodeAsBackSlash = false;
3648                                 currentCharacter = source[currentPosition++];
3649                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
3650                                 // && (source[currentPosition] == 'u')) {
3651                                 // getNextUnicodeChar();
3652                                 // } else {
3653                                 // if (withoutUnicodePtr != 0) {
3654                                 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3655                                 // }
3656                                 // }
3657                                 if (Character.digit(currentCharacter, 16) == -1)
3658                                         throw new InvalidInputException(INVALID_HEXA);
3659                                 // ---end forcing--
3660                                 while (getNextCharAsDigit(16)) {
3661                                 }
3662                                 ;
3663                                 // if (getNextChar('l', 'L') >= 0)
3664                                 // return TokenNameLongLiteral;
3665                                 // else
3666                                 return TokenNameIntegerLiteral;
3667                         }
3668                         // there is x or X in the number
3669                         // potential octal ! ... some one may write 000099.0 ! thus 00100 <
3670                         // 00078.0 is true !!!!! crazy language
3671                         if (getNextCharAsDigit()) {
3672                                 // -------------potential octal-----------------
3673                                 while (getNextCharAsDigit()) {
3674                                 }
3675                                 ;
3676                                 // if (getNextChar('l', 'L') >= 0) {
3677                                 // return TokenNameLongLiteral;
3678                                 // }
3679                                 //
3680                                 // if (getNextChar('f', 'F') >= 0) {
3681                                 // return TokenNameFloatingPointLiteral;
3682                                 // }
3683                                 if (getNextChar('d', 'D') >= 0) {
3684                                         return TokenNameDoubleLiteral;
3685                                 } else { // make the distinction between octal and float ....
3686                                         if (getNextChar('.')) { // bingo ! ....
3687                                                 while (getNextCharAsDigit()) {
3688                                                 }
3689                                                 ;
3690                                                 if (getNextChar('e', 'E') >= 0) {
3691                                                         // consume next character
3692                                                         unicodeAsBackSlash = false;
3693                                                         currentCharacter = source[currentPosition++];
3694                                                         // if (((currentCharacter = source[currentPosition++]) == '\\')
3695                                                         // && (source[currentPosition] == 'u')) {
3696                                                         // getNextUnicodeChar();
3697                                                         // } else {
3698                                                         // if (withoutUnicodePtr != 0) {
3699                                                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3700                                                         // }
3701                                                         // }
3702                                                         if ((currentCharacter == '-') || (currentCharacter == '+')) {
3703                                                                 // consume next character
3704                                                                 unicodeAsBackSlash = false;
3705                                                                 currentCharacter = source[currentPosition++];
3706                                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
3707                                                                 // && (source[currentPosition] == 'u')) {
3708                                                                 // getNextUnicodeChar();
3709                                                                 // } else {
3710                                                                 // if (withoutUnicodePtr != 0) {
3711                                                                 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3712                                                                 // currentCharacter;
3713                                                                 // }
3714                                                                 // }
3715                                                         }
3716                                                         if (!Character.isDigit(currentCharacter))
3717                                                                 throw new InvalidInputException(INVALID_FLOAT);
3718                                                         while (getNextCharAsDigit()) {
3719                                                         }
3720                                                         ;
3721                                                 }
3722                                                 // if (getNextChar('f', 'F') >= 0)
3723                                                 // return TokenNameFloatingPointLiteral;
3724                                                 getNextChar('d', 'D'); // jump over potential d or D
3725                                                 return TokenNameDoubleLiteral;
3726                                         } else {
3727                                                 return TokenNameIntegerLiteral;
3728                                         }
3729                                 }
3730                         } else {
3731                                 /* carry on */
3732                         }
3733                 }
3734                 while (getNextCharAsDigit()) {
3735                 }
3736                 ;
3737                 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3738                 // return TokenNameLongLiteral;
3739                 if ((!dotPrefix) && (getNextChar('.'))) { // decimal part that can be empty
3740                         while (getNextCharAsDigit()) {
3741                         }
3742                         ;
3743                         floating = true;
3744                 }
3745                 // if floating is true both exponant and suffix may be optional
3746                 if (getNextChar('e', 'E') >= 0) {
3747                         floating = true;
3748                         // consume next character
3749                         unicodeAsBackSlash = false;
3750                         currentCharacter = source[currentPosition++];
3751                         // if (((currentCharacter = source[currentPosition++]) == '\\')
3752                         // && (source[currentPosition] == 'u')) {
3753                         // getNextUnicodeChar();
3754                         // } else {
3755                         // if (withoutUnicodePtr != 0) {
3756                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3757                         // }
3758                         // }
3759                         if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3760                                 // next
3761                                 // character
3762                                 unicodeAsBackSlash = false;
3763                                 currentCharacter = source[currentPosition++];
3764                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
3765                                 // && (source[currentPosition] == 'u')) {
3766                                 // getNextUnicodeChar();
3767                                 // } else {
3768                                 // if (withoutUnicodePtr != 0) {
3769                                 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3770                                 // }
3771                                 // }
3772                         }
3773                         if (!Character.isDigit(currentCharacter))
3774                                 throw new InvalidInputException(INVALID_FLOAT);
3775                         while (getNextCharAsDigit()) {
3776                         }
3777                         ;
3778                 }
3779                 if (getNextChar('d', 'D') >= 0)
3780                         return TokenNameDoubleLiteral;
3781                 // if (getNextChar('f', 'F') >= 0)
3782                 // return TokenNameFloatingPointLiteral;
3783                 // the long flag has been tested before
3784                 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3785         }
3786
3787         /**
3788          * Search the line number corresponding to a specific position
3789          *
3790          */
3791         public final int getLineNumber(int position) {
3792                 if (lineEnds == null)
3793                         return 1;
3794                 int length = linePtr + 1;
3795                 if (length == 0)
3796                         return 1;
3797                 int g = 0, d = length - 1;
3798                 int m = 0;
3799                 while (g <= d) {
3800                         m = (g + d) / 2;
3801                         if (position < lineEnds[m]) {
3802                                 d = m - 1;
3803                         } else if (position > lineEnds[m]) {
3804                                 g = m + 1;
3805                         } else {
3806                                 return m + 1;
3807                         }
3808                 }
3809                 if (position < lineEnds[m]) {
3810                         return m + 1;
3811                 }
3812                 return m + 2;
3813         }
3814
3815         public void setPHPMode(boolean mode) {
3816                 phpMode = mode;
3817         }
3818
3819         public final void setSource(char[] source) {
3820                 setSource(null, source);
3821         }
3822
3823         public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3824                 // the source-buffer is set to sourceString
3825                 this.compilationUnit = compilationUnit;
3826                 if (source == null) {
3827                         this.source = new char[0];
3828                 } else {
3829                         this.source = source;
3830                 }
3831                 startPosition = -1;
3832                 initialPosition = currentPosition = 0;
3833                 containsAssertKeyword = false;
3834                 withoutUnicodeBuffer = new char[this.source.length];
3835                 // encapsedStringStack = new Stack();
3836         }
3837
3838         public String toString() {
3839                 if (startPosition == source.length)
3840                         return "EOF\n\n" + new String(source); //$NON-NLS-1$
3841                 if (currentPosition > source.length)
3842                         return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3843                 char front[] = new char[startPosition];
3844                 System.arraycopy(source, 0, front, 0, startPosition);
3845                 int middleLength = (currentPosition - 1) - startPosition + 1;
3846                 char middle[];
3847                 if (middleLength > -1) {
3848                         middle = new char[middleLength];
3849                         System.arraycopy(source, startPosition, middle, 0, middleLength);
3850                 } else {
3851                         middle = new char[0];
3852                 }
3853                 char end[] = new char[source.length - (currentPosition - 1)];
3854                 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3855                 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3856                                 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3857                                 + new String(end);
3858         }
3859
3860         public final String toStringAction(int act) {
3861                 switch (act) {
3862                 case TokenNameERROR:
3863                         return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3864                 // //$NON-NLS-1$
3865                 case TokenNameINLINE_HTML:
3866                         return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3867                 case TokenNameIdentifier:
3868                         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3869                 case TokenNameVariable:
3870                         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3871                 case TokenNameabstract:
3872                         return "abstract"; //$NON-NLS-1$
3873                 case TokenNameand:
3874                         return "AND"; //$NON-NLS-1$
3875                 case TokenNamearray:
3876                         return "array"; //$NON-NLS-1$
3877                 case TokenNameas:
3878                         return "as"; //$NON-NLS-1$
3879                 case TokenNamebreak:
3880                         return "break"; //$NON-NLS-1$
3881                 case TokenNamecase:
3882                         return "case"; //$NON-NLS-1$
3883                 case TokenNameclass:
3884                         return "class"; //$NON-NLS-1$
3885                 case TokenNamecatch:
3886                         return "catch"; //$NON-NLS-1$
3887                 case TokenNameclone:
3888                         //$NON-NLS-1$
3889                         return "clone";
3890                 case TokenNameconst:
3891                         //$NON-NLS-1$
3892                         return "const";
3893                 case TokenNamecontinue:
3894                         return "continue"; //$NON-NLS-1$
3895                 case TokenNamedefault:
3896                         return "default"; //$NON-NLS-1$
3897                 // case TokenNamedefine :
3898                 // return "define"; //$NON-NLS-1$
3899                 case TokenNamedo:
3900                         return "do"; //$NON-NLS-1$
3901                 case TokenNameecho:
3902                         return "echo"; //$NON-NLS-1$
3903                 case TokenNameelse:
3904                         return "else"; //$NON-NLS-1$
3905                 case TokenNameelseif:
3906                         return "elseif"; //$NON-NLS-1$
3907                 case TokenNameendfor:
3908                         return "endfor"; //$NON-NLS-1$
3909                 case TokenNameendforeach:
3910                         return "endforeach"; //$NON-NLS-1$
3911                 case TokenNameendif:
3912                         return "endif"; //$NON-NLS-1$
3913                 case TokenNameendswitch:
3914                         return "endswitch"; //$NON-NLS-1$
3915                 case TokenNameendwhile:
3916                         return "endwhile"; //$NON-NLS-1$
3917                 case TokenNameexit:
3918                         return "exit";
3919                 case TokenNameextends:
3920                         return "extends"; //$NON-NLS-1$
3921                 // case TokenNamefalse :
3922                 // return "false"; //$NON-NLS-1$
3923                 case TokenNamefinal:
3924                         return "final"; //$NON-NLS-1$
3925                 case TokenNamefor:
3926                         return "for"; //$NON-NLS-1$
3927                 case TokenNameforeach:
3928                         return "foreach"; //$NON-NLS-1$
3929                 case TokenNamefunction:
3930                         return "function"; //$NON-NLS-1$
3931                 case TokenNameglobal:
3932                         return "global"; //$NON-NLS-1$
3933                 case TokenNameif:
3934                         return "if"; //$NON-NLS-1$
3935                 case TokenNameimplements:
3936                         return "implements"; //$NON-NLS-1$
3937                 case TokenNameinclude:
3938                         return "include"; //$NON-NLS-1$
3939                 case TokenNameinclude_once:
3940                         return "include_once"; //$NON-NLS-1$
3941                 case TokenNameinstanceof:
3942                         return "instanceof"; //$NON-NLS-1$
3943                 case TokenNameinterface:
3944                         return "interface"; //$NON-NLS-1$
3945                 case TokenNameisset:
3946                         return "isset"; //$NON-NLS-1$
3947                 case TokenNamelist:
3948                         return "list"; //$NON-NLS-1$
3949                 case TokenNamenew:
3950                         return "new"; //$NON-NLS-1$
3951                 // case TokenNamenull :
3952                 // return "null"; //$NON-NLS-1$
3953                 case TokenNameor:
3954                         return "OR"; //$NON-NLS-1$
3955                 case TokenNameprint:
3956                         return "print"; //$NON-NLS-1$
3957                 case TokenNameprivate:
3958                         return "private"; //$NON-NLS-1$
3959                 case TokenNameprotected:
3960                         return "protected"; //$NON-NLS-1$
3961                 case TokenNamepublic:
3962                         return "public"; //$NON-NLS-1$
3963                 case TokenNamerequire:
3964                         return "require"; //$NON-NLS-1$
3965                 case TokenNamerequire_once:
3966                         return "require_once"; //$NON-NLS-1$
3967                 case TokenNamereturn:
3968                         return "return"; //$NON-NLS-1$
3969 //              case TokenNameself:
3970 //                      return "self"; //$NON-NLS-1$
3971                 case TokenNamestatic:
3972                         return "static"; //$NON-NLS-1$
3973                 case TokenNameswitch:
3974                         return "switch"; //$NON-NLS-1$
3975                 // case TokenNametrue :
3976                 // return "true"; //$NON-NLS-1$
3977                 case TokenNameunset:
3978                         return "unset"; //$NON-NLS-1$
3979                 case TokenNamevar:
3980                         return "var"; //$NON-NLS-1$
3981                 case TokenNamewhile:
3982                         return "while"; //$NON-NLS-1$
3983                 case TokenNamexor:
3984                         return "XOR"; //$NON-NLS-1$
3985                 // case TokenNamethis :
3986                 // return "$this"; //$NON-NLS-1$
3987                 case TokenNameIntegerLiteral:
3988                         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3989                 case TokenNameDoubleLiteral:
3990                         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3991                 case TokenNameStringDoubleQuote:
3992                         return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3993                 case TokenNameStringSingleQuote:
3994                         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3995                 case TokenNameStringInterpolated:
3996                         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3997                 case TokenNameEncapsedString0:
3998                         return "`"; //$NON-NLS-1$
3999                 // case TokenNameEncapsedString1:
4000                 // return "\'"; //$NON-NLS-1$
4001                 // case TokenNameEncapsedString2:
4002                 // return "\""; //$NON-NLS-1$
4003                 case TokenNameSTRING:
4004                         return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4005                 case TokenNameHEREDOC:
4006                         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4007                 case TokenNamePLUS_PLUS:
4008                         return "++"; //$NON-NLS-1$
4009                 case TokenNameMINUS_MINUS:
4010                         return "--"; //$NON-NLS-1$
4011                 case TokenNameEQUAL_EQUAL:
4012                         return "=="; //$NON-NLS-1$
4013                 case TokenNameEQUAL_EQUAL_EQUAL:
4014                         return "==="; //$NON-NLS-1$
4015                 case TokenNameEQUAL_GREATER:
4016                         return "=>"; //$NON-NLS-1$
4017                 case TokenNameLESS_EQUAL:
4018                         return "<="; //$NON-NLS-1$
4019                 case TokenNameGREATER_EQUAL:
4020                         return ">="; //$NON-NLS-1$
4021                 case TokenNameNOT_EQUAL:
4022                         return "!="; //$NON-NLS-1$
4023                 case TokenNameNOT_EQUAL_EQUAL:
4024                         return "!=="; //$NON-NLS-1$
4025                 case TokenNameLEFT_SHIFT:
4026                         return "<<"; //$NON-NLS-1$
4027                 case TokenNameRIGHT_SHIFT:
4028                         return ">>"; //$NON-NLS-1$
4029                 case TokenNamePLUS_EQUAL:
4030                         return "+="; //$NON-NLS-1$
4031                 case TokenNameMINUS_EQUAL:
4032                         return "-="; //$NON-NLS-1$
4033                 case TokenNameMULTIPLY_EQUAL:
4034                         return "*="; //$NON-NLS-1$
4035                 case TokenNameDIVIDE_EQUAL:
4036                         return "/="; //$NON-NLS-1$
4037                 case TokenNameAND_EQUAL:
4038                         return "&="; //$NON-NLS-1$
4039                 case TokenNameOR_EQUAL:
4040                         return "|="; //$NON-NLS-1$
4041                 case TokenNameXOR_EQUAL:
4042                         return "^="; //$NON-NLS-1$
4043                 case TokenNameREMAINDER_EQUAL:
4044                         return "%="; //$NON-NLS-1$
4045                 case TokenNameDOT_EQUAL:
4046                         return ".="; //$NON-NLS-1$
4047                 case TokenNameLEFT_SHIFT_EQUAL:
4048                         return "<<="; //$NON-NLS-1$
4049                 case TokenNameRIGHT_SHIFT_EQUAL:
4050                         return ">>="; //$NON-NLS-1$
4051                 case TokenNameOR_OR:
4052                         return "||"; //$NON-NLS-1$
4053                 case TokenNameAND_AND:
4054                         return "&&"; //$NON-NLS-1$
4055                 case TokenNamePLUS:
4056                         return "+"; //$NON-NLS-1$
4057                 case TokenNameMINUS:
4058                         return "-"; //$NON-NLS-1$
4059                 case TokenNameMINUS_GREATER:
4060                         return "->";
4061                 case TokenNameNOT:
4062                         return "!"; //$NON-NLS-1$
4063                 case TokenNameREMAINDER:
4064                         return "%"; //$NON-NLS-1$
4065                 case TokenNameXOR:
4066                         return "^"; //$NON-NLS-1$
4067                 case TokenNameAND:
4068                         return "&"; //$NON-NLS-1$
4069                 case TokenNameMULTIPLY:
4070                         return "*"; //$NON-NLS-1$
4071                 case TokenNameOR:
4072                         return "|"; //$NON-NLS-1$
4073                 case TokenNameTWIDDLE:
4074                         return "~"; //$NON-NLS-1$
4075                 case TokenNameTWIDDLE_EQUAL:
4076                         return "~="; //$NON-NLS-1$
4077                 case TokenNameDIVIDE:
4078                         return "/"; //$NON-NLS-1$
4079                 case TokenNameGREATER:
4080                         return ">"; //$NON-NLS-1$
4081                 case TokenNameLESS:
4082                         return "<"; //$NON-NLS-1$
4083                 case TokenNameLPAREN:
4084                         return "("; //$NON-NLS-1$
4085                 case TokenNameRPAREN:
4086                         return ")"; //$NON-NLS-1$
4087                 case TokenNameLBRACE:
4088                         return "{"; //$NON-NLS-1$
4089                 case TokenNameRBRACE:
4090                         return "}"; //$NON-NLS-1$
4091                 case TokenNameLBRACKET:
4092                         return "["; //$NON-NLS-1$
4093                 case TokenNameRBRACKET:
4094                         return "]"; //$NON-NLS-1$
4095                 case TokenNameSEMICOLON:
4096                         return ";"; //$NON-NLS-1$
4097                 case TokenNameQUESTION:
4098                         return "?"; //$NON-NLS-1$
4099                 case TokenNameCOLON:
4100                         return ":"; //$NON-NLS-1$
4101                 case TokenNameCOMMA:
4102                         return ","; //$NON-NLS-1$
4103                 case TokenNameDOT:
4104                         return "."; //$NON-NLS-1$
4105                 case TokenNameEQUAL:
4106                         return "="; //$NON-NLS-1$
4107                 case TokenNameAT:
4108                         return "@";
4109                 case TokenNameDOLLAR:
4110                         return "$";
4111                 case TokenNameDOLLAR_LBRACE:
4112                         return "${";
4113                 case TokenNameLBRACE_DOLLAR:
4114                         return "{$";
4115                 case TokenNameEOF:
4116                         return "EOF"; //$NON-NLS-1$
4117                 case TokenNameWHITESPACE:
4118                         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4119                 case TokenNameCOMMENT_LINE:
4120                         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4121                 case TokenNameCOMMENT_BLOCK:
4122                         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4123                 case TokenNameCOMMENT_PHPDOC:
4124                         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4125                 // case TokenNameHTML :
4126                 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
4127                 // //$NON-NLS-1$
4128                 case TokenNameFILE:
4129                         return "__FILE__"; //$NON-NLS-1$
4130                 case TokenNameLINE:
4131                         return "__LINE__"; //$NON-NLS-1$
4132                 case TokenNameCLASS_C:
4133                         return "__CLASS__"; //$NON-NLS-1$
4134                 case TokenNameMETHOD_C:
4135                         return "__METHOD__"; //$NON-NLS-1$
4136                 case TokenNameFUNC_C:
4137                         return "__FUNCTION__"; //$NON-NLS-1
4138                 case TokenNameboolCAST:
4139                         return "( bool )"; //$NON-NLS-1$
4140                 case TokenNameintCAST:
4141                         return "( int )"; //$NON-NLS-1$
4142                 case TokenNamedoubleCAST:
4143                         return "( double )"; //$NON-NLS-1$
4144                 case TokenNameobjectCAST:
4145                         return "( object )"; //$NON-NLS-1$
4146                 case TokenNamestringCAST:
4147                         return "( string )"; //$NON-NLS-1$
4148                 default:
4149                         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
4150                 }
4151         }
4152
/**
 * Creates a scanner with comment tokenizing and whitespace tokenizing both
 * switched off. Simply delegates to the two-argument constructor.
 */
public Scanner() {
    this(false, false);
}
4156
/**
 * Creates a scanner with the given tokenizing flags; the check for
 * non-externalized string literals is passed on as <code>false</code>.
 *
 * @param tokenizeComments
 *            forwarded unchanged to the three-argument constructor
 * @param tokenizeWhiteSpace
 *            forwarded unchanged to the three-argument constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
    this(tokenizeComments, tokenizeWhiteSpace, false);
}
4160
/**
 * Creates a scanner with the given flags; <code>assertMode</code> is passed
 * on as <code>false</code>.
 *
 * @param tokenizeComments
 *            forwarded unchanged to the four-argument constructor
 * @param tokenizeWhiteSpace
 *            forwarded unchanged to the four-argument constructor
 * @param checkNonExternalizedStringLiterals
 *            forwarded unchanged to the four-argument constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
        boolean checkNonExternalizedStringLiterals) {
    this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
}
4164
/**
 * Creates a scanner without task-tag configuration. Delegates to the
 * eight-argument constructor, passing <code>false</code> for
 * <code>tokenizeStrings</code>, <code>null</code> for both
 * <code>taskTags</code> and <code>taskPriorities</code>, and
 * <code>true</code> for <code>isTaskCaseSensitive</code>.
 *
 * @param tokenizeComments
 *            forwarded unchanged
 * @param tokenizeWhiteSpace
 *            forwarded unchanged
 * @param checkNonExternalizedStringLiterals
 *            forwarded unchanged
 * @param assertMode
 *            forwarded unchanged
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
        boolean assertMode) {
    this(tokenizeComments,
            tokenizeWhiteSpace,
            checkNonExternalizedStringLiterals,
            assertMode,
            false, // tokenizeStrings
            null, // taskTags
            null, // taskPriorities
            true); // isTaskCaseSensitive
}
4169
/**
 * Creates a scanner with every option given explicitly. The end-of-file
 * position is initialised to <code>Integer.MAX_VALUE</code>.
 *
 * @param tokenizeComments
 *            stored in the field of the same name
 * @param tokenizeWhiteSpace
 *            stored in the field of the same name
 * @param checkNonExternalizedStringLiterals
 *            stored in the field of the same name
 * @param assertMode
 *            accepted for signature compatibility only; it is not stored
 *            (the 1.4 assert support is disabled in this scanner)
 * @param tokenizeStrings
 *            stored in the field of the same name
 * @param taskTags
 *            the task tags looked for by <code>checkTaskTag</code>; may be
 *            <code>null</code>
 * @param taskPriorities
 *            priorities matched to <code>taskTags</code> by index; may be
 *            <code>null</code>
 * @param isTaskCaseSensitive
 *            whether <code>checkTaskTag</code> compares tags case
 *            sensitively
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
        boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
    this.eofPosition = Integer.MAX_VALUE;
    this.tokenizeComments = tokenizeComments;
    this.tokenizeWhiteSpace = tokenizeWhiteSpace;
    this.tokenizeStrings = tokenizeStrings;
    this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
    // assertMode is intentionally not stored.
    this.taskTags = taskTags;
    this.taskPriorities = taskPriorities;
    // Previously this argument was dropped, so the caller's choice never
    // reached checkTaskTag, which reads this.isTaskCaseSensitive.
    this.isTaskCaseSensitive = isTaskCaseSensitive;
}
4182
4183         private void checkNonExternalizeString() throws InvalidInputException {
4184                 if (currentLine == null)
4185                         return;
4186                 parseTags(currentLine);
4187         }
4188
4189         private void parseTags(NLSLine line) throws InvalidInputException {
4190                 String s = new String(getCurrentTokenSource());
4191                 int pos = s.indexOf(TAG_PREFIX);
4192                 int lineLength = line.size();
4193                 while (pos != -1) {
4194                         int start = pos + TAG_PREFIX_LENGTH;
4195                         int end = s.indexOf(TAG_POSTFIX, start);
4196                         String index = s.substring(start, end);
4197                         int i = 0;
4198                         try {
4199                                 i = Integer.parseInt(index) - 1;
4200                                 // Tags are one based not zero based.
4201                         } catch (NumberFormatException e) {
4202                                 i = -1; // we don't want to consider this as a valid NLS tag
4203                         }
4204                         if (line.exists(i)) {
4205                                 line.set(i, null);
4206                         }
4207                         pos = s.indexOf(TAG_PREFIX, start);
4208                 }
4209                 this.nonNLSStrings = new StringLiteral[lineLength];
4210                 int nonNLSCounter = 0;
4211                 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4212                         StringLiteral literal = (StringLiteral) iterator.next();
4213                         if (literal != null) {
4214                                 this.nonNLSStrings[nonNLSCounter++] = literal;
4215                         }
4216                 }
4217                 if (nonNLSCounter == 0) {
4218                         this.nonNLSStrings = null;
4219                         currentLine = null;
4220                         return;
4221                 }
4222                 this.wasNonExternalizedStringLiteral = true;
4223                 if (nonNLSCounter != lineLength) {
4224                         System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
4225                 }
4226                 currentLine = null;
4227         }
4228
4229         public final void scanEscapeCharacter() throws InvalidInputException {
4230                 // the string with "\\u" is a legal string of two chars \ and u
4231                 // thus we use a direct access to the source (for regular cases).
4232                 if (unicodeAsBackSlash) {
4233                         // consume next character
4234                         unicodeAsBackSlash = false;
4235                         // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4236                         // (source[currentPosition] == 'u')) {
4237                         // getNextUnicodeChar();
4238                         // } else {
4239                         if (withoutUnicodePtr != 0) {
4240                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
4241                                 // }
4242                         }
4243                 } else
4244                         currentCharacter = source[currentPosition++];
4245                 switch (currentCharacter) {
4246                 case 'b':
4247                         currentCharacter = '\b';
4248                         break;
4249                 case 't':
4250                         currentCharacter = '\t';
4251                         break;
4252                 case 'n':
4253                         currentCharacter = '\n';
4254                         break;
4255                 case 'f':
4256                         currentCharacter = '\f';
4257                         break;
4258                 case 'r':
4259                         currentCharacter = '\r';
4260                         break;
4261                 case '\"':
4262                         currentCharacter = '\"';
4263                         break;
4264                 case '\'':
4265                         currentCharacter = '\'';
4266                         break;
4267                 case '\\':
4268                         currentCharacter = '\\';
4269                         break;
4270                 default:
4271                         // -----------octal escape--------------
4272                         // OctalDigit
4273                         // OctalDigit OctalDigit
4274                         // ZeroToThree OctalDigit OctalDigit
4275                         int number = Character.getNumericValue(currentCharacter);
4276                         if (number >= 0 && number <= 7) {
4277                                 boolean zeroToThreeNot = number > 3;
4278                                 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4279                                         int digit = Character.getNumericValue(currentCharacter);
4280                                         if (digit >= 0 && digit <= 7) {
4281                                                 number = (number * 8) + digit;
4282                                                 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4283                                                         if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4284                                                                 // Digit --> ignore last character
4285                                                                 currentPosition--;
4286                                                         } else {
4287                                                                 digit = Character.getNumericValue(currentCharacter);
4288                                                                 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4289                                                                         // OctalDigit OctalDigit
4290                                                                         number = (number * 8) + digit;
4291                                                                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4292                                                                         // --> ignore last character
4293                                                                         currentPosition--;
4294                                                                 }
4295                                                         }
4296                                                 } else { // has read \OctalDigit NonDigit--> ignore last
4297                                                         // character
4298                                                         currentPosition--;
4299                                                 }
4300                                         } else { // has read \OctalDigit NonOctalDigit--> ignore last
4301                                                 // character
4302                                                 currentPosition--;
4303                                         }
4304                                 } else { // has read \OctalDigit --> ignore last character
4305                                         currentPosition--;
4306                                 }
4307                                 if (number > 255)
4308                                         throw new InvalidInputException(INVALID_ESCAPE);
4309                                 currentCharacter = (char) number;
4310                         } else
4311                                 throw new InvalidInputException(INVALID_ESCAPE);
4312                 }
4313         }
4314
4315         // check presence of task: tags
4316         // TODO (frederic) see if we need to take unicode characters into account...
4317         public void checkTaskTag(int commentStart, int commentEnd) {
4318                 char[] src = this.source;
4319
4320                 // only look for newer task: tags
4321                 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4322                         return;
4323                 }
4324                 int foundTaskIndex = this.foundTaskCount;
4325                 char previous = src[commentStart + 1]; // should be '*' or '/'
4326                 nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4327                         char[] tag = null;
4328                         char[] priority = null;
4329                         // check for tag occurrence only if not ambiguous with javadoc tag
4330                         if (previous != '@') {
4331                                 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4332                                         tag = this.taskTags[itag];
4333                                         int tagLength = tag.length;
4334                                         if (tagLength == 0)
4335                                                 continue nextTag;
4336
4337                                         // ensure tag is not leaded with letter if tag starts with a letter
4338                                         if (Scanner.isPHPIdentifierStart(tag[0])) {
4339                                                 if (Scanner.isPHPIdentifierPart(previous)) {
4340                                                         continue nextTag;
4341                                                 }
4342                                         }
4343
4344                                         for (int t = 0; t < tagLength; t++) {
4345                                                 char sc, tc;
4346                                                 int x = i + t;
4347                                                 if (x >= this.eofPosition || x >= commentEnd)
4348                                                         continue nextTag;
4349                                                 if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4350                                                         if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case
4351                                                                 // insensitive
4352                                                                 // check
4353                                                                 continue nextTag;
4354                                                         }
4355                                                 }
4356                                         }
4357                                         // ensure tag is not followed with letter if tag finishes with a
4358                                         // letter
4359                                         if (i + tagLength < commentEnd && Scanner.isPHPIdentifierPart(src[i + tagLength - 1])) {
4360                                                 if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
4361                                                         continue nextTag;
4362                                         }
4363                                         if (this.foundTaskTags == null) {
4364                                                 this.foundTaskTags = new char[5][];
4365                                                 this.foundTaskMessages = new char[5][];
4366                                                 this.foundTaskPriorities = new char[5][];
4367                                                 this.foundTaskPositions = new int[5][];
4368                                         } else if (this.foundTaskCount == this.foundTaskTags.length) {
4369                                                 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4370                                                                 this.foundTaskCount);
4371                                                 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4372                                                                 this.foundTaskCount);
4373                                                 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4374                                                                 this.foundTaskCount);
4375                                                 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4376                                                                 this.foundTaskCount);
4377                                         }
4378
4379                                         priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4380
4381                                         this.foundTaskTags[this.foundTaskCount] = tag;
4382                                         this.foundTaskPriorities[this.foundTaskCount] = priority;
4383                                         this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4384                                         this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4385                                         this.foundTaskCount++;
4386                                         i += tagLength - 1; // will be incremented when looping
4387                                         break nextTag;
4388                                 }
4389                         }
4390                         previous = src[i];
4391                 }
4392                 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4393                         // retrieve message start and end positions
4394                         int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4395                         int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4396                         // at most beginning of next task
4397                         if (max_value < msgStart) {
4398                                 max_value = msgStart; // would only occur if tag is before EOF.
4399                         }
4400                         int end = -1;
4401                         char c;
4402                         for (int j = msgStart; j < max_value; j++) {
4403                                 if ((c = src[j]) == '\n' || c == '\r') {
4404                                         end = j - 1;
4405                                         break;
4406                                 }
4407                         }
4408                         if (end == -1) {
4409                                 for (int j = max_value; j > msgStart; j--) {
4410                                         if ((c = src[j]) == '*') {
4411                                                 end = j - 1;
4412                                                 break;
4413                                         }
4414                                 }
4415                                 if (end == -1)
4416                                         end = max_value;
4417                         }
4418                         if (msgStart == end)
4419                                 continue; // empty
4420                         // trim the message
4421                         while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4422                                 end--;
4423                         while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4424                                 msgStart++;
4425                         // update the end position of the task
4426                         this.foundTaskPositions[i][1] = end;
4427                         // get the message source
4428                         final int messageLength = end - msgStart + 1;
4429                         char[] message = new char[messageLength];
4430                         System.arraycopy(src, msgStart, message, 0, messageLength);
4431                         this.foundTaskMessages[i] = message;
4432                 }
4433         }
4434
4435         // check presence of task: tags
4436         // public void checkTaskTag(int commentStart, int commentEnd) {
4437         // // only look for newer task: tags
4438         // if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount
4439         // - 1][0] >= commentStart) {
4440         // return;
4441         // }
4442         // int foundTaskIndex = this.foundTaskCount;
4443         // nextChar: for (int i = commentStart; i < commentEnd && i <
4444         // this.eofPosition; i++) {
4445         // char[] tag = null;
4446         // char[] priority = null;
4447         // // check for tag occurrence
4448         // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4449         // tag = this.taskTags[itag];
4450         // priority = this.taskPriorities != null && itag < this.taskPriorities.length
4451         // ? this.taskPriorities[itag] : null;
4452         // int tagLength = tag.length;
4453         // for (int t = 0; t < tagLength; t++) {
4454         // if (this.source[i + t] != tag[t])
4455         // continue nextTag;
4456         // }
4457         // if (this.foundTaskTags == null) {
4458         // this.foundTaskTags = new char[5][];
4459         // this.foundTaskMessages = new char[5][];
4460         // this.foundTaskPriorities = new char[5][];
4461         // this.foundTaskPositions = new int[5][];
4462         // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4463         // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new
4464         // char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4465         // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new
4466         // char[this.foundTaskCount * 2][], 0,
4467         // this.foundTaskCount);
4468         // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities =
4469         // new char[this.foundTaskCount * 2][], 0,
4470         // this.foundTaskCount);
4471         // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new
4472         // int[this.foundTaskCount * 2][], 0,
4473         // this.foundTaskCount);
4474         // }
4475         // this.foundTaskTags[this.foundTaskCount] = tag;
4476         // this.foundTaskPriorities[this.foundTaskCount] = priority;
4477         // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength
4478         // - 1 };
4479         // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4480         // this.foundTaskCount++;
4481         // i += tagLength - 1; // will be incremented when looping
4482         // }
4483         // }
4484         // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4485         // // retrieve message start and end positions
4486         // int msgStart = this.foundTaskPositions[i][0] +
4487         // this.foundTaskTags[i].length;
4488         // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i +
4489         // 1][0] - 1 : commentEnd - 1;
4490         // // at most beginning of next task
4491         // if (max_value < msgStart)
4492         // max_value = msgStart; // would only occur if tag is before EOF.
4493         // int end = -1;
4494         // char c;
4495         // for (int j = msgStart; j < max_value; j++) {
4496         // if ((c = this.source[j]) == '\n' || c == '\r') {
4497         // end = j - 1;
4498         // break;
4499         // }
4500         // }
4501         // if (end == -1) {
4502         // for (int j = max_value; j > msgStart; j--) {
4503         // if ((c = this.source[j]) == '*') {
4504         // end = j - 1;
4505         // break;
4506         // }
4507         // }
4508         // if (end == -1)
4509         // end = max_value;
4510         // }
4511         // if (msgStart == end)
4512         // continue; // empty
4513         // // trim the message
4514         // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4515         // end--;
4516         // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4517         // msgStart++;
4518         // // update the end position of the task
4519         // this.foundTaskPositions[i][1] = end;
4520         // // get the message source
4521         // final int messageLength = end - msgStart + 1;
4522         // char[] message = new char[messageLength];
4523         // System.arraycopy(source, msgStart, message, 0, messageLength);
4524         // this.foundTaskMessages[i] = message;
4525         // }
4526         // }
4527 }