Fix bug #1385272: Parsing of short open tags not fully compatible to PHP parse
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
1 /***********************************************************************************************************************************
2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others. All rights reserved. This program and the
3  * accompanying materials are made available under the terms of the Common Public License v0.5 which accompanies this distribution,
4  * and is available at http://www.eclipse.org/legal/cpl-v05.html
5  *
6  * Contributors: IBM Corporation - initial API and implementation
7  **********************************************************************************************************************************/
8 package net.sourceforge.phpdt.internal.compiler.parser;
9
10 import java.util.ArrayList;
11 import java.util.Iterator;
12 import java.util.List;
13
14 import net.sourceforge.phpdt.core.compiler.CharOperation;
15 import net.sourceforge.phpdt.core.compiler.IScanner;
16 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
17 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
19 import net.sourceforge.phpdt.internal.compiler.env.ICompilationUnit;
20
21 public class Scanner implements IScanner, ITerminalSymbols {
22         /*
23          * The APIs are: - getNextToken(), which returns the type of the current token (this
24          * value is not memorized by the scanner) - getCurrentTokenSource(), which
25          * provides the token's "REAL" source (i.e. all unicode escapes have been
26          * transformed into a correct char) - sourceStart gives the position into the
27          * stream - currentPosition-1 gives the sourceEnd position into the stream
28          */
29         // 1.4 feature
30         // private boolean assertMode;
31         public boolean useAssertAsAnIndentifier = false;
32
33         // flag indicating if processed source contains occurrences of keyword assert
34         public boolean containsAssertKeyword = false;
35
36         public boolean recordLineSeparator;
37
38         public boolean ignorePHPOneLiner = false;
39
40         public boolean phpMode = false;
41
42 //      public boolean phpExpressionTag = false;
43
44         int fFillerToken = TokenNameEOF;
45         // public Stack encapsedStringStack = null;
46
47         public char currentCharacter;
48
49         public int startPosition;
50
51         public int currentPosition;
52
53         public int initialPosition, eofPosition;
54
55         // after this position eof are generated instead of real token from the
56         // source
57         public boolean tokenizeComments;
58
59         public boolean tokenizeWhiteSpace;
60
61         public boolean tokenizeStrings;
62
63         // source should be viewed as a window (aka a part)
64         // of a entire very large stream
65         public char source[];
66
67         // unicode support
68         public char[] withoutUnicodeBuffer;
69
70         public int withoutUnicodePtr;
71
72         // when == 0 ==> no unicode in the current token
73         public boolean unicodeAsBackSlash = false;
74
75         public boolean scanningFloatLiteral = false;
76
77         // support for /** comments
78         public int[] commentStops = new int[10];
79
80         public int[] commentStarts = new int[10];
81
82         public int commentPtr = -1; // no comment test with commentPtr value -1
83
84         protected int lastCommentLinePosition = -1;
85
86         // diet parsing support - jump over some method body when requested
87         public boolean diet = false;
88
89         // support for the poor-line-debuggers ....
90         // remember the position of the cr/lf
91         public int[] lineEnds = new int[250];
92
93         public int linePtr = -1;
94
95         public boolean wasAcr = false;
96
97         public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
98
99         public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
100
101         public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
102
103         public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
104
105         public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
106
107         public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
108
109         public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
110
111         public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
112
113         public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
114
115         public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
116
117         public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
118
119         public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
120
121         // ----------------optimized identifier management------------------
122         static final char[] charArray_a = new char[] { 'a' }, charArray_b = new char[] { 'b' }, charArray_c = new char[] { 'c' },
123                         charArray_d = new char[] { 'd' }, charArray_e = new char[] { 'e' }, charArray_f = new char[] { 'f' },
124                         charArray_g = new char[] { 'g' }, charArray_h = new char[] { 'h' }, charArray_i = new char[] { 'i' },
125                         charArray_j = new char[] { 'j' }, charArray_k = new char[] { 'k' }, charArray_l = new char[] { 'l' },
126                         charArray_m = new char[] { 'm' }, charArray_n = new char[] { 'n' }, charArray_o = new char[] { 'o' },
127                         charArray_p = new char[] { 'p' }, charArray_q = new char[] { 'q' }, charArray_r = new char[] { 'r' },
128                         charArray_s = new char[] { 's' }, charArray_t = new char[] { 't' }, charArray_u = new char[] { 'u' },
129                         charArray_v = new char[] { 'v' }, charArray_w = new char[] { 'w' }, charArray_x = new char[] { 'x' },
130                         charArray_y = new char[] { 'y' }, charArray_z = new char[] { 'z' };
131
132         static final char[] charArray_va = new char[] { '$', 'a' }, charArray_vb = new char[] { '$', 'b' }, charArray_vc = new char[] {
133                         '$', 'c' }, charArray_vd = new char[] { '$', 'd' }, charArray_ve = new char[] { '$', 'e' }, charArray_vf = new char[] { '$',
134                         'f' }, charArray_vg = new char[] { '$', 'g' }, charArray_vh = new char[] { '$', 'h' },
135                         charArray_vi = new char[] { '$', 'i' }, charArray_vj = new char[] { '$', 'j' }, charArray_vk = new char[] { '$', 'k' },
136                         charArray_vl = new char[] { '$', 'l' }, charArray_vm = new char[] { '$', 'm' }, charArray_vn = new char[] { '$', 'n' },
137                         charArray_vo = new char[] { '$', 'o' }, charArray_vp = new char[] { '$', 'p' }, charArray_vq = new char[] { '$', 'q' },
138                         charArray_vr = new char[] { '$', 'r' }, charArray_vs = new char[] { '$', 's' }, charArray_vt = new char[] { '$', 't' },
139                         charArray_vu = new char[] { '$', 'u' }, charArray_vv = new char[] { '$', 'v' }, charArray_vw = new char[] { '$', 'w' },
140                         charArray_vx = new char[] { '$', 'x' }, charArray_vy = new char[] { '$', 'y' }, charArray_vz = new char[] { '$', 'z' };
141
142         public final static int MAX_OBVIOUS = 256;
143
144         static final int[] ObviousIdentCharNatures = new int[MAX_OBVIOUS];
145
146         public final static int C_DOLLAR = 8;
147
148         public final static int C_LETTER = 4;
149
150         public final static int C_DIGIT = 3;
151
152         public final static int C_SEPARATOR = 2;
153
154         public final static int C_SPACE = 1;
155         static {
156                 for (int i = '0'; i <= '9'; i++)
157                         ObviousIdentCharNatures[i] = C_DIGIT;
158
159                 for (int i = 'a'; i <= 'z'; i++)
160                         ObviousIdentCharNatures[i] = C_LETTER;
161                 for (int i = 'A'; i <= 'Z'; i++)
162                         ObviousIdentCharNatures[i] = C_LETTER;
163                 ObviousIdentCharNatures['_'] = C_LETTER;
164                 for (int i = 127; i <= 255; i++)
165                         ObviousIdentCharNatures[i] = C_LETTER;
166
167                 ObviousIdentCharNatures['$'] = C_DOLLAR;
168
169                 ObviousIdentCharNatures[10] = C_SPACE; // \ u000a: LINE FEED
170                 ObviousIdentCharNatures[12] = C_SPACE; // \ u000c: FORM FEED
171                 ObviousIdentCharNatures[13] = C_SPACE; // \ u000d: CARRIAGE RETURN
172                 ObviousIdentCharNatures[32] = C_SPACE; // \ u0020: SPACE
173                 ObviousIdentCharNatures[9] = C_SPACE; // \ u0009: HORIZONTAL TABULATION
174
175                 ObviousIdentCharNatures['.'] = C_SEPARATOR;
176                 ObviousIdentCharNatures[':'] = C_SEPARATOR;
177                 ObviousIdentCharNatures[';'] = C_SEPARATOR;
178                 ObviousIdentCharNatures[','] = C_SEPARATOR;
179                 ObviousIdentCharNatures['['] = C_SEPARATOR;
180                 ObviousIdentCharNatures[']'] = C_SEPARATOR;
181                 ObviousIdentCharNatures['('] = C_SEPARATOR;
182                 ObviousIdentCharNatures[')'] = C_SEPARATOR;
183                 ObviousIdentCharNatures['{'] = C_SEPARATOR;
184                 ObviousIdentCharNatures['}'] = C_SEPARATOR;
185                 ObviousIdentCharNatures['+'] = C_SEPARATOR;
186                 ObviousIdentCharNatures['-'] = C_SEPARATOR;
187                 ObviousIdentCharNatures['*'] = C_SEPARATOR;
188                 ObviousIdentCharNatures['/'] = C_SEPARATOR;
189                 ObviousIdentCharNatures['='] = C_SEPARATOR;
190                 ObviousIdentCharNatures['&'] = C_SEPARATOR;
191                 ObviousIdentCharNatures['|'] = C_SEPARATOR;
192                 ObviousIdentCharNatures['?'] = C_SEPARATOR;
193                 ObviousIdentCharNatures['<'] = C_SEPARATOR;
194                 ObviousIdentCharNatures['>'] = C_SEPARATOR;
195                 ObviousIdentCharNatures['!'] = C_SEPARATOR;
196                 ObviousIdentCharNatures['%'] = C_SEPARATOR;
197                 ObviousIdentCharNatures['^'] = C_SEPARATOR;
198                 ObviousIdentCharNatures['~'] = C_SEPARATOR;
199                 ObviousIdentCharNatures['"'] = C_SEPARATOR;
200                 ObviousIdentCharNatures['\''] = C_SEPARATOR;
201         }
202
203         static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
204
205         static final int TableSize = 30, InternalTableSize = 6;
206
207         // 30*6 = 180 entries
208         public static final int OptimizedLength = 6;
209
210         public/* static */
211         final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
212
213         // support for detecting non-externalized string literals
214         int currentLineNr = -1;
215
216         int previousLineNr = -1;
217
218         NLSLine currentLine = null;
219
220         List lines = new ArrayList();
221
222         public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
223
224         public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
225
226         public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
227
228         public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
229
230         public StringLiteral[] nonNLSStrings = null;
231
232         public boolean checkNonExternalizedStringLiterals = true;
233
234         public boolean wasNonExternalizedStringLiteral = false;
235
236         /* static */{
237                 for (int i = 0; i < 6; i++) {
238                         for (int j = 0; j < TableSize; j++) {
239                                 for (int k = 0; k < InternalTableSize; k++) {
240                                         charArray_length[i][j][k] = initCharArray;
241                                 }
242                         }
243                 }
244         }
245
246         static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
247
248         public static final int RoundBracket = 0;
249
250         public static final int SquareBracket = 1;
251
252         public static final int CurlyBracket = 2;
253
254         public static final int BracketKinds = 3;
255
256         // task tag support
257         public char[][] foundTaskTags = null;
258
259         public char[][] foundTaskMessages;
260
261         public char[][] foundTaskPriorities = null;
262
263         public int[][] foundTaskPositions;
264
265         public int foundTaskCount = 0;
266
267         public char[][] taskTags = null;
268
269         public char[][] taskPriorities = null;
270
271         public boolean isTaskCaseSensitive = true;
272
273         public static final boolean DEBUG = false;
274
275         public static final boolean TRACE = false;
276
277         public ICompilationUnit compilationUnit = null;
278
279         /**
280          * Determines if the specified character is permissible as the first character
281          * in a PHP identifier or variable
282          *
283          * The '$' character for PHP variables is regarded as a correct first
284          * character !
285          *
286          */
287         public static boolean isPHPIdentOrVarStart(char ch) {
288                 if (ch < MAX_OBVIOUS) {
289                         return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DOLLAR;
290                 }
291                 return false;
292                 // return Character.isLetter(ch) || (ch == '$') || (ch == '_') || (0x7F <=
293                 // ch && ch <= 0xFF);
294         }
295
296         /**
297          * Determines if the specified character is permissible as the first character
298          * in a PHP identifier.
299          *
300          * The '$' character for PHP variables isn't regarded as the first character !
301          */
302         public static boolean isPHPIdentifierStart(char ch) {
303                 if (ch < MAX_OBVIOUS) {
304                         return ObviousIdentCharNatures[ch] == C_LETTER;
305                 }
306                 return false;
307                 // return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <=
308                 // 0xFF);
309         }
310
311         /**
312          * Determines if the specified character may be part of a PHP identifier as
313          * other than the first character
314          */
315         public static boolean isPHPIdentifierPart(char ch) {
316                 if (ch < MAX_OBVIOUS) {
317                         return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DIGIT;
318                 }
319                 return false;
320                 // return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch
321                 // <= 0xFF);
322         }
323
324         public static boolean isSQLIdentifierPart(char ch) {
325                 if (ch < MAX_OBVIOUS) {
326                         return ObviousIdentCharNatures[ch] == C_LETTER || ObviousIdentCharNatures[ch] == C_DIGIT;
327                 }
328                 return false;
329         }
330
331         public final boolean atEnd() {
332                 // This code is not relevant if source is
333                 // Only a part of the real stream input
334                 return source.length == currentPosition;
335         }
336
337         public char[] getCurrentIdentifierSource() {
338                 // return the token REAL source (aka unicodes are precomputed)
339                 char[] result;
340                 // if (withoutUnicodePtr != 0)
341                 // //0 is used as a fast test flag so the real first char is in position 1
342                 // System.arraycopy(
343                 // withoutUnicodeBuffer,
344                 // 1,
345                 // result = new char[withoutUnicodePtr],
346                 // 0,
347                 // withoutUnicodePtr);
348                 // else {
349                 int length = currentPosition - startPosition;
350                 switch (length) { // see OptimizedLength
351                 case 1:
352                         return optimizedCurrentTokenSource1();
353                 case 2:
354                         return optimizedCurrentTokenSource2();
355                 case 3:
356                         return optimizedCurrentTokenSource3();
357                 case 4:
358                         return optimizedCurrentTokenSource4();
359                 case 5:
360                         return optimizedCurrentTokenSource5();
361                 case 6:
362                         return optimizedCurrentTokenSource6();
363                 }
364                 // no optimization
365                 System.arraycopy(source, startPosition, result = new char[length], 0, length);
366                 // }
367                 return result;
368         }
369
370         public int getCurrentTokenEndPosition() {
371                 return this.currentPosition - 1;
372         }
373
374         public final char[] getCurrentTokenSource() {
375                 // Return the token REAL source (aka unicodes are precomputed)
376                 char[] result;
377                 // if (withoutUnicodePtr != 0)
378                 // // 0 is used as a fast test flag so the real first char is in position 1
379                 // System.arraycopy(
380                 // withoutUnicodeBuffer,
381                 // 1,
382                 // result = new char[withoutUnicodePtr],
383                 // 0,
384                 // withoutUnicodePtr);
385                 // else {
386                 int length;
387                 System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
388                 // }
389                 return result;
390         }
391
392         public final char[] getCurrentTokenSource(int startPos) {
393                 // Return the token REAL source (aka unicodes are precomputed)
394                 char[] result;
395                 // if (withoutUnicodePtr != 0)
396                 // // 0 is used as a fast test flag so the real first char is in position 1
397                 // System.arraycopy(
398                 // withoutUnicodeBuffer,
399                 // 1,
400                 // result = new char[withoutUnicodePtr],
401                 // 0,
402                 // withoutUnicodePtr);
403                 // else {
404                 int length;
405                 System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
406                 // }
407                 return result;
408         }
409
410         public final char[] getCurrentTokenSourceString() {
411                 // return the token REAL source (aka unicodes are precomputed).
412                 // REMOVE the two " that are at the beginning and the end.
413                 char[] result;
414                 if (withoutUnicodePtr != 0)
415                         // 0 is used as a fast test flag so the real first char is in position 1
416                         System.arraycopy(withoutUnicodeBuffer, 2,
417                         // 2 is 1 (real start) + 1 (to jump over the ")
418                                         result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
419                 else {
420                         int length;
421                         System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
422                 }
423                 return result;
424         }
425
426         public final boolean equalsCurrentTokenSource(char[] word) {
427                 if (word.length != currentPosition - startPosition) {
428                         return false;
429                 }
430                 for (int i = 0; i < word.length; i++) {
431                         if (word[i] != source[startPosition + i]) {
432                                 return false;
433                         }
434                 }
435                 return true;
436         }
437
438         public final char[] getRawTokenSourceEnd() {
439                 int length = this.eofPosition - this.currentPosition - 1;
440                 char[] sourceEnd = new char[length];
441                 System.arraycopy(this.source, this.currentPosition, sourceEnd, 0, length);
442                 return sourceEnd;
443         }
444
445         public int getCurrentTokenStartPosition() {
446                 return this.startPosition;
447         }
448
449         public final String getCurrentStringLiteral() {
450                 char[] result = getCurrentStringLiteralSource();
451                 return new String(result);
452         }
453
454         public final char[] getCurrentStringLiteralSource() {
455                 // Return the token REAL source (aka unicodes are precomputed)
456                 if (startPosition + 1 >= currentPosition) {
457                         return new char[0];
458                 }
459                 char[] result;
460                 int length;
461                 System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
462                 // }
463                 return result;
464         }
465
466         public final char[] getCurrentStringLiteralSource(int startPos) {
467                 // Return the token REAL source (aka unicodes are precomputed)
468                 char[] result;
469                 int length;
470                 System.arraycopy(source, startPos + 1, result = new char[length = currentPosition - startPos - 2], 0, length);
471                 // }
472                 return result;
473         }
474
475         /*
476          * Search the source position corresponding to the end of a given line number
477          *
478          * Line numbers are 1-based, and relative to the scanner initialPosition.
479          * Character positions are 0-based.
480          *
481          * In case the given line number is inconsistent, answers -1.
482          */
483         public final int getLineEnd(int lineNumber) {
484                 if (lineEnds == null)
485                         return -1;
486                 if (lineNumber >= lineEnds.length)
487                         return -1;
488                 if (lineNumber <= 0)
489                         return -1;
490                 if (lineNumber == lineEnds.length - 1)
491                         return eofPosition;
492                 return lineEnds[lineNumber - 1];
493                 // next line start one character behind the lineEnd of the previous line
494         }
495
496         /**
497          * Search the source position corresponding to the beginning of a given line
498          * number
499          *
500          * Line numbers are 1-based, and relative to the scanner initialPosition.
501          * Character positions are 0-based.
502          *
503          * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
504          *
505          * In case the given line number is inconsistent, answers -1.
506          */
507         public final int getLineStart(int lineNumber) {
508                 if (lineEnds == null)
509                         return -1;
510                 if (lineNumber >= lineEnds.length)
511                         return -1;
512                 if (lineNumber <= 0)
513                         return -1;
514                 if (lineNumber == 1)
515                         return initialPosition;
516                 return lineEnds[lineNumber - 2] + 1;
517                 // next line start one character behind the lineEnd of the previous line
518         }
519
520         public final boolean getNextChar(char testedChar) {
521                 // BOOLEAN
522                 // handle the case of unicode.
523                 // when a unicode appears then we must use a buffer that holds char
524                 // internal values
525                 // At the end of this method currentCharacter holds the new visited char
526                 // and currentPosition points right next after it
527                 // Both previous lines are true if the currentCharacter is == to the
528                 // testedChar
529                 // On false, no side effect has occured.
530                 // ALL getNextChar.... ARE OPTIMIZED COPIES
531                 int temp = currentPosition;
532                 try {
533                         currentCharacter = source[currentPosition++];
534                         // if (((currentCharacter = source[currentPosition++]) == '\\')
535                         // && (source[currentPosition] == 'u')) {
536                         // //-------------unicode traitement ------------
537                         // int c1, c2, c3, c4;
538                         // int unicodeSize = 6;
539                         // currentPosition++;
540                         // while (source[currentPosition] == 'u') {
541                         // currentPosition++;
542                         // unicodeSize++;
543                         // }
544                         //
545                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
546                         // || c1 < 0)
547                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
548                         // || c2 < 0)
549                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
550                         // || c3 < 0)
551                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
552                         // || c4 < 0)) {
553                         // currentPosition = temp;
554                         // return false;
555                         // }
556                         //
557                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
558                         // if (currentCharacter != testedChar) {
559                         // currentPosition = temp;
560                         // return false;
561                         // }
562                         // unicodeAsBackSlash = currentCharacter == '\\';
563                         //
564                         // //need the unicode buffer
565                         // if (withoutUnicodePtr == 0) {
566                         // //buffer all the entries that have been left aside....
567                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
568                         // System.arraycopy(
569                         // source,
570                         // startPosition,
571                         // withoutUnicodeBuffer,
572                         // 1,
573                         // withoutUnicodePtr);
574                         // }
575                         // //fill the buffer with the char
576                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
577                         // return true;
578                         //
579                         // } //-------------end unicode traitement--------------
580                         // else {
581                         if (currentCharacter != testedChar) {
582                                 currentPosition = temp;
583                                 return false;
584                         }
585                         unicodeAsBackSlash = false;
586                         // if (withoutUnicodePtr != 0)
587                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
588                         return true;
589                         // }
590                 } catch (IndexOutOfBoundsException e) {
591                         unicodeAsBackSlash = false;
592                         currentPosition = temp;
593                         return false;
594                 }
595         }
596
597         public final int getNextChar(char testedChar1, char testedChar2) {
598                 // INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
599                 // test can be done with (x==0) for the first and (x>0) for the second
600                 // handle the case of unicode.
601                 // when a unicode appears then we must use a buffer that holds char
602                 // internal values
603                 // At the end of this method currentCharacter holds the new visited char
604                 // and currentPosition points right next after it
605                 // Both previous lines are true if the currentCharacter is == to the
606                 // testedChar1/2
607                 // On false, no side effect has occured.
608                 // ALL getNextChar.... ARE OPTIMIZED COPIES
609                 int temp = currentPosition;
610                 try {
611                         int result;
612                         currentCharacter = source[currentPosition++];
613                         // if (((currentCharacter = source[currentPosition++]) == '\\')
614                         // && (source[currentPosition] == 'u')) {
615                         // //-------------unicode traitement ------------
616                         // int c1, c2, c3, c4;
617                         // int unicodeSize = 6;
618                         // currentPosition++;
619                         // while (source[currentPosition] == 'u') {
620                         // currentPosition++;
621                         // unicodeSize++;
622                         // }
623                         //
624                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
625                         // || c1 < 0)
626                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
627                         // || c2 < 0)
628                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
629                         // || c3 < 0)
630                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
631                         // || c4 < 0)) {
632                         // currentPosition = temp;
633                         // return 2;
634                         // }
635                         //
636                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
637                         // if (currentCharacter == testedChar1)
638                         // result = 0;
639                         // else if (currentCharacter == testedChar2)
640                         // result = 1;
641                         // else {
642                         // currentPosition = temp;
643                         // return -1;
644                         // }
645                         //
646                         // //need the unicode buffer
647                         // if (withoutUnicodePtr == 0) {
648                         // //buffer all the entries that have been left aside....
649                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
650                         // System.arraycopy(
651                         // source,
652                         // startPosition,
653                         // withoutUnicodeBuffer,
654                         // 1,
655                         // withoutUnicodePtr);
656                         // }
657                         // //fill the buffer with the char
658                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
659                         // return result;
660                         // } //-------------end unicode traitement--------------
661                         // else {
662                         if (currentCharacter == testedChar1)
663                                 result = 0;
664                         else if (currentCharacter == testedChar2)
665                                 result = 1;
666                         else {
667                                 currentPosition = temp;
668                                 return -1;
669                         }
670                         // if (withoutUnicodePtr != 0)
671                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
672                         return result;
673                         // }
674                 } catch (IndexOutOfBoundsException e) {
675                         currentPosition = temp;
676                         return -1;
677                 }
678         }
679
680         public final boolean getNextCharAsDigit() {
681                 // BOOLEAN
682                 // handle the case of unicode.
683                 // when a unicode appears then we must use a buffer that holds char
684                 // internal values
685                 // At the end of this method currentCharacter holds the new visited char
686                 // and currentPosition points right next after it
687                 // Both previous lines are true if the currentCharacter is a digit
688                 // On false, no side effect has occured.
689                 // ALL getNextChar.... ARE OPTIMIZED COPIES
690                 int temp = currentPosition;
691                 try {
692                         currentCharacter = source[currentPosition++];
693                         // if (((currentCharacter = source[currentPosition++]) == '\\')
694                         // && (source[currentPosition] == 'u')) {
695                         // //-------------unicode traitement ------------
696                         // int c1, c2, c3, c4;
697                         // int unicodeSize = 6;
698                         // currentPosition++;
699                         // while (source[currentPosition] == 'u') {
700                         // currentPosition++;
701                         // unicodeSize++;
702                         // }
703                         //
704                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
705                         // || c1 < 0)
706                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
707                         // || c2 < 0)
708                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
709                         // || c3 < 0)
710                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
711                         // || c4 < 0)) {
712                         // currentPosition = temp;
713                         // return false;
714                         // }
715                         //
716                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
717                         // if (!Character.isDigit(currentCharacter)) {
718                         // currentPosition = temp;
719                         // return false;
720                         // }
721                         //
722                         // //need the unicode buffer
723                         // if (withoutUnicodePtr == 0) {
724                         // //buffer all the entries that have been left aside....
725                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
726                         // System.arraycopy(
727                         // source,
728                         // startPosition,
729                         // withoutUnicodeBuffer,
730                         // 1,
731                         // withoutUnicodePtr);
732                         // }
733                         // //fill the buffer with the char
734                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
735                         // return true;
736                         // } //-------------end unicode traitement--------------
737                         // else {
738                         if (!Character.isDigit(currentCharacter)) {
739                                 currentPosition = temp;
740                                 return false;
741                         }
742                         // if (withoutUnicodePtr != 0)
743                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
744                         return true;
745                         // }
746                 } catch (IndexOutOfBoundsException e) {
747                         currentPosition = temp;
748                         return false;
749                 }
750         }
751
752         public final boolean getNextCharAsDigit(int radix) {
753                 // BOOLEAN
754                 // handle the case of unicode.
755                 // when a unicode appears then we must use a buffer that holds char
756                 // internal values
757                 // At the end of this method currentCharacter holds the new visited char
758                 // and currentPosition points right next after it
759                 // Both previous lines are true if the currentCharacter is a digit base on
760                 // radix
761                 // On false, no side effect has occured.
762                 // ALL getNextChar.... ARE OPTIMIZED COPIES
763                 int temp = currentPosition;
764                 try {
765                         currentCharacter = source[currentPosition++];
766                         // if (((currentCharacter = source[currentPosition++]) == '\\')
767                         // && (source[currentPosition] == 'u')) {
768                         // //-------------unicode traitement ------------
769                         // int c1, c2, c3, c4;
770                         // int unicodeSize = 6;
771                         // currentPosition++;
772                         // while (source[currentPosition] == 'u') {
773                         // currentPosition++;
774                         // unicodeSize++;
775                         // }
776                         //
777                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
778                         // || c1 < 0)
779                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
780                         // || c2 < 0)
781                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
782                         // || c3 < 0)
783                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
784                         // || c4 < 0)) {
785                         // currentPosition = temp;
786                         // return false;
787                         // }
788                         //
789                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
790                         // if (Character.digit(currentCharacter, radix) == -1) {
791                         // currentPosition = temp;
792                         // return false;
793                         // }
794                         //
795                         // //need the unicode buffer
796                         // if (withoutUnicodePtr == 0) {
797                         // //buffer all the entries that have been left aside....
798                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
799                         // System.arraycopy(
800                         // source,
801                         // startPosition,
802                         // withoutUnicodeBuffer,
803                         // 1,
804                         // withoutUnicodePtr);
805                         // }
806                         // //fill the buffer with the char
807                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
808                         // return true;
809                         // } //-------------end unicode traitement--------------
810                         // else {
811                         if (Character.digit(currentCharacter, radix) == -1) {
812                                 currentPosition = temp;
813                                 return false;
814                         }
815                         // if (withoutUnicodePtr != 0)
816                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
817                         return true;
818                         // }
819                 } catch (IndexOutOfBoundsException e) {
820                         currentPosition = temp;
821                         return false;
822                 }
823         }
824
825         public boolean getNextCharAsJavaIdentifierPart() {
826                 // BOOLEAN
827                 // handle the case of unicode.
828                 // when a unicode appears then we must use a buffer that holds char
829                 // internal values
830                 // At the end of this method currentCharacter holds the new visited char
831                 // and currentPosition points right next after it
832                 // Both previous lines are true if the currentCharacter is a
833                 // JavaIdentifierPart
834                 // On false, no side effect has occured.
835                 // ALL getNextChar.... ARE OPTIMIZED COPIES
836                 int temp = currentPosition;
837                 try {
838                         currentCharacter = source[currentPosition++];
839                         // if (((currentCharacter = source[currentPosition++]) == '\\')
840                         // && (source[currentPosition] == 'u')) {
841                         // //-------------unicode traitement ------------
842                         // int c1, c2, c3, c4;
843                         // int unicodeSize = 6;
844                         // currentPosition++;
845                         // while (source[currentPosition] == 'u') {
846                         // currentPosition++;
847                         // unicodeSize++;
848                         // }
849                         //
850                         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
851                         // || c1 < 0)
852                         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
853                         // || c2 < 0)
854                         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
855                         // || c3 < 0)
856                         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
857                         // || c4 < 0)) {
858                         // currentPosition = temp;
859                         // return false;
860                         // }
861                         //
862                         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
863                         // if (!isPHPIdentifierPart(currentCharacter)) {
864                         // currentPosition = temp;
865                         // return false;
866                         // }
867                         //
868                         // //need the unicode buffer
869                         // if (withoutUnicodePtr == 0) {
870                         // //buffer all the entries that have been left aside....
871                         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
872                         // System.arraycopy(
873                         // source,
874                         // startPosition,
875                         // withoutUnicodeBuffer,
876                         // 1,
877                         // withoutUnicodePtr);
878                         // }
879                         // //fill the buffer with the char
880                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
881                         // return true;
882                         // } //-------------end unicode traitement--------------
883                         // else {
884                         if (!isPHPIdentifierPart(currentCharacter)) {
885                                 currentPosition = temp;
886                                 return false;
887                         }
888                         // if (withoutUnicodePtr != 0)
889                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
890                         return true;
891                         // }
892                 } catch (IndexOutOfBoundsException e) {
893                         currentPosition = temp;
894                         return false;
895                 }
896         }
897
898         public int getCastOrParen() {
899                 int tempPosition = currentPosition;
900                 char tempCharacter = currentCharacter;
901                 int tempToken = TokenNameLPAREN;
902                 boolean found = false;
903                 StringBuffer buf = new StringBuffer();
904                 try {
905                         do {
906                                 currentCharacter = source[currentPosition++];
907                         } while (currentCharacter == ' ' || currentCharacter == '\t');
908                         while (ObviousIdentCharNatures[currentCharacter] == C_LETTER) {
909                                 // while((currentCharacter >= 'a' && currentCharacter <= 'z') ||
910                                 // (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
911                                 buf.append(currentCharacter);
912                                 currentCharacter = source[currentPosition++];
913                         }
914                         if (buf.length() >= 3 && buf.length() <= 7) {
915                                 char[] data = buf.toString().toCharArray();
916                                 int index = 0;
917                                 switch (data.length) {
918                                 case 3:
919                                         // int
920                                         if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
921                                                 found = true;
922                                                 tempToken = TokenNameintCAST;
923                                         }
924                                         break;
925                                 case 4:
926                                         // bool real
927                                         if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')) {
928                                                 found = true;
929                                                 tempToken = TokenNameboolCAST;
930                                         } else {
931                                                 index = 0;
932                                                 if ((data[index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'l')) {
933                                                         found = true;
934                                                         tempToken = TokenNamedoubleCAST;
935                                                 }
936                                         }
937                                         break;
938                                 case 5:
939                                         // array unset float
940                                         if ((data[index] == 'a') && (data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a')
941                                                         && (data[++index] == 'y')) {
942                                                 found = true;
943                                                 tempToken = TokenNamearrayCAST;
944                                         } else {
945                                                 index = 0;
946                                                 if ((data[index] == 'u') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e')
947                                                                 && (data[++index] == 't')) {
948                                                         found = true;
949                                                         tempToken = TokenNameunsetCAST;
950                                                 } else {
951                                                         index = 0;
952                                                         if ((data[index] == 'f') && (data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'a')
953                                                                         && (data[++index] == 't')) {
954                                                                 found = true;
955                                                                 tempToken = TokenNamedoubleCAST;
956                                                         }
957                                                 }
958                                         }
959                                         break;
960                                 case 6:
961                                         // object string double
962                                         if ((data[index] == 'o') && (data[++index] == 'b') && (data[++index] == 'j') && (data[++index] == 'e')
963                                                         && (data[++index] == 'c') && (data[++index] == 't')) {
964                                                 found = true;
965                                                 tempToken = TokenNameobjectCAST;
966                                         } else {
967                                                 index = 0;
968                                                 if ((data[index] == 's') && (data[++index] == 't') && (data[++index] == 'r') && (data[++index] == 'i')
969                                                                 && (data[++index] == 'n') && (data[++index] == 'g')) {
970                                                         found = true;
971                                                         tempToken = TokenNamestringCAST;
972                                                 } else {
973                                                         index = 0;
974                                                         if ((data[index] == 'd') && (data[++index] == 'o') && (data[++index] == 'u') && (data[++index] == 'b')
975                                                                         && (data[++index] == 'l') && (data[++index] == 'e')) {
976                                                                 found = true;
977                                                                 tempToken = TokenNamedoubleCAST;
978                                                         }
979                                                 }
980                                         }
981                                         break;
982                                 case 7:
983                                         // boolean integer
984                                         if ((data[index] == 'b') && (data[++index] == 'o') && (data[++index] == 'o') && (data[++index] == 'l')
985                                                         && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'n')) {
986                                                 found = true;
987                                                 tempToken = TokenNameboolCAST;
988                                         } else {
989                                                 index = 0;
990                                                 if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e')
991                                                                 && (data[++index] == 'g') && (data[++index] == 'e') && (data[++index] == 'r')) {
992                                                         found = true;
993                                                         tempToken = TokenNameintCAST;
994                                                 }
995                                         }
996                                         break;
997                                 }
998                                 if (found) {
999                                         while (currentCharacter == ' ' || currentCharacter == '\t') {
1000                                                 currentCharacter = source[currentPosition++];
1001                                         }
1002                                         if (currentCharacter == ')') {
1003                                                 return tempToken;
1004                                         }
1005                                 }
1006                         }
1007                 } catch (IndexOutOfBoundsException e) {
1008                 }
1009                 currentCharacter = tempCharacter;
1010                 currentPosition = tempPosition;
1011                 return TokenNameLPAREN;
1012         }
1013
1014         public void consumeStringInterpolated() throws InvalidInputException {
1015                 try {
1016                         // consume next character
1017                         unicodeAsBackSlash = false;
1018                         currentCharacter = source[currentPosition++];
1019                         // if (((currentCharacter = source[currentPosition++]) == '\\')
1020                         // && (source[currentPosition] == 'u')) {
1021                         // getNextUnicodeChar();
1022                         // } else {
1023                         // if (withoutUnicodePtr != 0) {
1024                         // withoutUnicodeBuffer[++withoutUnicodePtr] =
1025                         // currentCharacter;
1026                         // }
1027                         // }
1028                         while (currentCharacter != '`') {
1029                                 /** ** in PHP \r and \n are valid in string literals *** */
1030                                 // if ((currentCharacter == '\n')
1031                                 // || (currentCharacter == '\r')) {
1032                                 // // relocate if finding another quote fairly close: thus unicode
1033                                 // '/u000D' will be fully consumed
1034                                 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1035                                 // if (currentPosition + lookAhead == source.length)
1036                                 // break;
1037                                 // if (source[currentPosition + lookAhead] == '\n')
1038                                 // break;
1039                                 // if (source[currentPosition + lookAhead] == '\"') {
1040                                 // currentPosition += lookAhead + 1;
1041                                 // break;
1042                                 // }
1043                                 // }
1044                                 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1045                                 // }
1046                                 if (currentCharacter == '\\') {
1047                                         int escapeSize = currentPosition;
1048                                         boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1049                                         // scanEscapeCharacter make a side effect on this value and we need
1050                                         // the previous value few lines down this one
1051                                         scanDoubleQuotedEscapeCharacter();
1052                                         escapeSize = currentPosition - escapeSize;
1053                                         if (withoutUnicodePtr == 0) {
1054                                                 // buffer all the entries that have been left aside....
1055                                                 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1056                                                 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1057                                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1058                                         } else { // overwrite the / in the buffer
1059                                                 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1060                                                 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1061                                                         // where only one is correct
1062                                                         withoutUnicodePtr--;
1063                                                 }
1064                                         }
1065                                 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1066                                         if (recordLineSeparator) {
1067                                                 pushLineSeparator();
1068                                         }
1069                                 }
1070                                 // consume next character
1071                                 unicodeAsBackSlash = false;
1072                                 currentCharacter = source[currentPosition++];
1073                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
1074                                 // && (source[currentPosition] == 'u')) {
1075                                 // getNextUnicodeChar();
1076                                 // } else {
1077                                 if (withoutUnicodePtr != 0) {
1078                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1079                                 }
1080                                 // }
1081                         }
1082                 } catch (IndexOutOfBoundsException e) {
1083                         // reset end position for error reporting
1084                         currentPosition -= 2;
1085                         throw new InvalidInputException(UNTERMINATED_STRING);
1086                 } catch (InvalidInputException e) {
1087                         if (e.getMessage().equals(INVALID_ESCAPE)) {
1088                                 // relocate if finding another quote fairly close: thus unicode
1089                                 // '/u000D' will be fully consumed
1090                                 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1091                                         if (currentPosition + lookAhead == source.length)
1092                                                 break;
1093                                         if (source[currentPosition + lookAhead] == '\n')
1094                                                 break;
1095                                         if (source[currentPosition + lookAhead] == '`') {
1096                                                 currentPosition += lookAhead + 1;
1097                                                 break;
1098                                         }
1099                                 }
1100                         }
1101                         throw e; // rethrow
1102                 }
1103                 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1104                         // //$NON-NLS-?$ where ? is an
1105                         // int.
1106                         if (currentLine == null) {
1107                                 currentLine = new NLSLine();
1108                                 lines.add(currentLine);
1109                         }
1110                         currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1111                 }
1112         }
1113
1114         public void consumeStringConstant() throws InvalidInputException {
1115                 try {
1116                         // consume next character
1117                         unicodeAsBackSlash = false;
1118                         currentCharacter = source[currentPosition++];
1119                         // if (((currentCharacter = source[currentPosition++]) == '\\')
1120                         // && (source[currentPosition] == 'u')) {
1121                         // getNextUnicodeChar();
1122                         // } else {
1123                         // if (withoutUnicodePtr != 0) {
1124                         // withoutUnicodeBuffer[++withoutUnicodePtr] =
1125                         // currentCharacter;
1126                         // }
1127                         // }
1128                         while (currentCharacter != '\'') {
1129                                 /** ** in PHP \r and \n are valid in string literals *** */
1130                                 // if ((currentCharacter == '\n')
1131                                 // || (currentCharacter == '\r')) {
1132                                 // // relocate if finding another quote fairly close: thus unicode
1133                                 // '/u000D' will be fully consumed
1134                                 // for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1135                                 // if (currentPosition + lookAhead == source.length)
1136                                 // break;
1137                                 // if (source[currentPosition + lookAhead] == '\n')
1138                                 // break;
1139                                 // if (source[currentPosition + lookAhead] == '\"') {
1140                                 // currentPosition += lookAhead + 1;
1141                                 // break;
1142                                 // }
1143                                 // }
1144                                 // throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1145                                 // }
1146                                 if (currentCharacter == '\\') {
1147                                         int escapeSize = currentPosition;
1148                                         boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1149                                         // scanEscapeCharacter make a side effect on this value and we need
1150                                         // the previous value few lines down this one
1151                                         scanSingleQuotedEscapeCharacter();
1152                                         escapeSize = currentPosition - escapeSize;
1153                                         if (withoutUnicodePtr == 0) {
1154                                                 // buffer all the entries that have been left aside....
1155                                                 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1156                                                 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1157                                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1158                                         } else { // overwrite the / in the buffer
1159                                                 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1160                                                 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1161                                                         // where only one is correct
1162                                                         withoutUnicodePtr--;
1163                                                 }
1164                                         }
1165                                 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1166                                         if (recordLineSeparator) {
1167                                                 pushLineSeparator();
1168                                         }
1169                                 }
1170                                 // consume next character
1171                                 unicodeAsBackSlash = false;
1172                                 currentCharacter = source[currentPosition++];
1173                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
1174                                 // && (source[currentPosition] == 'u')) {
1175                                 // getNextUnicodeChar();
1176                                 // } else {
1177                                 if (withoutUnicodePtr != 0) {
1178                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1179                                 }
1180                                 // }
1181                         }
1182                 } catch (IndexOutOfBoundsException e) {
1183                         // reset end position for error reporting
1184                         currentPosition -= 2;
1185                         throw new InvalidInputException(UNTERMINATED_STRING);
1186                 } catch (InvalidInputException e) {
1187                         if (e.getMessage().equals(INVALID_ESCAPE)) {
1188                                 // relocate if finding another quote fairly close: thus unicode
1189                                 // '/u000D' will be fully consumed
1190                                 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1191                                         if (currentPosition + lookAhead == source.length)
1192                                                 break;
1193                                         if (source[currentPosition + lookAhead] == '\n')
1194                                                 break;
1195                                         if (source[currentPosition + lookAhead] == '\'') {
1196                                                 currentPosition += lookAhead + 1;
1197                                                 break;
1198                                         }
1199                                 }
1200                         }
1201                         throw e; // rethrow
1202                 }
1203                 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1204                         // //$NON-NLS-?$ where ? is an
1205                         // int.
1206                         if (currentLine == null) {
1207                                 currentLine = new NLSLine();
1208                                 lines.add(currentLine);
1209                         }
1210                         currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1211                 }
1212         }
1213
1214         public void consumeStringLiteral() throws InvalidInputException {
1215                 try {
1216                         boolean openDollarBrace = false;
1217                         // consume next character
1218                         unicodeAsBackSlash = false;
1219                         currentCharacter = source[currentPosition++];
1220                         while (currentCharacter != '"' || openDollarBrace) {
1221                                 /** ** in PHP \r and \n are valid in string literals *** */
1222                                 if (currentCharacter == '\\') {
1223                                         int escapeSize = currentPosition;
1224                                         boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1225                                         // scanEscapeCharacter make a side effect on this value and we need
1226                                         // the previous value few lines down this one
1227                                         scanDoubleQuotedEscapeCharacter();
1228                                         escapeSize = currentPosition - escapeSize;
1229                                         if (withoutUnicodePtr == 0) {
1230                                                 // buffer all the entries that have been left aside....
1231                                                 withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1232                                                 System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1233                                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1234                                         } else { // overwrite the / in the buffer
1235                                                 withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1236                                                 if (backSlashAsUnicodeInString) { // there are TWO \ in the stream
1237                                                         // where only one is correct
1238                                                         withoutUnicodePtr--;
1239                                                 }
1240                                         }
1241                                 } else if (currentCharacter == '$' && source[currentPosition] == '{') {
1242                                         openDollarBrace = true;
1243                                 } else if (currentCharacter == '{' && source[currentPosition] == '$') {
1244                                         openDollarBrace = true;
1245                                 } else if (currentCharacter == '}') {
1246                                         openDollarBrace = false;
1247                                 } else if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1248                                         if (recordLineSeparator) {
1249                                                 pushLineSeparator();
1250                                         }
1251                                 }
1252                                 // consume next character
1253                                 unicodeAsBackSlash = false;
1254                                 currentCharacter = source[currentPosition++];
1255                                 if (withoutUnicodePtr != 0) {
1256                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1257                                 }
1258                         }
1259                 } catch (IndexOutOfBoundsException e) {
1260                         // reset end position for error reporting
1261                         currentPosition -= 2;
1262                         throw new InvalidInputException(UNTERMINATED_STRING);
1263                 } catch (InvalidInputException e) {
1264                         if (e.getMessage().equals(INVALID_ESCAPE)) {
1265                                 // relocate if finding another quote fairly close: thus unicode
1266                                 // '/u000D' will be fully consumed
1267                                 for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1268                                         if (currentPosition + lookAhead == source.length)
1269                                                 break;
1270                                         if (source[currentPosition + lookAhead] == '\n')
1271                                                 break;
1272                                         if (source[currentPosition + lookAhead] == '\"') {
1273                                                 currentPosition += lookAhead + 1;
1274                                                 break;
1275                                         }
1276                                 }
1277                         }
1278                         throw e; // rethrow
1279                 }
1280                 if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1281                         // //$NON-NLS-?$ where ? is an
1282                         // int.
1283                         if (currentLine == null) {
1284                                 currentLine = new NLSLine();
1285                                 lines.add(currentLine);
1286                         }
1287                         currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1288                 }
1289         }
1290
1291         public int getNextToken() throws InvalidInputException {
1292                 if (!phpMode) {
1293                         return getInlinedHTMLToken(currentPosition);
1294                 } else {
1295                         if (fFillerToken!=TokenNameEOF) {
1296                                 int tempToken;
1297                                 tempToken = fFillerToken;
1298                                 fFillerToken=TokenNameEOF;
1299                                 return tempToken;
1300                         }
1301                         this.wasAcr = false;
1302                         if (diet) {
1303                                 jumpOverMethodBody();
1304                                 diet = false;
1305                                 return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1306                         }
1307                         try {
1308                                 while (true) {
1309                                         withoutUnicodePtr = 0;
1310                                         // start with a new token
1311                                         char encapsedChar = ' ';
1312                                         // if (!encapsedStringStack.isEmpty()) {
1313                                         // encapsedChar = ((Character)
1314                                         // encapsedStringStack.peek()).charValue();
1315                                         // }
1316                                         // if (encapsedChar != '$' && encapsedChar != ' ') {
1317                                         // currentCharacter = source[currentPosition++];
1318                                         // if (currentCharacter == encapsedChar) {
1319                                         // switch (currentCharacter) {
1320                                         // case '`':
1321                                         // return TokenNameEncapsedString0;
1322                                         // case '\'':
1323                                         // return TokenNameEncapsedString1;
1324                                         // case '"':
1325                                         // return TokenNameEncapsedString2;
1326                                         // }
1327                                         // }
1328                                         // while (currentCharacter != encapsedChar) {
1329                                         // /** ** in PHP \r and \n are valid in string literals *** */
1330                                         // switch (currentCharacter) {
1331                                         // case '\\':
1332                                         // int escapeSize = currentPosition;
1333                                         // boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1334                                         // //scanEscapeCharacter make a side effect on this value and
1335                                         // // we need the previous value few lines down this one
1336                                         // scanDoubleQuotedEscapeCharacter();
1337                                         // escapeSize = currentPosition - escapeSize;
1338                                         // if (withoutUnicodePtr == 0) {
1339                                         // //buffer all the entries that have been left aside....
1340                                         // withoutUnicodePtr = currentPosition - escapeSize - 1 -
1341                                         // startPosition;
1342                                         // System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1343                                         // withoutUnicodePtr);
1344                                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1345                                         // } else { //overwrite the / in the buffer
1346                                         // withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1347                                         // if (backSlashAsUnicodeInString) { //there are TWO \ in
1348                                         // withoutUnicodePtr--;
1349                                         // }
1350                                         // }
1351                                         // break;
1352                                         // case '\r':
1353                                         // case '\n':
1354                                         // if (recordLineSeparator) {
1355                                         // pushLineSeparator();
1356                                         // }
1357                                         // break;
1358                                         // case '$':
1359                                         // if (isPHPIdentifierStart(source[currentPosition]) ||
1360                                         // source[currentPosition] == '{') {
1361                                         // currentPosition--;
1362                                         // encapsedStringStack.push(new Character('$'));
1363                                         // return TokenNameSTRING;
1364                                         // }
1365                                         // break;
1366                                         // case '{':
1367                                         // if (source[currentPosition] == '$') { // CURLY_OPEN
1368                                         // currentPosition--;
1369                                         // encapsedStringStack.push(new Character('$'));
1370                                         // return TokenNameSTRING;
1371                                         // }
1372                                         // }
1373                                         // // consume next character
1374                                         // unicodeAsBackSlash = false;
1375                                         // currentCharacter = source[currentPosition++];
1376                                         // if (withoutUnicodePtr != 0) {
1377                                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1378                                         // }
1379                                         // // }
1380                                         // } // end while
1381                                         // currentPosition--;
1382                                         // return TokenNameSTRING;
1383                                         // }
1384                                         // ---------Consume white space and handles startPosition---------
1385                                         int whiteStart = currentPosition;
1386                                         startPosition = currentPosition;
1387                                         currentCharacter = source[currentPosition++];
1388                                         // if (encapsedChar == '$') {
1389                                         // switch (currentCharacter) {
1390                                         // case '\\':
1391                                         // currentCharacter = source[currentPosition++];
1392                                         // return TokenNameSTRING;
1393                                         // case '{':
1394                                         // if (encapsedChar == '$') {
1395                                         // if (getNextChar('$'))
1396                                         // return TokenNameLBRACE_DOLLAR;
1397                                         // }
1398                                         // return TokenNameLBRACE;
1399                                         // case '}':
1400                                         // return TokenNameRBRACE;
1401                                         // case '[':
1402                                         // return TokenNameLBRACKET;
1403                                         // case ']':
1404                                         // return TokenNameRBRACKET;
1405                                         // case '\'':
1406                                         // if (tokenizeStrings) {
1407                                         // consumeStringConstant();
1408                                         // return TokenNameStringSingleQuote;
1409                                         // }
1410                                         // return TokenNameEncapsedString1;
1411                                         // case '"':
1412                                         // return TokenNameEncapsedString2;
1413                                         // case '`':
1414                                         // if (tokenizeStrings) {
1415                                         // consumeStringInterpolated();
1416                                         // return TokenNameStringInterpolated;
1417                                         // }
1418                                         // return TokenNameEncapsedString0;
1419                                         // case '-':
1420                                         // if (getNextChar('>'))
1421                                         // return TokenNameMINUS_GREATER;
1422                                         // return TokenNameSTRING;
1423                                         // default:
1424                                         // if (currentCharacter == '$') {
1425                                         // int oldPosition = currentPosition;
1426                                         // try {
1427                                         // currentCharacter = source[currentPosition++];
1428                                         // if (currentCharacter == '{') {
1429                                         // return TokenNameDOLLAR_LBRACE;
1430                                         // }
1431                                         // if (isPHPIdentifierStart(currentCharacter)) {
1432                                         // return scanIdentifierOrKeyword(true);
1433                                         // } else {
1434                                         // currentPosition = oldPosition;
1435                                         // return TokenNameSTRING;
1436                                         // }
1437                                         // } catch (IndexOutOfBoundsException e) {
1438                                         // currentPosition = oldPosition;
1439                                         // return TokenNameSTRING;
1440                                         // }
1441                                         // }
1442                                         // if (isPHPIdentifierStart(currentCharacter))
1443                                         // return scanIdentifierOrKeyword(false);
1444                                         // if (Character.isDigit(currentCharacter))
1445                                         // return scanNumber(false);
1446                                         // return TokenNameERROR;
1447                                         // }
1448                                         // }
1449                                         // boolean isWhiteSpace;
1450
1451                                         while ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
1452                                                 startPosition = currentPosition;
1453                                                 currentCharacter = source[currentPosition++];
1454                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
1455                                                 // && (source[currentPosition] == 'u')) {
1456                                                 // isWhiteSpace = jumpOverUnicodeWhiteSpace();
1457                                                 // } else {
1458                                                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1459                                                         checkNonExternalizeString();
1460                                                         if (recordLineSeparator) {
1461                                                                 pushLineSeparator();
1462                                                         } else {
1463                                                                 currentLine = null;
1464                                                         }
1465                                                 }
1466                                                 // isWhiteSpace = (currentCharacter == ' ')
1467                                                 // || Character.isWhitespace(currentCharacter);
1468                                                 // }
1469                                         }
1470                                         if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1471                                                 // reposition scanner in case we are interested by spaces as tokens
1472                                                 currentPosition--;
1473                                                 startPosition = whiteStart;
1474                                                 return TokenNameWHITESPACE;
1475                                         }
1476                                         // little trick to get out in the middle of a source compuation
1477                                         if (currentPosition > eofPosition)
1478                                                 return TokenNameEOF;
1479                                         // ---------Identify the next token-------------
1480                                         switch (currentCharacter) {
1481                                         case '(':
1482                                                 return getCastOrParen();
1483                                         case ')':
1484                                                 return TokenNameRPAREN;
1485                                         case '{':
1486                                                 return TokenNameLBRACE;
1487                                         case '}':
1488                                                 return TokenNameRBRACE;
1489                                         case '[':
1490                                                 return TokenNameLBRACKET;
1491                                         case ']':
1492                                                 return TokenNameRBRACKET;
1493                                         case ';':
1494                                                 return TokenNameSEMICOLON;
1495                                         case ',':
1496                                                 return TokenNameCOMMA;
1497                                         case '.':
1498                                                 if (getNextChar('='))
1499                                                         return TokenNameDOT_EQUAL;
1500                                                 if (getNextCharAsDigit())
1501                                                         return scanNumber(true);
1502                                                 return TokenNameDOT;
1503                                         case '+': {
1504                                                 int test;
1505                                                 if ((test = getNextChar('+', '=')) == 0)
1506                                                         return TokenNamePLUS_PLUS;
1507                                                 if (test > 0)
1508                                                         return TokenNamePLUS_EQUAL;
1509                                                 return TokenNamePLUS;
1510                                         }
1511                                         case '-': {
1512                                                 int test;
1513                                                 if ((test = getNextChar('-', '=')) == 0)
1514                                                         return TokenNameMINUS_MINUS;
1515                                                 if (test > 0)
1516                                                         return TokenNameMINUS_EQUAL;
1517                                                 if (getNextChar('>'))
1518                                                         return TokenNameMINUS_GREATER;
1519                                                 return TokenNameMINUS;
1520                                         }
1521                                         case '~':
1522                                                 if (getNextChar('='))
1523                                                         return TokenNameTWIDDLE_EQUAL;
1524                                                 return TokenNameTWIDDLE;
1525                                         case '!':
1526                                                 if (getNextChar('=')) {
1527                                                         if (getNextChar('=')) {
1528                                                                 return TokenNameNOT_EQUAL_EQUAL;
1529                                                         }
1530                                                         return TokenNameNOT_EQUAL;
1531                                                 }
1532                                                 return TokenNameNOT;
1533                                         case '*':
1534                                                 if (getNextChar('='))
1535                                                         return TokenNameMULTIPLY_EQUAL;
1536                                                 return TokenNameMULTIPLY;
1537                                         case '%':
1538                                                 if (getNextChar('='))
1539                                                         return TokenNameREMAINDER_EQUAL;
1540                                                 return TokenNameREMAINDER;
1541                                         case '<': {
1542                                                 int oldPosition = currentPosition;
1543                                                 try {
1544                                                         currentCharacter = source[currentPosition++];
1545                                                 } catch (IndexOutOfBoundsException e) {
1546                                                         currentPosition = oldPosition;
1547                                                         return TokenNameLESS;
1548                                                 }
1549                                                 switch (currentCharacter) {
1550                                                 case '=':
1551                                                         return TokenNameLESS_EQUAL;
1552                                                 case '>':
1553                                                         return TokenNameNOT_EQUAL;
1554                                                 case '<':
1555                                                         if (getNextChar('='))
1556                                                                 return TokenNameLEFT_SHIFT_EQUAL;
1557                                                         if (getNextChar('<')) {
1558                                                                 currentCharacter = source[currentPosition++];
1559                                                                 while (Character.isWhitespace(currentCharacter)) {
1560                                                                         currentCharacter = source[currentPosition++];
1561                                                                 }
1562                                                                 int heredocStart = currentPosition - 1;
1563                                                                 int heredocLength = 0;
1564                                                                 if (isPHPIdentifierStart(currentCharacter)) {
1565                                                                         currentCharacter = source[currentPosition++];
1566                                                                 } else {
1567                                                                         return TokenNameERROR;
1568                                                                 }
1569                                                                 while (isPHPIdentifierPart(currentCharacter)) {
1570                                                                         currentCharacter = source[currentPosition++];
1571                                                                 }
1572                                                                 heredocLength = currentPosition - heredocStart - 1;
1573                                                                 // heredoc end-tag determination
1574                                                                 boolean endTag = true;
1575                                                                 char ch;
1576                                                                 do {
1577                                                                         ch = source[currentPosition++];
1578                                                                         if (ch == '\r' || ch == '\n') {
1579                                                                                 if (recordLineSeparator) {
1580                                                                                         pushLineSeparator();
1581                                                                                 } else {
1582                                                                                         currentLine = null;
1583                                                                                 }
1584                                                                                 for (int i = 0; i < heredocLength; i++) {
1585                                                                                         if (source[currentPosition + i] != source[heredocStart + i]) {
1586                                                                                                 endTag = false;
1587                                                                                                 break;
1588                                                                                         }
1589                                                                                 }
1590                                                                                 if (endTag) {
1591                                                                                         currentPosition += heredocLength - 1;
1592                                                                                         currentCharacter = source[currentPosition++];
1593                                                                                         break; // do...while loop
1594                                                                                 } else {
1595                                                                                         endTag = true;
1596                                                                                 }
1597                                                                         }
1598                                                                 } while (true);
1599                                                                 return TokenNameHEREDOC;
1600                                                         }
1601                                                         return TokenNameLEFT_SHIFT;
1602                                                 }
1603                                                 currentPosition = oldPosition;
1604                                                 return TokenNameLESS;
1605                                         }
1606                                         case '>': {
1607                                                 int test;
1608                                                 if ((test = getNextChar('=', '>')) == 0)
1609                                                         return TokenNameGREATER_EQUAL;
1610                                                 if (test > 0) {
1611                                                         if ((test = getNextChar('=', '>')) == 0)
1612                                                                 return TokenNameRIGHT_SHIFT_EQUAL;
1613                                                         return TokenNameRIGHT_SHIFT;
1614                                                 }
1615                                                 return TokenNameGREATER;
1616                                         }
1617                                         case '=':
1618                                                 if (getNextChar('=')) {
1619                                                         if (getNextChar('=')) {
1620                                                                 return TokenNameEQUAL_EQUAL_EQUAL;
1621                                                         }
1622                                                         return TokenNameEQUAL_EQUAL;
1623                                                 }
1624                                                 if (getNextChar('>'))
1625                                                         return TokenNameEQUAL_GREATER;
1626                                                 return TokenNameEQUAL;
1627                                         case '&': {
1628                                                 int test;
1629                                                 if ((test = getNextChar('&', '=')) == 0)
1630                                                         return TokenNameAND_AND;
1631                                                 if (test > 0)
1632                                                         return TokenNameAND_EQUAL;
1633                                                 return TokenNameAND;
1634                                         }
1635                                         case '|': {
1636                                                 int test;
1637                                                 if ((test = getNextChar('|', '=')) == 0)
1638                                                         return TokenNameOR_OR;
1639                                                 if (test > 0)
1640                                                         return TokenNameOR_EQUAL;
1641                                                 return TokenNameOR;
1642                                         }
1643                                         case '^':
1644                                                 if (getNextChar('='))
1645                                                         return TokenNameXOR_EQUAL;
1646                                                 return TokenNameXOR;
1647                                         case '?':
1648                                                 if (getNextChar('>')) {
1649                                                         phpMode = false;
1650                                                         if (currentPosition == source.length) {
1651                                                                 phpMode = true;
1652                                                                 return TokenNameINLINE_HTML;
1653                                                         }
1654                                                         return getInlinedHTMLToken(currentPosition - 2);
1655                                                 }
1656                                                 return TokenNameQUESTION;
1657                                         case ':':
1658                                                 if (getNextChar(':'))
1659                                                         return TokenNamePAAMAYIM_NEKUDOTAYIM;
1660                                                 return TokenNameCOLON;
1661                                         case '@':
1662                                                 return TokenNameAT;
1663                                         case '\'':
1664                                                 consumeStringConstant();
1665                                                 return TokenNameStringSingleQuote;
1666                                         case '"':
1667                                                 // if (tokenizeStrings) {
1668                                                 consumeStringLiteral();
1669                                                 return TokenNameStringDoubleQuote;
1670                                         // }
1671                                         // return TokenNameEncapsedString2;
1672                                         case '`':
1673                                                 // if (tokenizeStrings) {
1674                                                 consumeStringInterpolated();
1675                                                 return TokenNameStringInterpolated;
1676                                         // }
1677                                         // return TokenNameEncapsedString0;
1678                                         case '#':
1679                                         case '/': {
1680                                                 char startChar = currentCharacter;
1681                                                 if (getNextChar('=') && startChar == '/') {
1682                                                         return TokenNameDIVIDE_EQUAL;
1683                                                 }
1684                                                 int test;
1685                                                 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1686                                                         // line comment
1687                                                         this.lastCommentLinePosition = this.currentPosition;
1688                                                         int endPositionForLineComment = 0;
1689                                                         try { // get the next char
1690                                                                 currentCharacter = source[currentPosition++];
1691                                                                 // if (((currentCharacter = source[currentPosition++])
1692                                                                 // == '\\')
1693                                                                 // && (source[currentPosition] == 'u')) {
1694                                                                 // //-------------unicode traitement ------------
1695                                                                 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1696                                                                 // currentPosition++;
1697                                                                 // while (source[currentPosition] == 'u') {
1698                                                                 // currentPosition++;
1699                                                                 // }
1700                                                                 // if ((c1 =
1701                                                                 // Character.getNumericValue(source[currentPosition++]))
1702                                                                 // > 15
1703                                                                 // || c1 < 0
1704                                                                 // || (c2 =
1705                                                                 // Character.getNumericValue(source[currentPosition++]))
1706                                                                 // > 15
1707                                                                 // || c2 < 0
1708                                                                 // || (c3 =
1709                                                                 // Character.getNumericValue(source[currentPosition++]))
1710                                                                 // > 15
1711                                                                 // || c3 < 0
1712                                                                 // || (c4 =
1713                                                                 // Character.getNumericValue(source[currentPosition++]))
1714                                                                 // > 15
1715                                                                 // || c4 < 0) {
1716                                                                 // throw new
1717                                                                 // InvalidInputException(INVALID_UNICODE_ESCAPE);
1718                                                                 // } else {
1719                                                                 // currentCharacter =
1720                                                                 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1721                                                                 // }
1722                                                                 // }
1723                                                                 // handle the \\u case manually into comment
1724                                                                 // if (currentCharacter == '\\') {
1725                                                                 // if (source[currentPosition] == '\\')
1726                                                                 // currentPosition++;
1727                                                                 // } //jump over the \\
1728                                                                 boolean isUnicode = false;
1729                                                                 while (currentCharacter != '\r' && currentCharacter != '\n') {
1730                                                                         this.lastCommentLinePosition = this.currentPosition;
1731                                                                         if (currentCharacter == '?') {
1732                                                                                 if (getNextChar('>')) {
1733                                                                                         // ?> breaks line comments
1734                                                                                         startPosition = currentPosition - 2;
1735                                                                                         phpMode = false;
1736                                                                                         return TokenNameINLINE_HTML;
1737                                                                                 }
1738                                                                         }
1739                                                                         // get the next char
1740                                                                         isUnicode = false;
1741                                                                         currentCharacter = source[currentPosition++];
1742                                                                         // if (((currentCharacter = source[currentPosition++])
1743                                                                         // == '\\')
1744                                                                         // && (source[currentPosition] == 'u')) {
1745                                                                         // isUnicode = true;
1746                                                                         // //-------------unicode traitement ------------
1747                                                                         // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1748                                                                         // currentPosition++;
1749                                                                         // while (source[currentPosition] == 'u') {
1750                                                                         // currentPosition++;
1751                                                                         // }
1752                                                                         // if ((c1 =
1753                                                                         // Character.getNumericValue(source[currentPosition++]))
1754                                                                         // > 15
1755                                                                         // || c1 < 0
1756                                                                         // || (c2 =
1757                                                                         // Character.getNumericValue(
1758                                                                         // source[currentPosition++]))
1759                                                                         // > 15
1760                                                                         // || c2 < 0
1761                                                                         // || (c3 =
1762                                                                         // Character.getNumericValue(
1763                                                                         // source[currentPosition++]))
1764                                                                         // > 15
1765                                                                         // || c3 < 0
1766                                                                         // || (c4 =
1767                                                                         // Character.getNumericValue(
1768                                                                         // source[currentPosition++]))
1769                                                                         // > 15
1770                                                                         // || c4 < 0) {
1771                                                                         // throw new
1772                                                                         // InvalidInputException(INVALID_UNICODE_ESCAPE);
1773                                                                         // } else {
1774                                                                         // currentCharacter =
1775                                                                         // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1776                                                                         // }
1777                                                                         // }
1778                                                                         // handle the \\u case manually into comment
1779                                                                         // if (currentCharacter == '\\') {
1780                                                                         // if (source[currentPosition] == '\\')
1781                                                                         // currentPosition++;
1782                                                                         // } //jump over the \\
1783                                                                 }
1784                                                                 if (isUnicode) {
1785                                                                         endPositionForLineComment = currentPosition - 6;
1786                                                                 } else {
1787                                                                         endPositionForLineComment = currentPosition - 1;
1788                                                                 }
1789                                                                 // recordComment(false);
1790                                                                 recordComment(TokenNameCOMMENT_LINE);
1791                                                                 if (this.taskTags != null)
1792                                                                         checkTaskTag(this.startPosition, this.currentPosition);
1793                                                                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1794                                                                         checkNonExternalizeString();
1795                                                                         if (recordLineSeparator) {
1796                                                                                 if (isUnicode) {
1797                                                                                         pushUnicodeLineSeparator();
1798                                                                                 } else {
1799                                                                                         pushLineSeparator();
1800                                                                                 }
1801                                                                         } else {
1802                                                                                 currentLine = null;
1803                                                                         }
1804                                                                 }
1805                                                                 if (tokenizeComments) {
1806                                                                         if (!isUnicode) {
1807                                                                                 currentPosition = endPositionForLineComment;
1808                                                                                 // reset one character behind
1809                                                                         }
1810                                                                         return TokenNameCOMMENT_LINE;
1811                                                                 }
1812                                                         } catch (IndexOutOfBoundsException e) { // an eof will them
1813                                                                 // be generated
1814                                                                 if (tokenizeComments) {
1815                                                                         currentPosition--;
1816                                                                         // reset one character behind
1817                                                                         return TokenNameCOMMENT_LINE;
1818                                                                 }
1819                                                         }
1820                                                         break;
1821                                                 }
1822                                                 if (test > 0) {
1823                                                         // traditional and annotation comment
1824                                                         boolean isJavadoc = false, star = false;
1825                                                         // consume next character
1826                                                         unicodeAsBackSlash = false;
1827                                                         currentCharacter = source[currentPosition++];
1828                                                         // if (((currentCharacter = source[currentPosition++]) ==
1829                                                         // '\\')
1830                                                         // && (source[currentPosition] == 'u')) {
1831                                                         // getNextUnicodeChar();
1832                                                         // } else {
1833                                                         // if (withoutUnicodePtr != 0) {
1834                                                         // withoutUnicodeBuffer[++withoutUnicodePtr] =
1835                                                         // currentCharacter;
1836                                                         // }
1837                                                         // }
1838                                                         if (currentCharacter == '*') {
1839                                                                 isJavadoc = true;
1840                                                                 star = true;
1841                                                         }
1842                                                         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1843                                                                 checkNonExternalizeString();
1844                                                                 if (recordLineSeparator) {
1845                                                                         pushLineSeparator();
1846                                                                 } else {
1847                                                                         currentLine = null;
1848                                                                 }
1849                                                         }
1850                                                         try { // get the next char
1851                                                                 currentCharacter = source[currentPosition++];
1852                                                                 // if (((currentCharacter = source[currentPosition++])
1853                                                                 // == '\\')
1854                                                                 // && (source[currentPosition] == 'u')) {
1855                                                                 // //-------------unicode traitement ------------
1856                                                                 // getNextUnicodeChar();
1857                                                                 // }
1858                                                                 // handle the \\u case manually into comment
1859                                                                 // if (currentCharacter == '\\') {
1860                                                                 // if (source[currentPosition] == '\\')
1861                                                                 // currentPosition++;
1862                                                                 // //jump over the \\
1863                                                                 // }
1864                                                                 // empty comment is not a javadoc /**/
1865                                                                 if (currentCharacter == '/') {
1866                                                                         isJavadoc = false;
1867                                                                 }
1868                                                                 // loop until end of comment */
1869                                                                 while ((currentCharacter != '/') || (!star)) {
1870                                                                         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1871                                                                                 checkNonExternalizeString();
1872                                                                                 if (recordLineSeparator) {
1873                                                                                         pushLineSeparator();
1874                                                                                 } else {
1875                                                                                         currentLine = null;
1876                                                                                 }
1877                                                                         }
1878                                                                         star = currentCharacter == '*';
1879                                                                         // get next char
1880                                                                         currentCharacter = source[currentPosition++];
1881                                                                         // if (((currentCharacter = source[currentPosition++])
1882                                                                         // == '\\')
1883                                                                         // && (source[currentPosition] == 'u')) {
1884                                                                         // //-------------unicode traitement ------------
1885                                                                         // getNextUnicodeChar();
1886                                                                         // }
1887                                                                         // handle the \\u case manually into comment
1888                                                                         // if (currentCharacter == '\\') {
1889                                                                         // if (source[currentPosition] == '\\')
1890                                                                         // currentPosition++;
1891                                                                         // } //jump over the \\
1892                                                                 }
1893                                                                 // recordComment(isJavadoc);
1894                                                                 if (isJavadoc) {
1895                                                                         recordComment(TokenNameCOMMENT_PHPDOC);
1896                                                                 } else {
1897                                                                         recordComment(TokenNameCOMMENT_BLOCK);
1898                                                                 }
1899
1900                                                                 if (tokenizeComments) {
1901                                                                         if (isJavadoc)
1902                                                                                 return TokenNameCOMMENT_PHPDOC;
1903                                                                         return TokenNameCOMMENT_BLOCK;
1904                                                                 }
1905
1906                                                                 if (this.taskTags != null) {
1907                                                                         checkTaskTag(this.startPosition, this.currentPosition);
1908                                                                 }
1909                                                         } catch (IndexOutOfBoundsException e) {
1910                                                                 // reset end position for error reporting
1911                                                                 currentPosition -= 2;
1912                                                                 throw new InvalidInputException(UNTERMINATED_COMMENT);
1913                                                         }
1914                                                         break;
1915                                                 }
1916                                                 return TokenNameDIVIDE;
1917                                         }
1918                                         case '\u001a':
1919                                                 if (atEnd())
1920                                                         return TokenNameEOF;
1921                                                 // the atEnd may not be <currentPosition == source.length> if
1922                                                 // source is only some part of a real (external) stream
1923                                                 throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1924                                         default:
1925                                                 if (currentCharacter == '$') {
1926                                                         int oldPosition = currentPosition;
1927                                                         try {
1928                                                                 currentCharacter = source[currentPosition++];
1929                                                                 if (isPHPIdentifierStart(currentCharacter)) {
1930                                                                         return scanIdentifierOrKeyword(true);
1931                                                                 } else {
1932                                                                         currentPosition = oldPosition;
1933                                                                         return TokenNameDOLLAR;
1934                                                                 }
1935                                                         } catch (IndexOutOfBoundsException e) {
1936                                                                 currentPosition = oldPosition;
1937                                                                 return TokenNameDOLLAR;
1938                                                         }
1939                                                 }
1940                                                 if (isPHPIdentifierStart(currentCharacter))
1941                                                         return scanIdentifierOrKeyword(false);
1942                                                 if (Character.isDigit(currentCharacter))
1943                                                         return scanNumber(false);
1944                                                 return TokenNameERROR;
1945                                         }
1946                                 }
1947                         } // -----------------end switch while try--------------------
1948                         catch (IndexOutOfBoundsException e) {
1949                         }
1950                 }
1951                 return TokenNameEOF;
1952         }
1953
1954         /**
1955          * @return
1956          * @throws InvalidInputException
1957          */
1958         private int getInlinedHTMLToken(int start) throws InvalidInputException {
1959                 boolean phpShortTag = false;  // true, if <?= detected
1960                 if (currentPosition > source.length) {
1961                         currentPosition = source.length;
1962                         return TokenNameEOF;
1963                 }
1964                 startPosition = start;
1965                 try {
1966                         while (!phpMode) {
1967                                 currentCharacter = source[currentPosition++];
1968                                 if (currentCharacter == '<') {
1969                                         if (getNextChar('?')) {
1970                                                 currentCharacter = source[currentPosition++];
1971                                                 if ((currentCharacter != 'P') && (currentCharacter != 'p')) {
1972                                                         if (currentCharacter != '=') { // <?=
1973                                                                 currentPosition--;
1974                                                                 phpShortTag = false;
1975                                                         } else {
1976                                                                 phpShortTag = true;
1977                                                         }
1978                                                         // <?
1979                                                         if (ignorePHPOneLiner) { // for CodeFormatter
1980                                                                 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
1981                                                                         phpMode = true;
1982                                                                         if (phpShortTag) {
1983                                                                                 fFillerToken = TokenNameecho;
1984                                                                         }
1985                                                                         return TokenNameINLINE_HTML;
1986                                                                 }
1987                                                         } else {
1988                                                                 phpMode = true;
1989                                                                 if (phpShortTag) {
1990                                                                         fFillerToken = TokenNameecho;
1991                                                                 }
1992                                                                 return TokenNameINLINE_HTML;
1993                                                         }
1994                                                 } else {
1995                                                         int test = getNextChar('H', 'h');
1996                                                         if (test >= 0) {
1997                                                                 test = getNextChar('P', 'p');
1998                                                                 if (test >= 0) {
1999                                                                         // <?PHP <?php
2000                                                                         if (ignorePHPOneLiner) {
2001                                                                                 if (lookAheadLinePHPTag() == TokenNameINLINE_HTML) {
2002                                                                                         phpMode = true;
2003                                                                                         return TokenNameINLINE_HTML;
2004                                                                                 }
2005                                                                         } else {
2006                                                                                 phpMode = true;
2007                                                                                 return TokenNameINLINE_HTML;
2008                                                                         }
2009                                                                 }
2010                                                         }
2011                                                         // }
2012                                                 }
2013                                         }
2014                                 }
2015                                 if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
2016                                         if (recordLineSeparator) {
2017                                                 pushLineSeparator();
2018                                         } else {
2019                                                 currentLine = null;
2020                                         }
2021                                 }
2022                         } // -----------------while--------------------
2023                         phpMode = true;
2024                         return TokenNameINLINE_HTML;
2025                 } // -----------------try--------------------
2026                 catch (IndexOutOfBoundsException e) {
2027                         startPosition = start;
2028                         currentPosition--;
2029                 }
2030                 phpMode = true;
2031                 return TokenNameINLINE_HTML;
2032         }
2033
2034         /**
2035          * check if the PHP is only in this line (for CodeFormatter)
2036          *
2037          * @return
2038          */
2039         private int lookAheadLinePHPTag() {
2040                 int currentPositionInLine = currentPosition;
2041                 char previousCharInLine = ' ';
2042                 char currentCharInLine = ' ';
2043                 boolean singleQuotedStringActive = false;
2044                 boolean doubleQuotedStringActive = false;
2045
2046                 try {
2047                         // look ahead in this line
2048                         while (true) {
2049                                 previousCharInLine = currentCharInLine;
2050                                 currentCharInLine = source[currentPositionInLine++];
2051                                 switch (currentCharInLine) {
2052                                 case '>':
2053                                         if (previousCharInLine == '?') {
2054                                                 // update the scanner's current Position in the source
2055                                                 currentPosition = currentPositionInLine;
2056                                                 // use as "dummy" token
2057                                                 return TokenNameEOF;
2058                                         }
2059                                         break;
2060                                 case '\\':
2061                                         if (doubleQuotedStringActive) {
2062                                                 // ignore escaped characters in double quoted strings
2063                                                 previousCharInLine = currentCharInLine;
2064                                                 currentCharInLine = source[currentPositionInLine++];
2065                                         }
2066                                 case '\"':
2067                                         if (doubleQuotedStringActive) {
2068                                                 doubleQuotedStringActive = false;
2069                                         } else {
2070                                                 if (!singleQuotedStringActive) {
2071                                                         doubleQuotedStringActive = true;
2072                                                 }
2073                                         }
2074                                         break;
2075                                 case '\'':
2076                                         if (singleQuotedStringActive) {
2077                                                 if (previousCharInLine != '\\') {
2078                                                         singleQuotedStringActive = false;
2079                                                 }
2080                                         } else {
2081                                                 if (!doubleQuotedStringActive) {
2082                                                         singleQuotedStringActive = true;
2083                                                 }
2084                                         }
2085                                         break;
2086                                 case '\n':
2087                                         phpMode = true;
2088                                         return TokenNameINLINE_HTML;
2089                                 case '#':
2090                                         if (!singleQuotedStringActive && !doubleQuotedStringActive) {
2091                                                 phpMode = true;
2092                                                 return TokenNameINLINE_HTML;
2093                                         }
2094                                         break;
2095                                 case '/':
2096                                         if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2097                                                 phpMode = true;
2098                                                 return TokenNameINLINE_HTML;
2099                                         }
2100                                         break;
2101                                 case '*':
2102                                         if (previousCharInLine == '/' && !singleQuotedStringActive && !doubleQuotedStringActive) {
2103                                                 phpMode = true;
2104                                                 return TokenNameINLINE_HTML;
2105                                         }
2106                                         break;
2107                                 }
2108                         }
2109                 } catch (IndexOutOfBoundsException e) {
2110                         phpMode = true;
2111                         currentPosition = currentPositionInLine;
2112                         return TokenNameINLINE_HTML;
2113                 }
2114         }
2115
2116         // public final void getNextUnicodeChar()
2117         // throws IndexOutOfBoundsException, InvalidInputException {
2118         // //VOID
2119         // //handle the case of unicode.
2120         // //when a unicode appears then we must use a buffer that holds char
2121         // internal values
2122         // //At the end of this method currentCharacter holds the new visited char
2123         // //and currentPosition points right next after it
2124         //
2125         // //ALL getNextChar.... ARE OPTIMIZED COPIES
2126         //
2127         // int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
2128         // currentPosition++;
2129         // while (source[currentPosition] == 'u') {
2130         // currentPosition++;
2131         // unicodeSize++;
2132         // }
2133         //
2134         // if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2135         // || c1 < 0
2136         // || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
2137         // || c2 < 0
2138         // || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
2139         // || c3 < 0
2140         // || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
2141         // || c4 < 0) {
2142         // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2143         // } else {
2144         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2145         // //need the unicode buffer
2146         // if (withoutUnicodePtr == 0) {
2147         // //buffer all the entries that have been left aside....
2148         // withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
2149         // System.arraycopy(
2150         // source,
2151         // startPosition,
2152         // withoutUnicodeBuffer,
2153         // 1,
2154         // withoutUnicodePtr);
2155         // }
2156         // //fill the buffer with the char
2157         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2158         // }
2159         // unicodeAsBackSlash = currentCharacter == '\\';
2160         // }
2161         /*
2162          * Tokenize a method body, assuming that curly brackets are properly balanced.
2163          */
2164         public final void jumpOverMethodBody() {
2165                 this.wasAcr = false;
2166                 int found = 1;
2167                 try {
2168                         while (true) { // loop for jumping over comments
2169                                 // ---------Consume white space and handles startPosition---------
2170                                 boolean isWhiteSpace;
2171                                 do {
2172                                         startPosition = currentPosition;
2173                                         currentCharacter = source[currentPosition++];
2174                                         // if (((currentCharacter = source[currentPosition++]) == '\\')
2175                                         // && (source[currentPosition] == 'u')) {
2176                                         // isWhiteSpace = jumpOverUnicodeWhiteSpace();
2177                                         // } else {
2178                                         if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2179                                                 pushLineSeparator();
2180                                         isWhiteSpace = Character.isWhitespace(currentCharacter);
2181                                         // }
2182                                 } while (isWhiteSpace);
2183                                 // -------consume token until } is found---------
2184                                 switch (currentCharacter) {
2185                                 case '{':
2186                                         found++;
2187                                         break;
2188                                 case '}':
2189                                         found--;
2190                                         if (found == 0)
2191                                                 return;
2192                                         break;
2193                                 case '\'': {
2194                                         boolean test;
2195                                         test = getNextChar('\\');
2196                                         if (test) {
2197                                                 try {
2198                                                         scanDoubleQuotedEscapeCharacter();
2199                                                 } catch (InvalidInputException ex) {
2200                                                 }
2201                                                 ;
2202                                         } else {
2203                                                 // try { // consume next character
2204                                                 unicodeAsBackSlash = false;
2205                                                 currentCharacter = source[currentPosition++];
2206                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
2207                                                 // && (source[currentPosition] == 'u')) {
2208                                                 // getNextUnicodeChar();
2209                                                 // } else {
2210                                                 if (withoutUnicodePtr != 0) {
2211                                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2212                                                 }
2213                                                 // }
2214                                                 // } catch (InvalidInputException ex) {
2215                                                 // };
2216                                         }
2217                                         getNextChar('\'');
2218                                         break;
2219                                 }
2220                                 case '"':
2221                                         try {
2222                                                 // try { // consume next character
2223                                                 unicodeAsBackSlash = false;
2224                                                 currentCharacter = source[currentPosition++];
2225                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
2226                                                 // && (source[currentPosition] == 'u')) {
2227                                                 // getNextUnicodeChar();
2228                                                 // } else {
2229                                                 if (withoutUnicodePtr != 0) {
2230                                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2231                                                 }
2232                                                 // }
2233                                                 // } catch (InvalidInputException ex) {
2234                                                 // };
2235                                                 while (currentCharacter != '"') {
2236                                                         if (currentCharacter == '\r') {
2237                                                                 if (source[currentPosition] == '\n')
2238                                                                         currentPosition++;
2239                                                                 break;
2240                                                                 // the string cannot go further that the line
2241                                                         }
2242                                                         if (currentCharacter == '\n') {
2243                                                                 break;
2244                                                                 // the string cannot go further that the line
2245                                                         }
2246                                                         if (currentCharacter == '\\') {
2247                                                                 try {
2248                                                                         scanDoubleQuotedEscapeCharacter();
2249                                                                 } catch (InvalidInputException ex) {
2250                                                                 }
2251                                                                 ;
2252                                                         }
2253                                                         // try { // consume next character
2254                                                         unicodeAsBackSlash = false;
2255                                                         currentCharacter = source[currentPosition++];
2256                                                         // if (((currentCharacter = source[currentPosition++]) == '\\')
2257                                                         // && (source[currentPosition] == 'u')) {
2258                                                         // getNextUnicodeChar();
2259                                                         // } else {
2260                                                         if (withoutUnicodePtr != 0) {
2261                                                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2262                                                         }
2263                                                         // }
2264                                                         // } catch (InvalidInputException ex) {
2265                                                         // };
2266                                                 }
2267                                         } catch (IndexOutOfBoundsException e) {
2268                                                 return;
2269                                         }
2270                                         break;
2271                                 case '/': {
2272                                         int test;
2273                                         if ((test = getNextChar('/', '*')) == 0) {
2274                                                 // line comment
2275                                                 try {
2276                                                         // get the next char
2277                                                         currentCharacter = source[currentPosition++];
2278                                                         // if (((currentCharacter = source[currentPosition++]) ==
2279                                                         // '\\')
2280                                                         // && (source[currentPosition] == 'u')) {
2281                                                         // //-------------unicode traitement ------------
2282                                                         // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2283                                                         // currentPosition++;
2284                                                         // while (source[currentPosition] == 'u') {
2285                                                         // currentPosition++;
2286                                                         // }
2287                                                         // if ((c1 =
2288                                                         // Character.getNumericValue(source[currentPosition++]))
2289                                                         // > 15
2290                                                         // || c1 < 0
2291                                                         // || (c2 =
2292                                                         // Character.getNumericValue(source[currentPosition++]))
2293                                                         // > 15
2294                                                         // || c2 < 0
2295                                                         // || (c3 =
2296                                                         // Character.getNumericValue(source[currentPosition++]))
2297                                                         // > 15
2298                                                         // || c3 < 0
2299                                                         // || (c4 =
2300                                                         // Character.getNumericValue(source[currentPosition++]))
2301                                                         // > 15
2302                                                         // || c4 < 0) {
2303                                                         // //error don't care of the value
2304                                                         // currentCharacter = 'A';
2305                                                         // } //something different from \n and \r
2306                                                         // else {
2307                                                         // currentCharacter =
2308                                                         // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2309                                                         // }
2310                                                         // }
2311                                                         while (currentCharacter != '\r' && currentCharacter != '\n') {
2312                                                                 // get the next char
2313                                                                 currentCharacter = source[currentPosition++];
2314                                                                 // if (((currentCharacter = source[currentPosition++])
2315                                                                 // == '\\')
2316                                                                 // && (source[currentPosition] == 'u')) {
2317                                                                 // //-------------unicode traitement ------------
2318                                                                 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2319                                                                 // currentPosition++;
2320                                                                 // while (source[currentPosition] == 'u') {
2321                                                                 // currentPosition++;
2322                                                                 // }
2323                                                                 // if ((c1 =
2324                                                                 // Character.getNumericValue(source[currentPosition++]))
2325                                                                 // > 15
2326                                                                 // || c1 < 0
2327                                                                 // || (c2 =
2328                                                                 // Character.getNumericValue(source[currentPosition++]))
2329                                                                 // > 15
2330                                                                 // || c2 < 0
2331                                                                 // || (c3 =
2332                                                                 // Character.getNumericValue(source[currentPosition++]))
2333                                                                 // > 15
2334                                                                 // || c3 < 0
2335                                                                 // || (c4 =
2336                                                                 // Character.getNumericValue(source[currentPosition++]))
2337                                                                 // > 15
2338                                                                 // || c4 < 0) {
2339                                                                 // //error don't care of the value
2340                                                                 // currentCharacter = 'A';
2341                                                                 // } //something different from \n and \r
2342                                                                 // else {
2343                                                                 // currentCharacter =
2344                                                                 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2345                                                                 // }
2346                                                                 // }
2347                                                         }
2348                                                         if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2349                                                                 pushLineSeparator();
2350                                                 } catch (IndexOutOfBoundsException e) {
2351                                                 } // an eof will them be generated
2352                                                 break;
2353                                         }
2354                                         if (test > 0) {
2355                                                 // traditional and annotation comment
2356                                                 boolean star = false;
2357                                                 // try { // consume next character
2358                                                 unicodeAsBackSlash = false;
2359                                                 currentCharacter = source[currentPosition++];
2360                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
2361                                                 // && (source[currentPosition] == 'u')) {
2362                                                 // getNextUnicodeChar();
2363                                                 // } else {
2364                                                 if (withoutUnicodePtr != 0) {
2365                                                         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2366                                                 }
2367                                                 // };
2368                                                 // } catch (InvalidInputException ex) {
2369                                                 // };
2370                                                 if (currentCharacter == '*') {
2371                                                         star = true;
2372                                                 }
2373                                                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2374                                                         pushLineSeparator();
2375                                                 try { // get the next char
2376                                                         currentCharacter = source[currentPosition++];
2377                                                         // if (((currentCharacter = source[currentPosition++]) ==
2378                                                         // '\\')
2379                                                         // && (source[currentPosition] == 'u')) {
2380                                                         // //-------------unicode traitement ------------
2381                                                         // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2382                                                         // currentPosition++;
2383                                                         // while (source[currentPosition] == 'u') {
2384                                                         // currentPosition++;
2385                                                         // }
2386                                                         // if ((c1 =
2387                                                         // Character.getNumericValue(source[currentPosition++]))
2388                                                         // > 15
2389                                                         // || c1 < 0
2390                                                         // || (c2 =
2391                                                         // Character.getNumericValue(source[currentPosition++]))
2392                                                         // > 15
2393                                                         // || c2 < 0
2394                                                         // || (c3 =
2395                                                         // Character.getNumericValue(source[currentPosition++]))
2396                                                         // > 15
2397                                                         // || c3 < 0
2398                                                         // || (c4 =
2399                                                         // Character.getNumericValue(source[currentPosition++]))
2400                                                         // > 15
2401                                                         // || c4 < 0) {
2402                                                         // //error don't care of the value
2403                                                         // currentCharacter = 'A';
2404                                                         // } //something different from * and /
2405                                                         // else {
2406                                                         // currentCharacter =
2407                                                         // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2408                                                         // }
2409                                                         // }
2410                                                         // loop until end of comment */
2411                                                         while ((currentCharacter != '/') || (!star)) {
2412                                                                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2413                                                                         pushLineSeparator();
2414                                                                 star = currentCharacter == '*';
2415                                                                 // get next char
2416                                                                 currentCharacter = source[currentPosition++];
2417                                                                 // if (((currentCharacter = source[currentPosition++])
2418                                                                 // == '\\')
2419                                                                 // && (source[currentPosition] == 'u')) {
2420                                                                 // //-------------unicode traitement ------------
2421                                                                 // int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2422                                                                 // currentPosition++;
2423                                                                 // while (source[currentPosition] == 'u') {
2424                                                                 // currentPosition++;
2425                                                                 // }
2426                                                                 // if ((c1 =
2427                                                                 // Character.getNumericValue(source[currentPosition++]))
2428                                                                 // > 15
2429                                                                 // || c1 < 0
2430                                                                 // || (c2 =
2431                                                                 // Character.getNumericValue(source[currentPosition++]))
2432                                                                 // > 15
2433                                                                 // || c2 < 0
2434                                                                 // || (c3 =
2435                                                                 // Character.getNumericValue(source[currentPosition++]))
2436                                                                 // > 15
2437                                                                 // || c3 < 0
2438                                                                 // || (c4 =
2439                                                                 // Character.getNumericValue(source[currentPosition++]))
2440                                                                 // > 15
2441                                                                 // || c4 < 0) {
2442                                                                 // //error don't care of the value
2443                                                                 // currentCharacter = 'A';
2444                                                                 // } //something different from * and /
2445                                                                 // else {
2446                                                                 // currentCharacter =
2447                                                                 // (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2448                                                                 // }
2449                                                                 // }
2450                                                         }
2451                                                 } catch (IndexOutOfBoundsException e) {
2452                                                         return;
2453                                                 }
2454                                                 break;
2455                                         }
2456                                         break;
2457                                 }
2458                                 default:
2459                                         if (isPHPIdentOrVarStart(currentCharacter)) {
2460                                                 try {
2461                                                         scanIdentifierOrKeyword((currentCharacter == '$'));
2462                                                 } catch (InvalidInputException ex) {
2463                                                 }
2464                                                 ;
2465                                                 break;
2466                                         }
2467                                         if (ObviousIdentCharNatures[currentCharacter] == C_DIGIT) {
2468                                                 // if (Character.isDigit(currentCharacter)) {
2469                                                 try {
2470                                                         scanNumber(false);
2471                                                 } catch (InvalidInputException ex) {
2472                                                 }
2473                                                 ;
2474                                                 break;
2475                                         }
2476                                 }
2477                         }
2478                         // -----------------end switch while try--------------------
2479                 } catch (IndexOutOfBoundsException e) {
2480                 } catch (InvalidInputException e) {
2481                 }
2482                 return;
2483         }
2484
2485         // public final boolean jumpOverUnicodeWhiteSpace()
2486         // throws InvalidInputException {
2487         // //BOOLEAN
2488         // //handle the case of unicode. Jump over the next whiteSpace
2489         // //making startPosition pointing on the next available char
2490         // //On false, the currentCharacter is filled up with a potential
2491         // //correct char
2492         //
2493         // try {
2494         // this.wasAcr = false;
2495         // int c1, c2, c3, c4;
2496         // int unicodeSize = 6;
2497         // currentPosition++;
2498         // while (source[currentPosition] == 'u') {
2499         // currentPosition++;
2500         // unicodeSize++;
2501         // }
2502         //
2503         // if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2504         // || c1 < 0)
2505         // || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2506         // || c2 < 0)
2507         // || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2508         // || c3 < 0)
2509         // || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2510         // || c4 < 0)) {
2511         // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2512         // }
2513         //
2514         // currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2515         // if (recordLineSeparator
2516         // && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2517         // pushLineSeparator();
2518         // if (Character.isWhitespace(currentCharacter))
2519         // return true;
2520         //
2521         // //buffer the new char which is not a white space
2522         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2523         // //withoutUnicodePtr == 1 is true here
2524         // return false;
2525         // } catch (IndexOutOfBoundsException e) {
2526         // throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2527         // }
2528         // }
2529         public final int[] getLineEnds() {
2530                 // return a bounded copy of this.lineEnds
2531                 int[] copy;
2532                 System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2533                 return copy;
2534         }
2535
2536         public char[] getSource() {
2537                 return this.source;
2538         }
2539
2540         public static boolean isIdentifierOrKeyword(int token) {
2541                 return (token == TokenNameIdentifier) || (token > TokenNameKEYWORD);
2542         }
2543
2544         final char[] optimizedCurrentTokenSource1() {
2545                 // return always the same char[] build only once
2546                 // optimization at no speed cost of 99.5 % of the singleCharIdentifier
2547                 char charOne = source[startPosition];
2548                 switch (charOne) {
2549                 case 'a':
2550                         return charArray_a;
2551                 case 'b':
2552                         return charArray_b;
2553                 case 'c':
2554                         return charArray_c;
2555                 case 'd':
2556                         return charArray_d;
2557                 case 'e':
2558                         return charArray_e;
2559                 case 'f':
2560                         return charArray_f;
2561                 case 'g':
2562                         return charArray_g;
2563                 case 'h':
2564                         return charArray_h;
2565                 case 'i':
2566                         return charArray_i;
2567                 case 'j':
2568                         return charArray_j;
2569                 case 'k':
2570                         return charArray_k;
2571                 case 'l':
2572                         return charArray_l;
2573                 case 'm':
2574                         return charArray_m;
2575                 case 'n':
2576                         return charArray_n;
2577                 case 'o':
2578                         return charArray_o;
2579                 case 'p':
2580                         return charArray_p;
2581                 case 'q':
2582                         return charArray_q;
2583                 case 'r':
2584                         return charArray_r;
2585                 case 's':
2586                         return charArray_s;
2587                 case 't':
2588                         return charArray_t;
2589                 case 'u':
2590                         return charArray_u;
2591                 case 'v':
2592                         return charArray_v;
2593                 case 'w':
2594                         return charArray_w;
2595                 case 'x':
2596                         return charArray_x;
2597                 case 'y':
2598                         return charArray_y;
2599                 case 'z':
2600                         return charArray_z;
2601                 default:
2602                         return new char[] { charOne };
2603                 }
2604         }
2605
2606         final char[] optimizedCurrentTokenSource2() {
2607                 char c0, c1;
2608                 c0 = source[startPosition];
2609                 c1 = source[startPosition + 1];
2610                 if (c0 == '$') {
2611                         // return always the same char[] build only once
2612                         // optimization at no speed cost of 99.5 % of the singleCharIdentifier
2613                         switch (c1) {
2614                         case 'a':
2615                                 return charArray_va;
2616                         case 'b':
2617                                 return charArray_vb;
2618                         case 'c':
2619                                 return charArray_vc;
2620                         case 'd':
2621                                 return charArray_vd;
2622                         case 'e':
2623                                 return charArray_ve;
2624                         case 'f':
2625                                 return charArray_vf;
2626                         case 'g':
2627                                 return charArray_vg;
2628                         case 'h':
2629                                 return charArray_vh;
2630                         case 'i':
2631                                 return charArray_vi;
2632                         case 'j':
2633                                 return charArray_vj;
2634                         case 'k':
2635                                 return charArray_vk;
2636                         case 'l':
2637                                 return charArray_vl;
2638                         case 'm':
2639                                 return charArray_vm;
2640                         case 'n':
2641                                 return charArray_vn;
2642                         case 'o':
2643                                 return charArray_vo;
2644                         case 'p':
2645                                 return charArray_vp;
2646                         case 'q':
2647                                 return charArray_vq;
2648                         case 'r':
2649                                 return charArray_vr;
2650                         case 's':
2651                                 return charArray_vs;
2652                         case 't':
2653                                 return charArray_vt;
2654                         case 'u':
2655                                 return charArray_vu;
2656                         case 'v':
2657                                 return charArray_vv;
2658                         case 'w':
2659                                 return charArray_vw;
2660                         case 'x':
2661                                 return charArray_vx;
2662                         case 'y':
2663                                 return charArray_vy;
2664                         case 'z':
2665                                 return charArray_vz;
2666                         }
2667                 }
2668                 // try to return the same char[] build only once
2669                 int hash = ((c0 << 6) + c1) % TableSize;
2670                 char[][] table = charArray_length[0][hash];
2671                 int i = newEntry2;
2672                 while (++i < InternalTableSize) {
2673                         char[] charArray = table[i];
2674                         if ((c0 == charArray[0]) && (c1 == charArray[1]))
2675                                 return charArray;
2676                 }
2677                 // ---------other side---------
2678                 i = -1;
2679                 int max = newEntry2;
2680                 while (++i <= max) {
2681                         char[] charArray = table[i];
2682                         if ((c0 == charArray[0]) && (c1 == charArray[1]))
2683                                 return charArray;
2684                 }
2685                 // --------add the entry-------
2686                 if (++max >= InternalTableSize)
2687                         max = 0;
2688                 char[] r;
2689                 table[max] = (r = new char[] { c0, c1 });
2690                 newEntry2 = max;
2691                 return r;
2692         }
2693
2694         final char[] optimizedCurrentTokenSource3() {
2695                 // try to return the same char[] build only once
2696                 char c0, c1, c2;
2697                 int hash = (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2698                                 % TableSize;
2699                 char[][] table = charArray_length[1][hash];
2700                 int i = newEntry3;
2701                 while (++i < InternalTableSize) {
2702                         char[] charArray = table[i];
2703                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2704                                 return charArray;
2705                 }
2706                 // ---------other side---------
2707                 i = -1;
2708                 int max = newEntry3;
2709                 while (++i <= max) {
2710                         char[] charArray = table[i];
2711                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2712                                 return charArray;
2713                 }
2714                 // --------add the entry-------
2715                 if (++max >= InternalTableSize)
2716                         max = 0;
2717                 char[] r;
2718                 table[max] = (r = new char[] { c0, c1, c2 });
2719                 newEntry3 = max;
2720                 return r;
2721         }
2722
2723         final char[] optimizedCurrentTokenSource4() {
2724                 // try to return the same char[] build only once
2725                 char c0, c1, c2, c3;
2726                 long hash = ((((long) (c0 = source[startPosition])) << 18) + ((c1 = source[startPosition + 1]) << 12)
2727                                 + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2728                                 % TableSize;
2729                 char[][] table = charArray_length[2][(int) hash];
2730                 int i = newEntry4;
2731                 while (++i < InternalTableSize) {
2732                         char[] charArray = table[i];
2733                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2734                                 return charArray;
2735                 }
2736                 // ---------other side---------
2737                 i = -1;
2738                 int max = newEntry4;
2739                 while (++i <= max) {
2740                         char[] charArray = table[i];
2741                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2742                                 return charArray;
2743                 }
2744                 // --------add the entry-------
2745                 if (++max >= InternalTableSize)
2746                         max = 0;
2747                 char[] r;
2748                 table[max] = (r = new char[] { c0, c1, c2, c3 });
2749                 newEntry4 = max;
2750                 return r;
2751         }
2752
2753         final char[] optimizedCurrentTokenSource5() {
2754                 // try to return the same char[] build only once
2755                 char c0, c1, c2, c3, c4;
2756                 long hash = ((((long) (c0 = source[startPosition])) << 24) + (((long) (c1 = source[startPosition + 1])) << 18)
2757                                 + ((c2 = source[startPosition + 2]) << 12) + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2758                                 % TableSize;
2759                 char[][] table = charArray_length[3][(int) hash];
2760                 int i = newEntry5;
2761                 while (++i < InternalTableSize) {
2762                         char[] charArray = table[i];
2763                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2764                                 return charArray;
2765                 }
2766                 // ---------other side---------
2767                 i = -1;
2768                 int max = newEntry5;
2769                 while (++i <= max) {
2770                         char[] charArray = table[i];
2771                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2772                                 return charArray;
2773                 }
2774                 // --------add the entry-------
2775                 if (++max >= InternalTableSize)
2776                         max = 0;
2777                 char[] r;
2778                 table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2779                 newEntry5 = max;
2780                 return r;
2781         }
2782
2783         final char[] optimizedCurrentTokenSource6() {
2784                 // try to return the same char[] build only once
2785                 char c0, c1, c2, c3, c4, c5;
2786                 long hash = ((((long) (c0 = source[startPosition])) << 32) + (((long) (c1 = source[startPosition + 1])) << 24)
2787                                 + (((long) (c2 = source[startPosition + 2])) << 18) + ((c3 = source[startPosition + 3]) << 12)
2788                                 + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2789                                 % TableSize;
2790                 char[][] table = charArray_length[4][(int) hash];
2791                 int i = newEntry6;
2792                 while (++i < InternalTableSize) {
2793                         char[] charArray = table[i];
2794                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2795                                         && (c5 == charArray[5]))
2796                                 return charArray;
2797                 }
2798                 // ---------other side---------
2799                 i = -1;
2800                 int max = newEntry6;
2801                 while (++i <= max) {
2802                         char[] charArray = table[i];
2803                         if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4])
2804                                         && (c5 == charArray[5]))
2805                                 return charArray;
2806                 }
2807                 // --------add the entry-------
2808                 if (++max >= InternalTableSize)
2809                         max = 0;
2810                 char[] r;
2811                 table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2812                 newEntry6 = max;
2813                 return r;
2814         }
2815
2816         public final void pushLineSeparator() throws InvalidInputException {
2817                 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2818                 final int INCREMENT = 250;
2819                 if (this.checkNonExternalizedStringLiterals) {
2820                         // reinitialize the current line for non externalize strings purpose
2821                         currentLine = null;
2822                 }
2823                 // currentCharacter is at position currentPosition-1
2824                 // cr 000D
2825                 if (currentCharacter == '\r') {
2826                         int separatorPos = currentPosition - 1;
2827                         if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2828                                 return;
2829                         // System.out.println("CR-" + separatorPos);
2830                         try {
2831                                 lineEnds[++linePtr] = separatorPos;
2832                         } catch (IndexOutOfBoundsException e) {
2833                                 // linePtr value is correct
2834                                 int oldLength = lineEnds.length;
2835                                 int[] old = lineEnds;
2836                                 lineEnds = new int[oldLength + INCREMENT];
2837                                 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2838                                 lineEnds[linePtr] = separatorPos;
2839                         }
2840                         // look-ahead for merged cr+lf
2841                         try {
2842                                 if (source[currentPosition] == '\n') {
2843                                         // System.out.println("look-ahead LF-" + currentPosition);
2844                                         lineEnds[linePtr] = currentPosition;
2845                                         currentPosition++;
2846                                         wasAcr = false;
2847                                 } else {
2848                                         wasAcr = true;
2849                                 }
2850                         } catch (IndexOutOfBoundsException e) {
2851                                 wasAcr = true;
2852                         }
2853                 } else {
2854                         // lf 000A
2855                         if (currentCharacter == '\n') {
2856                                 // must merge eventual cr followed by lf
2857                                 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2858                                         // System.out.println("merge LF-" + (currentPosition - 1));
2859                                         lineEnds[linePtr] = currentPosition - 1;
2860                                 } else {
2861                                         int separatorPos = currentPosition - 1;
2862                                         if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2863                                                 return;
2864                                         // System.out.println("LF-" + separatorPos);
2865                                         try {
2866                                                 lineEnds[++linePtr] = separatorPos;
2867                                         } catch (IndexOutOfBoundsException e) {
2868                                                 // linePtr value is correct
2869                                                 int oldLength = lineEnds.length;
2870                                                 int[] old = lineEnds;
2871                                                 lineEnds = new int[oldLength + INCREMENT];
2872                                                 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2873                                                 lineEnds[linePtr] = separatorPos;
2874                                         }
2875                                 }
2876                                 wasAcr = false;
2877                         }
2878                 }
2879         }
2880
2881         public final void pushUnicodeLineSeparator() {
2882                 // isUnicode means that the \r or \n has been read as a unicode character
2883                 // see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2884                 final int INCREMENT = 250;
2885                 // currentCharacter is at position currentPosition-1
2886                 if (this.checkNonExternalizedStringLiterals) {
2887                         // reinitialize the current line for non externalize strings purpose
2888                         currentLine = null;
2889                 }
2890                 // cr 000D
2891                 if (currentCharacter == '\r') {
2892                         int separatorPos = currentPosition - 6;
2893                         if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2894                                 return;
2895                         // System.out.println("CR-" + separatorPos);
2896                         try {
2897                                 lineEnds[++linePtr] = separatorPos;
2898                         } catch (IndexOutOfBoundsException e) {
2899                                 // linePtr value is correct
2900                                 int oldLength = lineEnds.length;
2901                                 int[] old = lineEnds;
2902                                 lineEnds = new int[oldLength + INCREMENT];
2903                                 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2904                                 lineEnds[linePtr] = separatorPos;
2905                         }
2906                         // look-ahead for merged cr+lf
2907                         if (source[currentPosition] == '\n') {
2908                                 // System.out.println("look-ahead LF-" + currentPosition);
2909                                 lineEnds[linePtr] = currentPosition;
2910                                 currentPosition++;
2911                                 wasAcr = false;
2912                         } else {
2913                                 wasAcr = true;
2914                         }
2915                 } else {
2916                         // lf 000A
2917                         if (currentCharacter == '\n') {
2918                                 // must merge eventual cr followed by lf
2919                                 if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2920                                         // System.out.println("merge LF-" + (currentPosition - 1));
2921                                         lineEnds[linePtr] = currentPosition - 6;
2922                                 } else {
2923                                         int separatorPos = currentPosition - 6;
2924                                         if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2925                                                 return;
2926                                         // System.out.println("LF-" + separatorPos);
2927                                         try {
2928                                                 lineEnds[++linePtr] = separatorPos;
2929                                         } catch (IndexOutOfBoundsException e) {
2930                                                 // linePtr value is correct
2931                                                 int oldLength = lineEnds.length;
2932                                                 int[] old = lineEnds;
2933                                                 lineEnds = new int[oldLength + INCREMENT];
2934                                                 System.arraycopy(old, 0, lineEnds, 0, oldLength);
2935                                                 lineEnds[linePtr] = separatorPos;
2936                                         }
2937                                 }
2938                                 wasAcr = false;
2939                         }
2940                 }
2941         }
2942
2943         public void recordComment(int token) {
2944                 // compute position
2945                 int stopPosition = this.currentPosition;
2946                 switch (token) {
2947                 case TokenNameCOMMENT_LINE:
2948                         stopPosition = -this.lastCommentLinePosition;
2949                         break;
2950                 case TokenNameCOMMENT_BLOCK:
2951                         stopPosition = -this.currentPosition;
2952                         break;
2953                 }
2954
2955                 // a new comment is recorded
2956                 int length = this.commentStops.length;
2957                 if (++this.commentPtr >= length) {
2958                         System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2959                         // grows the positions buffers too
2960                         System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2961                 }
2962                 this.commentStops[this.commentPtr] = stopPosition;
2963                 this.commentStarts[this.commentPtr] = this.startPosition;
2964         }
2965
2966         // public final void recordComment(boolean isJavadoc) {
2967         // // a new annotation comment is recorded
2968         // try {
2969         // commentStops[++commentPtr] = isJavadoc
2970         // ? currentPosition
2971         // : -currentPosition;
2972         // } catch (IndexOutOfBoundsException e) {
2973         // int oldStackLength = commentStops.length;
2974         // int[] oldStack = commentStops;
2975         // commentStops = new int[oldStackLength + 30];
2976         // System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2977         // commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2978         // //grows the positions buffers too
2979         // int[] old = commentStarts;
2980         // commentStarts = new int[oldStackLength + 30];
2981         // System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2982         // }
2983         // //the buffer is of a correct size here
2984         // commentStarts[commentPtr] = startPosition;
2985         // }
2986         public void resetTo(int begin, int end) {
2987                 // reset the scanner to a given position where it may rescan again
2988                 diet = false;
2989                 initialPosition = startPosition = currentPosition = begin;
2990                 eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2991                 commentPtr = -1; // reset comment stack
2992         }
2993
2994         public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2995                 // the string with "\\u" is a legal string of two chars \ and u
2996                 // thus we use a direct access to the source (for regular cases).
2997                 // if (unicodeAsBackSlash) {
2998                 // // consume next character
2999                 // unicodeAsBackSlash = false;
3000                 // if (((currentCharacter = source[currentPosition++]) == '\\')
3001                 // && (source[currentPosition] == 'u')) {
3002                 // getNextUnicodeChar();
3003                 // } else {
3004                 // if (withoutUnicodePtr != 0) {
3005                 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3006                 // }
3007                 // }
3008                 // } else
3009                 currentCharacter = source[currentPosition++];
3010                 switch (currentCharacter) {
3011                 case '\'':
3012                         currentCharacter = '\'';
3013                         break;
3014                 case '\\':
3015                         currentCharacter = '\\';
3016                         break;
3017                 default:
3018                         currentCharacter = '\\';
3019                         currentPosition--;
3020                 }
3021         }
3022
3023         public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
3024                 currentCharacter = source[currentPosition++];
3025                 switch (currentCharacter) {
3026                 // case 'b' :
3027                 // currentCharacter = '\b';
3028                 // break;
3029                 case 't':
3030                         currentCharacter = '\t';
3031                         break;
3032                 case 'n':
3033                         currentCharacter = '\n';
3034                         break;
3035                 // case 'f' :
3036                 // currentCharacter = '\f';
3037                 // break;
3038                 case 'r':
3039                         currentCharacter = '\r';
3040                         break;
3041                 case '\"':
3042                         currentCharacter = '\"';
3043                         break;
3044                 case '\'':
3045                         currentCharacter = '\'';
3046                         break;
3047                 case '\\':
3048                         currentCharacter = '\\';
3049                         break;
3050                 case '$':
3051                         currentCharacter = '$';
3052                         break;
3053                 default:
3054                         // -----------octal escape--------------
3055                         // OctalDigit
3056                         // OctalDigit OctalDigit
3057                         // ZeroToThree OctalDigit OctalDigit
3058                         int number = Character.getNumericValue(currentCharacter);
3059                         if (number >= 0 && number <= 7) {
3060                                 boolean zeroToThreeNot = number > 3;
3061                                 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3062                                         int digit = Character.getNumericValue(currentCharacter);
3063                                         if (digit >= 0 && digit <= 7) {
3064                                                 number = (number * 8) + digit;
3065                                                 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3066                                                         if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3067                                                                 // Digit --> ignore last character
3068                                                                 currentPosition--;
3069                                                         } else {
3070                                                                 digit = Character.getNumericValue(currentCharacter);
3071                                                                 if (digit >= 0 && digit <= 7) {
3072                                                                         // has read \ZeroToThree OctalDigit OctalDigit
3073                                                                         number = (number * 8) + digit;
3074                                                                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3075                                                                         // --> ignore last character
3076                                                                         currentPosition--;
3077                                                                 }
3078                                                         }
3079                                                 } else { // has read \OctalDigit NonDigit--> ignore last
3080                                                         // character
3081                                                         currentPosition--;
3082                                                 }
3083                                         } else { // has read \OctalDigit NonOctalDigit--> ignore last
3084                                                 // character
3085                                                 currentPosition--;
3086                                         }
3087                                 } else { // has read \OctalDigit --> ignore last character
3088                                         currentPosition--;
3089                                 }
3090                                 if (number > 255)
3091                                         throw new InvalidInputException(INVALID_ESCAPE);
3092                                 currentCharacter = (char) number;
3093                         }
3094                 // else
3095                 // throw new InvalidInputException(INVALID_ESCAPE);
3096                 }
3097         }
3098
3099         // public int scanIdentifierOrKeyword() throws InvalidInputException {
3100         // return scanIdentifierOrKeyword( false );
3101         // }
3102         public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
3103                 // test keywords
3104                 // first dispatch on the first char.
3105                 // then the length. If there are several
3106                 // keywors with the same length AND the same first char, then do another
3107                 // disptach on the second char :-)...cool....but fast !
3108                 useAssertAsAnIndentifier = false;
3109                 while (getNextCharAsJavaIdentifierPart()) {
3110                 }
3111                 ;
3112                 if (isVariable) {
3113                         // if (new String(getCurrentTokenSource()).equals("$this")) {
3114                         // return TokenNamethis;
3115                         // }
3116                         return TokenNameVariable;
3117                 }
3118                 int index, length;
3119                 char[] data;
3120                 char firstLetter;
3121                 // if (withoutUnicodePtr == 0)
3122                 // quick test on length == 1 but not on length > 12 while most identifier
3123                 // have a length which is <= 12...but there are lots of identifier with
3124                 // only one char....
3125                 // {
3126                 if ((length = currentPosition - startPosition) == 1)
3127                         return TokenNameIdentifier;
3128                 // data = source;
3129                 data = new char[length];
3130                 index = startPosition;
3131                 for (int i = 0; i < length; i++) {
3132                         data[i] = Character.toLowerCase(source[index + i]);
3133                 }
3134                 index = 0;
3135                 // } else {
3136                 // if ((length = withoutUnicodePtr) == 1)
3137                 // return TokenNameIdentifier;
3138                 // // data = withoutUnicodeBuffer;
3139                 // data = new char[withoutUnicodeBuffer.length];
3140                 // for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
3141                 // data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
3142                 // }
3143                 // index = 1;
3144                 // }
3145                 firstLetter = data[index];
3146                 switch (firstLetter) {
3147                 case '_':
3148                         switch (length) {
3149                         case 8:
3150                                 // __FILE__
3151                                 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'i') && (data[++index] == 'l')
3152                                                 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3153                                         return TokenNameFILE;
3154                                 index = 0; // __LINE__
3155                                 if ((data[++index] == '_') && (data[++index] == 'l') && (data[++index] == 'i') && (data[++index] == 'n')
3156                                                 && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == '_'))
3157                                         return TokenNameLINE;
3158                                 break;
3159                         case 9:
3160                                 // __CLASS__
3161                                 if ((data[++index] == '_') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3162                                                 && (data[++index] == 's') && (data[++index] == 's') && (data[++index] == '_') && (data[++index] == '_'))
3163                                         return TokenNameCLASS_C;
3164                                 break;
3165                         case 11:
3166                                 // __METHOD__
3167                                 if ((data[++index] == '_') && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 't')
3168                                                 && (data[++index] == 'h') && (data[++index] == 'o') && (data[++index] == 'd') && (data[++index] == '_')
3169                                                 && (data[++index] == '_'))
3170                                         return TokenNameMETHOD_C;
3171                                 break;
3172                         case 12:
3173                                 // __FUNCTION__
3174                                 if ((data[++index] == '_') && (data[++index] == 'f') && (data[++index] == 'u') && (data[++index] == 'n')
3175                                                 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'o')
3176                                                 && (data[++index] == 'n') && (data[++index] == '_') && (data[++index] == '_'))
3177                                         return TokenNameFUNC_C;
3178                                 break;
3179                         }
3180                         return TokenNameIdentifier;
3181                 case 'a':
3182                         // as and array abstract
3183                         switch (length) {
3184                         case 2:
3185                                 // as
3186                                 if ((data[++index] == 's')) {
3187                                         return TokenNameas;
3188                                 } else {
3189                                         return TokenNameIdentifier;
3190                                 }
3191                         case 3:
3192                                 // and
3193                                 if ((data[++index] == 'n') && (data[++index] == 'd')) {
3194                                         return TokenNameand;
3195                                 } else {
3196                                         return TokenNameIdentifier;
3197                                 }
3198                         case 5:
3199                                 // array
3200                                 if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
3201                                         return TokenNamearray;
3202                                 else
3203                                         return TokenNameIdentifier;
3204                         case 8:
3205                                 if ((data[++index] == 'b') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'r')
3206                                                 && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 't'))
3207                                         return TokenNameabstract;
3208                                 else
3209                                         return TokenNameIdentifier;
3210                         default:
3211                                 return TokenNameIdentifier;
3212                         }
3213                 case 'b':
3214                         // break
3215                         switch (length) {
3216                         case 5:
3217                                 if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
3218                                         return TokenNamebreak;
3219                                 else
3220                                         return TokenNameIdentifier;
3221                         default:
3222                                 return TokenNameIdentifier;
3223                         }
3224                 case 'c':
3225                         // case catch class clone const continue
3226                         switch (length) {
3227                         case 4:
3228                                 if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
3229                                         return TokenNamecase;
3230                                 else
3231                                         return TokenNameIdentifier;
3232                         case 5:
3233                                 if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3234                                         return TokenNamecatch;
3235                                 index = 0;
3236                                 if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
3237                                         return TokenNameclass;
3238                                 index = 0;
3239                                 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 'e'))
3240                                         return TokenNameclone;
3241                                 index = 0;
3242                                 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
3243                                         return TokenNameconst;
3244                                 else
3245                                         return TokenNameIdentifier;
3246                         case 8:
3247                                 if ((data[++index] == 'o') && (data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'i')
3248                                                 && (data[++index] == 'n') && (data[++index] == 'u') && (data[++index] == 'e'))
3249                                         return TokenNamecontinue;
3250                                 else
3251                                         return TokenNameIdentifier;
3252                         default:
3253                                 return TokenNameIdentifier;
3254                         }
3255                 case 'd':
3256                         // declare default do die
3257                         // TODO delete define ==> no keyword !
3258                         switch (length) {
3259                         case 2:
3260                                 if ((data[++index] == 'o'))
3261                                         return TokenNamedo;
3262                                 else
3263                                         return TokenNameIdentifier;
3264                         // case 6 :
3265                         // if ((data[++index] == 'e')
3266                         // && (data[++index] == 'f')
3267                         // && (data[++index] == 'i')
3268                         // && (data[++index] == 'n')
3269                         // && (data[++index] == 'e'))
3270                         // return TokenNamedefine;
3271                         // else
3272                         // return TokenNameIdentifier;
3273                         case 7:
3274                                 if ((data[++index] == 'e') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a')
3275                                                 && (data[++index] == 'r') && (data[++index] == 'e'))
3276                                         return TokenNamedeclare;
3277                                 index = 0;
3278                                 if ((data[++index] == 'e') && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'u')
3279                                                 && (data[++index] == 'l') && (data[++index] == 't'))
3280                                         return TokenNamedefault;
3281                                 else
3282                                         return TokenNameIdentifier;
3283                         default:
3284                                 return TokenNameIdentifier;
3285                         }
3286                 case 'e':
3287                         // echo else exit elseif extends eval
3288                         switch (length) {
3289                         case 4:
3290                                 if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
3291                                         return TokenNameecho;
3292                                 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
3293                                         return TokenNameelse;
3294                                 else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
3295                                         return TokenNameexit;
3296                                 else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
3297                                         return TokenNameeval;
3298                                 else
3299                                         return TokenNameIdentifier;
3300                         case 5:
3301                                 // endif empty
3302                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
3303                                         return TokenNameendif;
3304                                 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
3305                                         return TokenNameempty;
3306                                 else
3307                                         return TokenNameIdentifier;
3308                         case 6:
3309                                 // endfor
3310                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o')
3311                                                 && (data[++index] == 'r'))
3312                                         return TokenNameendfor;
3313                                 else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 'i')
3314                                                 && (data[++index] == 'f'))
3315                                         return TokenNameelseif;
3316                                 else
3317                                         return TokenNameIdentifier;
3318                         case 7:
3319                                 if ((data[++index] == 'x') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'n')
3320                                                 && (data[++index] == 'd') && (data[++index] == 's'))
3321                                         return TokenNameextends;
3322                                 else
3323                                         return TokenNameIdentifier;
3324                         case 8:
3325                                 // endwhile
3326                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'w') && (data[++index] == 'h')
3327                                                 && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3328                                         return TokenNameendwhile;
3329                                 else
3330                                         return TokenNameIdentifier;
3331                         case 9:
3332                                 // endswitch
3333                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 's') && (data[++index] == 'w')
3334                                                 && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
3335                                         return TokenNameendswitch;
3336                                 else
3337                                         return TokenNameIdentifier;
3338                         case 10:
3339                                 // enddeclare
3340                                 if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'd') && (data[++index] == 'e')
3341                                                 && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 'r')
3342                                                 && (data[++index] == 'e'))
3343                                         return TokenNameenddeclare;
3344                                 index = 0;
3345                                 if ((data[++index] == 'n') // endforeach
3346                                                 && (data[++index] == 'd') && (data[++index] == 'f') && (data[++index] == 'o') && (data[++index] == 'r')
3347                                                 && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'h'))
3348                                         return TokenNameendforeach;
3349                                 else
3350                                         return TokenNameIdentifier;
3351                         default:
3352                                 return TokenNameIdentifier;
3353                         }
3354                 case 'f':
3355                         // for false final function
3356                         switch (length) {
3357                         case 3:
3358                                 if ((data[++index] == 'o') && (data[++index] == 'r'))
3359                                         return TokenNamefor;
3360                                 else
3361                                         return TokenNameIdentifier;
3362                         case 5:
3363                                 // if ((data[++index] == 'a') && (data[++index] == 'l')
3364                                 // && (data[++index] == 's') && (data[++index] == 'e'))
3365                                 // return TokenNamefalse;
3366                                 if ((data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
3367                                         return TokenNamefinal;
3368                                 else
3369                                         return TokenNameIdentifier;
3370                         case 7:
3371                                 // foreach
3372                                 if ((data[++index] == 'o') && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a')
3373                                                 && (data[++index] == 'c') && (data[++index] == 'h'))
3374                                         return TokenNameforeach;
3375                                 else
3376                                         return TokenNameIdentifier;
3377                         case 8:
3378                                 // function
3379                                 if ((data[++index] == 'u') && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 't')
3380                                                 && (data[++index] == 'i') && (data[++index] == 'o') && (data[++index] == 'n'))
3381                                         return TokenNamefunction;
3382                                 else
3383                                         return TokenNameIdentifier;
3384                         default:
3385                                 return TokenNameIdentifier;
3386                         }
3387                 case 'g':
3388                         // global
3389                         if (length == 6) {
3390                                 if ((data[++index] == 'l') && (data[++index] == 'o') && (data[++index] == 'b') && (data[++index] == 'a')
3391                                                 && (data[++index] == 'l')) {
3392                                         return TokenNameglobal;
3393                                 }
3394                         }
3395                         return TokenNameIdentifier;
3396                 case 'i':
3397                         // if int isset include include_once instanceof interface implements
3398                         switch (length) {
3399                         case 2:
3400                                 if (data[++index] == 'f')
3401                                         return TokenNameif;
3402                                 else
3403                                         return TokenNameIdentifier;
3404                         // case 3 :
3405                         // if ((data[++index] == 'n') && (data[++index] == 't'))
3406                         // return TokenNameint;
3407                         // else
3408                         // return TokenNameIdentifier;
3409                         case 5:
3410                                 if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3411                                         return TokenNameisset;
3412                                 else
3413                                         return TokenNameIdentifier;
3414                         case 7:
3415                                 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3416                                                 && (data[++index] == 'd') && (data[++index] == 'e'))
3417                                         return TokenNameinclude;
3418                                 else
3419                                         return TokenNameIdentifier;
3420                         case 9:
3421                                 // interface
3422                                 if ((data[++index] == 'n') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'r')
3423                                                 && (data[++index] == 'f') && (data[++index] == 'a') && (data[++index] == 'c') && (data[++index] == 'e'))
3424                                         return TokenNameinterface;
3425                                 else
3426                                         return TokenNameIdentifier;
3427                         case 10:
3428                                 // instanceof
3429                                 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't') && (data[++index] == 'a')
3430                                                 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e') && (data[++index] == 'o')
3431                                                 && (data[++index] == 'f'))
3432                                         return TokenNameinstanceof;
3433                                 if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 'l') && (data[++index] == 'e')
3434                                                 && (data[++index] == 'm') && (data[++index] == 'e') && (data[++index] == 'n') && (data[++index] == 't')
3435                                                 && (data[++index] == 's'))
3436                                         return TokenNameimplements;
3437                                 else
3438                                         return TokenNameIdentifier;
3439                         case 12:
3440                                 if ((data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'l') && (data[++index] == 'u')
3441                                                 && (data[++index] == 'd') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3442                                                 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e'))
3443                                         return TokenNameinclude_once;
3444                                 else
3445                                         return TokenNameIdentifier;
3446                         default:
3447                                 return TokenNameIdentifier;
3448                         }
3449                 case 'l':
3450                         // list
3451                         if (length == 4) {
3452                                 if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
3453                                         return TokenNamelist;
3454                                 }
3455                         }
3456                         return TokenNameIdentifier;
3457                 case 'n':
3458                         // new null
3459                         switch (length) {
3460                         case 3:
3461                                 if ((data[++index] == 'e') && (data[++index] == 'w'))
3462                                         return TokenNamenew;
3463                                 else
3464                                         return TokenNameIdentifier;
3465                         // case 4 :
3466                         // if ((data[++index] == 'u') && (data[++index] == 'l')
3467                         // && (data[++index] == 'l'))
3468                         // return TokenNamenull;
3469                         // else
3470                         // return TokenNameIdentifier;
3471                         default:
3472                                 return TokenNameIdentifier;
3473                         }
3474                 case 'o':
3475                         // or old_function
3476                         if (length == 2) {
3477                                 if (data[++index] == 'r') {
3478                                         return TokenNameor;
3479                                 }
3480                         }
3481                         // if (length == 12) {
3482                         // if ((data[++index] == 'l')
3483                         // && (data[++index] == 'd')
3484                         // && (data[++index] == '_')
3485                         // && (data[++index] == 'f')
3486                         // && (data[++index] == 'u')
3487                         // && (data[++index] == 'n')
3488                         // && (data[++index] == 'c')
3489                         // && (data[++index] == 't')
3490                         // && (data[++index] == 'i')
3491                         // && (data[++index] == 'o')
3492                         // && (data[++index] == 'n')) {
3493                         // return TokenNameold_function;
3494                         // }
3495                         // }
3496                         return TokenNameIdentifier;
3497                 case 'p':
3498                         // print public private protected
3499                         switch (length) {
3500                         case 5:
3501                                 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
3502                                         return TokenNameprint;
3503                                 } else
3504                                         return TokenNameIdentifier;
3505                         case 6:
3506                                 if ((data[++index] == 'u') && (data[++index] == 'b') && (data[++index] == 'l') && (data[++index] == 'i')
3507                                                 && (data[++index] == 'c')) {
3508                                         return TokenNamepublic;
3509                                 } else
3510                                         return TokenNameIdentifier;
3511                         case 7:
3512                                 if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'v') && (data[++index] == 'a')
3513                                                 && (data[++index] == 't') && (data[++index] == 'e')) {
3514                                         return TokenNameprivate;
3515                                 } else
3516                                         return TokenNameIdentifier;
3517                         case 9:
3518                                 if ((data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 't') && (data[++index] == 'e')
3519                                                 && (data[++index] == 'c') && (data[++index] == 't') && (data[++index] == 'e') && (data[++index] == 'd')) {
3520                                         return TokenNameprotected;
3521                                 } else
3522                                         return TokenNameIdentifier;
3523                         }
3524                         return TokenNameIdentifier;
3525                 case 'r':
3526                         // return require require_once
3527                         if (length == 6) {
3528                                 if ((data[++index] == 'e') && (data[++index] == 't') && (data[++index] == 'u') && (data[++index] == 'r')
3529                                                 && (data[++index] == 'n')) {
3530                                         return TokenNamereturn;
3531                                 }
3532                         } else if (length == 7) {
3533                                 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3534                                                 && (data[++index] == 'r') && (data[++index] == 'e')) {
3535                                         return TokenNamerequire;
3536                                 }
3537                         } else if (length == 12) {
3538                                 if ((data[++index] == 'e') && (data[++index] == 'q') && (data[++index] == 'u') && (data[++index] == 'i')
3539                                                 && (data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == '_') && (data[++index] == 'o')
3540                                                 && (data[++index] == 'n') && (data[++index] == 'c') && (data[++index] == 'e')) {
3541                                         return TokenNamerequire_once;
3542                                 }
3543                         } else
3544                                 return TokenNameIdentifier;
3545                 case 's':
3546                         // self static switch
3547                         switch (length) {
3548 //                      case 4:
3549 //                              if ((data[++index] == 'e') && (data[++index] == 'l') && (data[++index] == 'f')) {
3550 //                                      return TokenNameself;
3551 //                              }
3552 //                              return TokenNameIdentifier;
3553                         case 6:
3554                                 if (data[++index] == 't')
3555                                         if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3556                                                 return TokenNamestatic;
3557                                         } else
3558                                                 return TokenNameIdentifier;
3559                                 else if ((data[index] == 'w') && (data[++index] == 'i') && (data[++index] == 't') && (data[++index] == 'c')
3560                                                 && (data[++index] == 'h'))
3561                                         return TokenNameswitch;
3562                                 else
3563                                         return TokenNameIdentifier;
3564                         default:
3565                                 return TokenNameIdentifier;
3566                         }
3567                 case 't':
3568                         // try true throw
3569                         switch (length) {
3570                         case 3:
3571                                 if ((data[++index] == 'r') && (data[++index] == 'y'))
3572                                         return TokenNametry;
3573                                 else
3574                                         return TokenNameIdentifier;
3575                         // case 4 :
3576                         // if ((data[++index] == 'r') && (data[++index] == 'u')
3577                         // && (data[++index] == 'e'))
3578                         // return TokenNametrue;
3579                         // else
3580                         // return TokenNameIdentifier;
3581                         case 5:
3582                                 if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3583                                         return TokenNamethrow;
3584                                 else
3585                                         return TokenNameIdentifier;
3586                         default:
3587                                 return TokenNameIdentifier;
3588                         }
3589                 case 'u':
3590                         // use unset
3591                         switch (length) {
3592                         case 3:
3593                                 if ((data[++index] == 's') && (data[++index] == 'e'))
3594                                         return TokenNameuse;
3595                                 else
3596                                         return TokenNameIdentifier;
3597                         case 5:
3598                                 if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3599                                         return TokenNameunset;
3600                                 else
3601                                         return TokenNameIdentifier;
3602                         default:
3603                                 return TokenNameIdentifier;
3604                         }
3605                 case 'v':
3606                         // var
3607                         switch (length) {
3608                         case 3:
3609                                 if ((data[++index] == 'a') && (data[++index] == 'r'))
3610                                         return TokenNamevar;
3611                                 else
3612                                         return TokenNameIdentifier;
3613                         default:
3614                                 return TokenNameIdentifier;
3615                         }
3616                 case 'w':
3617                         // while
3618                         switch (length) {
3619                         case 5:
3620                                 if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3621                                         return TokenNamewhile;
3622                                 else
3623                                         return TokenNameIdentifier;
3624                         // case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3625                         // (data[++index]=='e') && (data[++index]=='f')&&
3626                         // (data[++index]=='p'))
3627                         // return TokenNamewidefp ;
3628                         // else
3629                         // return TokenNameIdentifier;
3630                         default:
3631                                 return TokenNameIdentifier;
3632                         }
3633                 case 'x':
3634                         // xor
3635                         switch (length) {
3636                         case 3:
3637                                 if ((data[++index] == 'o') && (data[++index] == 'r'))
3638                                         return TokenNamexor;
3639                                 else
3640                                         return TokenNameIdentifier;
3641                         default:
3642                                 return TokenNameIdentifier;
3643                         }
3644                 default:
3645                         return TokenNameIdentifier;
3646                 }
3647         }
3648
3649         public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3650                 // when entering this method the currentCharacter is the firt
3651                 // digit of the number , i.e. it may be preceeded by a . when
3652                 // dotPrefix is true
3653                 boolean floating = dotPrefix;
3654                 if ((!dotPrefix) && (currentCharacter == '0')) {
3655                         if (getNextChar('x', 'X') >= 0) { // ----------hexa-----------------
3656                                 // force the first char of the hexa number do exist...
3657                                 // consume next character
3658                                 unicodeAsBackSlash = false;
3659                                 currentCharacter = source[currentPosition++];
3660                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
3661                                 // && (source[currentPosition] == 'u')) {
3662                                 // getNextUnicodeChar();
3663                                 // } else {
3664                                 // if (withoutUnicodePtr != 0) {
3665                                 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3666                                 // }
3667                                 // }
3668                                 if (Character.digit(currentCharacter, 16) == -1)
3669                                         throw new InvalidInputException(INVALID_HEXA);
3670                                 // ---end forcing--
3671                                 while (getNextCharAsDigit(16)) {
3672                                 }
3673                                 ;
3674                                 // if (getNextChar('l', 'L') >= 0)
3675                                 // return TokenNameLongLiteral;
3676                                 // else
3677                                 return TokenNameIntegerLiteral;
3678                         }
3679                         // there is x or X in the number
3680                         // potential octal ! ... some one may write 000099.0 ! thus 00100 <
3681                         // 00078.0 is true !!!!! crazy language
3682                         if (getNextCharAsDigit()) {
3683                                 // -------------potential octal-----------------
3684                                 while (getNextCharAsDigit()) {
3685                                 }
3686                                 ;
3687                                 // if (getNextChar('l', 'L') >= 0) {
3688                                 // return TokenNameLongLiteral;
3689                                 // }
3690                                 //
3691                                 // if (getNextChar('f', 'F') >= 0) {
3692                                 // return TokenNameFloatingPointLiteral;
3693                                 // }
3694                                 if (getNextChar('d', 'D') >= 0) {
3695                                         return TokenNameDoubleLiteral;
3696                                 } else { // make the distinction between octal and float ....
3697                                         if (getNextChar('.')) { // bingo ! ....
3698                                                 while (getNextCharAsDigit()) {
3699                                                 }
3700                                                 ;
3701                                                 if (getNextChar('e', 'E') >= 0) {
3702                                                         // consume next character
3703                                                         unicodeAsBackSlash = false;
3704                                                         currentCharacter = source[currentPosition++];
3705                                                         // if (((currentCharacter = source[currentPosition++]) == '\\')
3706                                                         // && (source[currentPosition] == 'u')) {
3707                                                         // getNextUnicodeChar();
3708                                                         // } else {
3709                                                         // if (withoutUnicodePtr != 0) {
3710                                                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3711                                                         // }
3712                                                         // }
3713                                                         if ((currentCharacter == '-') || (currentCharacter == '+')) {
3714                                                                 // consume next character
3715                                                                 unicodeAsBackSlash = false;
3716                                                                 currentCharacter = source[currentPosition++];
3717                                                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
3718                                                                 // && (source[currentPosition] == 'u')) {
3719                                                                 // getNextUnicodeChar();
3720                                                                 // } else {
3721                                                                 // if (withoutUnicodePtr != 0) {
3722                                                                 // withoutUnicodeBuffer[++withoutUnicodePtr] =
3723                                                                 // currentCharacter;
3724                                                                 // }
3725                                                                 // }
3726                                                         }
3727                                                         if (!Character.isDigit(currentCharacter))
3728                                                                 throw new InvalidInputException(INVALID_FLOAT);
3729                                                         while (getNextCharAsDigit()) {
3730                                                         }
3731                                                         ;
3732                                                 }
3733                                                 // if (getNextChar('f', 'F') >= 0)
3734                                                 // return TokenNameFloatingPointLiteral;
3735                                                 getNextChar('d', 'D'); // jump over potential d or D
3736                                                 return TokenNameDoubleLiteral;
3737                                         } else {
3738                                                 return TokenNameIntegerLiteral;
3739                                         }
3740                                 }
3741                         } else {
3742                                 /* carry on */
3743                         }
3744                 }
3745                 while (getNextCharAsDigit()) {
3746                 }
3747                 ;
3748                 // if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3749                 // return TokenNameLongLiteral;
3750                 if ((!dotPrefix) && (getNextChar('.'))) { // decimal part that can be empty
3751                         while (getNextCharAsDigit()) {
3752                         }
3753                         ;
3754                         floating = true;
3755                 }
3756                 // if floating is true both exponant and suffix may be optional
3757                 if (getNextChar('e', 'E') >= 0) {
3758                         floating = true;
3759                         // consume next character
3760                         unicodeAsBackSlash = false;
3761                         currentCharacter = source[currentPosition++];
3762                         // if (((currentCharacter = source[currentPosition++]) == '\\')
3763                         // && (source[currentPosition] == 'u')) {
3764                         // getNextUnicodeChar();
3765                         // } else {
3766                         // if (withoutUnicodePtr != 0) {
3767                         // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3768                         // }
3769                         // }
3770                         if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3771                                 // next
3772                                 // character
3773                                 unicodeAsBackSlash = false;
3774                                 currentCharacter = source[currentPosition++];
3775                                 // if (((currentCharacter = source[currentPosition++]) == '\\')
3776                                 // && (source[currentPosition] == 'u')) {
3777                                 // getNextUnicodeChar();
3778                                 // } else {
3779                                 // if (withoutUnicodePtr != 0) {
3780                                 // withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3781                                 // }
3782                                 // }
3783                         }
3784                         if (!Character.isDigit(currentCharacter))
3785                                 throw new InvalidInputException(INVALID_FLOAT);
3786                         while (getNextCharAsDigit()) {
3787                         }
3788                         ;
3789                 }
3790                 if (getNextChar('d', 'D') >= 0)
3791                         return TokenNameDoubleLiteral;
3792                 // if (getNextChar('f', 'F') >= 0)
3793                 // return TokenNameFloatingPointLiteral;
3794                 // the long flag has been tested before
3795                 return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3796         }
3797
3798         /**
3799          * Search the line number corresponding to a specific position
3800          *
3801          */
3802         public final int getLineNumber(int position) {
3803                 if (lineEnds == null)
3804                         return 1;
3805                 int length = linePtr + 1;
3806                 if (length == 0)
3807                         return 1;
3808                 int g = 0, d = length - 1;
3809                 int m = 0;
3810                 while (g <= d) {
3811                         m = (g + d) / 2;
3812                         if (position < lineEnds[m]) {
3813                                 d = m - 1;
3814                         } else if (position > lineEnds[m]) {
3815                                 g = m + 1;
3816                         } else {
3817                                 return m + 1;
3818                         }
3819                 }
3820                 if (position < lineEnds[m]) {
3821                         return m + 1;
3822                 }
3823                 return m + 2;
3824         }
3825
3826         public void setPHPMode(boolean mode) {
3827                 phpMode = mode;
3828         }
3829
3830         public final void setSource(char[] source) {
3831                 setSource(null, source);
3832         }
3833
3834         public final void setSource(ICompilationUnit compilationUnit, char[] source) {
3835                 // the source-buffer is set to sourceString
3836                 this.compilationUnit = compilationUnit;
3837                 if (source == null) {
3838                         this.source = new char[0];
3839                 } else {
3840                         this.source = source;
3841                 }
3842                 startPosition = -1;
3843                 initialPosition = currentPosition = 0;
3844                 containsAssertKeyword = false;
3845                 withoutUnicodeBuffer = new char[this.source.length];
3846                 // encapsedStringStack = new Stack();
3847         }
3848
3849         public String toString() {
3850                 if (startPosition == source.length)
3851                         return "EOF\n\n" + new String(source); //$NON-NLS-1$
3852                 if (currentPosition > source.length)
3853                         return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3854                 char front[] = new char[startPosition];
3855                 System.arraycopy(source, 0, front, 0, startPosition);
3856                 int middleLength = (currentPosition - 1) - startPosition + 1;
3857                 char middle[];
3858                 if (middleLength > -1) {
3859                         middle = new char[middleLength];
3860                         System.arraycopy(source, startPosition, middle, 0, middleLength);
3861                 } else {
3862                         middle = new char[0];
3863                 }
3864                 char end[] = new char[source.length - (currentPosition - 1)];
3865                 System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3866                 return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3867                                 + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3868                                 + new String(end);
3869         }
3870
3871         public final String toStringAction(int act) {
3872                 switch (act) {
3873                 case TokenNameERROR:
3874                         return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3875                 // //$NON-NLS-1$
3876                 case TokenNameINLINE_HTML:
3877                         return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3878                 case TokenNameIdentifier:
3879                         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3880                 case TokenNameVariable:
3881                         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3882                 case TokenNameabstract:
3883                         return "abstract"; //$NON-NLS-1$
3884                 case TokenNameand:
3885                         return "AND"; //$NON-NLS-1$
3886                 case TokenNamearray:
3887                         return "array"; //$NON-NLS-1$
3888                 case TokenNameas:
3889                         return "as"; //$NON-NLS-1$
3890                 case TokenNamebreak:
3891                         return "break"; //$NON-NLS-1$
3892                 case TokenNamecase:
3893                         return "case"; //$NON-NLS-1$
3894                 case TokenNameclass:
3895                         return "class"; //$NON-NLS-1$
3896                 case TokenNamecatch:
3897                         return "catch"; //$NON-NLS-1$
3898                 case TokenNameclone:
3899                         //$NON-NLS-1$
3900                         return "clone";
3901                 case TokenNameconst:
3902                         //$NON-NLS-1$
3903                         return "const";
3904                 case TokenNamecontinue:
3905                         return "continue"; //$NON-NLS-1$
3906                 case TokenNamedefault:
3907                         return "default"; //$NON-NLS-1$
3908                 // case TokenNamedefine :
3909                 // return "define"; //$NON-NLS-1$
3910                 case TokenNamedo:
3911                         return "do"; //$NON-NLS-1$
3912                 case TokenNameecho:
3913                         return "echo"; //$NON-NLS-1$
3914                 case TokenNameelse:
3915                         return "else"; //$NON-NLS-1$
3916                 case TokenNameelseif:
3917                         return "elseif"; //$NON-NLS-1$
3918                 case TokenNameendfor:
3919                         return "endfor"; //$NON-NLS-1$
3920                 case TokenNameendforeach:
3921                         return "endforeach"; //$NON-NLS-1$
3922                 case TokenNameendif:
3923                         return "endif"; //$NON-NLS-1$
3924                 case TokenNameendswitch:
3925                         return "endswitch"; //$NON-NLS-1$
3926                 case TokenNameendwhile:
3927                         return "endwhile"; //$NON-NLS-1$
3928                 case TokenNameexit:
3929                         return "exit";
3930                 case TokenNameextends:
3931                         return "extends"; //$NON-NLS-1$
3932                 // case TokenNamefalse :
3933                 // return "false"; //$NON-NLS-1$
3934                 case TokenNamefinal:
3935                         return "final"; //$NON-NLS-1$
3936                 case TokenNamefor:
3937                         return "for"; //$NON-NLS-1$
3938                 case TokenNameforeach:
3939                         return "foreach"; //$NON-NLS-1$
3940                 case TokenNamefunction:
3941                         return "function"; //$NON-NLS-1$
3942                 case TokenNameglobal:
3943                         return "global"; //$NON-NLS-1$
3944                 case TokenNameif:
3945                         return "if"; //$NON-NLS-1$
3946                 case TokenNameimplements:
3947                         return "implements"; //$NON-NLS-1$
3948                 case TokenNameinclude:
3949                         return "include"; //$NON-NLS-1$
3950                 case TokenNameinclude_once:
3951                         return "include_once"; //$NON-NLS-1$
3952                 case TokenNameinstanceof:
3953                         return "instanceof"; //$NON-NLS-1$
3954                 case TokenNameinterface:
3955                         return "interface"; //$NON-NLS-1$
3956                 case TokenNameisset:
3957                         return "isset"; //$NON-NLS-1$
3958                 case TokenNamelist:
3959                         return "list"; //$NON-NLS-1$
3960                 case TokenNamenew:
3961                         return "new"; //$NON-NLS-1$
3962                 // case TokenNamenull :
3963                 // return "null"; //$NON-NLS-1$
3964                 case TokenNameor:
3965                         return "OR"; //$NON-NLS-1$
3966                 case TokenNameprint:
3967                         return "print"; //$NON-NLS-1$
3968                 case TokenNameprivate:
3969                         return "private"; //$NON-NLS-1$
3970                 case TokenNameprotected:
3971                         return "protected"; //$NON-NLS-1$
3972                 case TokenNamepublic:
3973                         return "public"; //$NON-NLS-1$
3974                 case TokenNamerequire:
3975                         return "require"; //$NON-NLS-1$
3976                 case TokenNamerequire_once:
3977                         return "require_once"; //$NON-NLS-1$
3978                 case TokenNamereturn:
3979                         return "return"; //$NON-NLS-1$
3980 //              case TokenNameself:
3981 //                      return "self"; //$NON-NLS-1$
3982                 case TokenNamestatic:
3983                         return "static"; //$NON-NLS-1$
3984                 case TokenNameswitch:
3985                         return "switch"; //$NON-NLS-1$
3986                 // case TokenNametrue :
3987                 // return "true"; //$NON-NLS-1$
3988                 case TokenNameunset:
3989                         return "unset"; //$NON-NLS-1$
3990                 case TokenNamevar:
3991                         return "var"; //$NON-NLS-1$
3992                 case TokenNamewhile:
3993                         return "while"; //$NON-NLS-1$
3994                 case TokenNamexor:
3995                         return "XOR"; //$NON-NLS-1$
3996                 // case TokenNamethis :
3997                 // return "$this"; //$NON-NLS-1$
3998                 case TokenNameIntegerLiteral:
3999                         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4000                 case TokenNameDoubleLiteral:
4001                         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4002                 case TokenNameStringDoubleQuote:
4003                         return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4004                 case TokenNameStringSingleQuote:
4005                         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4006                 case TokenNameStringInterpolated:
4007                         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4008                 case TokenNameEncapsedString0:
4009                         return "`"; //$NON-NLS-1$
4010                 // case TokenNameEncapsedString1:
4011                 // return "\'"; //$NON-NLS-1$
4012                 // case TokenNameEncapsedString2:
4013                 // return "\""; //$NON-NLS-1$
4014                 case TokenNameSTRING:
4015                         return "STRING_DQ(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
4016                 case TokenNameHEREDOC:
4017                         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4018                 case TokenNamePLUS_PLUS:
4019                         return "++"; //$NON-NLS-1$
4020                 case TokenNameMINUS_MINUS:
4021                         return "--"; //$NON-NLS-1$
4022                 case TokenNameEQUAL_EQUAL:
4023                         return "=="; //$NON-NLS-1$
4024                 case TokenNameEQUAL_EQUAL_EQUAL:
4025                         return "==="; //$NON-NLS-1$
4026                 case TokenNameEQUAL_GREATER:
4027                         return "=>"; //$NON-NLS-1$
4028                 case TokenNameLESS_EQUAL:
4029                         return "<="; //$NON-NLS-1$
4030                 case TokenNameGREATER_EQUAL:
4031                         return ">="; //$NON-NLS-1$
4032                 case TokenNameNOT_EQUAL:
4033                         return "!="; //$NON-NLS-1$
4034                 case TokenNameNOT_EQUAL_EQUAL:
4035                         return "!=="; //$NON-NLS-1$
4036                 case TokenNameLEFT_SHIFT:
4037                         return "<<"; //$NON-NLS-1$
4038                 case TokenNameRIGHT_SHIFT:
4039                         return ">>"; //$NON-NLS-1$
4040                 case TokenNamePLUS_EQUAL:
4041                         return "+="; //$NON-NLS-1$
4042                 case TokenNameMINUS_EQUAL:
4043                         return "-="; //$NON-NLS-1$
4044                 case TokenNameMULTIPLY_EQUAL:
4045                         return "*="; //$NON-NLS-1$
4046                 case TokenNameDIVIDE_EQUAL:
4047                         return "/="; //$NON-NLS-1$
4048                 case TokenNameAND_EQUAL:
4049                         return "&="; //$NON-NLS-1$
4050                 case TokenNameOR_EQUAL:
4051                         return "|="; //$NON-NLS-1$
4052                 case TokenNameXOR_EQUAL:
4053                         return "^="; //$NON-NLS-1$
4054                 case TokenNameREMAINDER_EQUAL:
4055                         return "%="; //$NON-NLS-1$
4056                 case TokenNameDOT_EQUAL:
4057                         return ".="; //$NON-NLS-1$
4058                 case TokenNameLEFT_SHIFT_EQUAL:
4059                         return "<<="; //$NON-NLS-1$
4060                 case TokenNameRIGHT_SHIFT_EQUAL:
4061                         return ">>="; //$NON-NLS-1$
4062                 case TokenNameOR_OR:
4063                         return "||"; //$NON-NLS-1$
4064                 case TokenNameAND_AND:
4065                         return "&&"; //$NON-NLS-1$
4066                 case TokenNamePLUS:
4067                         return "+"; //$NON-NLS-1$
4068                 case TokenNameMINUS:
4069                         return "-"; //$NON-NLS-1$
4070                 case TokenNameMINUS_GREATER:
4071                         return "->";
4072                 case TokenNameNOT:
4073                         return "!"; //$NON-NLS-1$
4074                 case TokenNameREMAINDER:
4075                         return "%"; //$NON-NLS-1$
4076                 case TokenNameXOR:
4077                         return "^"; //$NON-NLS-1$
4078                 case TokenNameAND:
4079                         return "&"; //$NON-NLS-1$
4080                 case TokenNameMULTIPLY:
4081                         return "*"; //$NON-NLS-1$
4082                 case TokenNameOR:
4083                         return "|"; //$NON-NLS-1$
4084                 case TokenNameTWIDDLE:
4085                         return "~"; //$NON-NLS-1$
4086                 case TokenNameTWIDDLE_EQUAL:
4087                         return "~="; //$NON-NLS-1$
4088                 case TokenNameDIVIDE:
4089                         return "/"; //$NON-NLS-1$
4090                 case TokenNameGREATER:
4091                         return ">"; //$NON-NLS-1$
4092                 case TokenNameLESS:
4093                         return "<"; //$NON-NLS-1$
4094                 case TokenNameLPAREN:
4095                         return "("; //$NON-NLS-1$
4096                 case TokenNameRPAREN:
4097                         return ")"; //$NON-NLS-1$
4098                 case TokenNameLBRACE:
4099                         return "{"; //$NON-NLS-1$
4100                 case TokenNameRBRACE:
4101                         return "}"; //$NON-NLS-1$
4102                 case TokenNameLBRACKET:
4103                         return "["; //$NON-NLS-1$
4104                 case TokenNameRBRACKET:
4105                         return "]"; //$NON-NLS-1$
4106                 case TokenNameSEMICOLON:
4107                         return ";"; //$NON-NLS-1$
4108                 case TokenNameQUESTION:
4109                         return "?"; //$NON-NLS-1$
4110                 case TokenNameCOLON:
4111                         return ":"; //$NON-NLS-1$
4112                 case TokenNameCOMMA:
4113                         return ","; //$NON-NLS-1$
4114                 case TokenNameDOT:
4115                         return "."; //$NON-NLS-1$
4116                 case TokenNameEQUAL:
4117                         return "="; //$NON-NLS-1$
4118                 case TokenNameAT:
4119                         return "@";
4120                 case TokenNameDOLLAR:
4121                         return "$";
4122                 case TokenNameDOLLAR_LBRACE:
4123                         return "${";
4124                 case TokenNameLBRACE_DOLLAR:
4125                         return "{$";
4126                 case TokenNameEOF:
4127                         return "EOF"; //$NON-NLS-1$
4128                 case TokenNameWHITESPACE:
4129                         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4130                 case TokenNameCOMMENT_LINE:
4131                         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4132                 case TokenNameCOMMENT_BLOCK:
4133                         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4134                 case TokenNameCOMMENT_PHPDOC:
4135                         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
4136                 // case TokenNameHTML :
4137                 // return "HTML(" + new String(getCurrentTokenSource()) + ")";
4138                 // //$NON-NLS-1$
4139                 case TokenNameFILE:
4140                         return "__FILE__"; //$NON-NLS-1$
4141                 case TokenNameLINE:
4142                         return "__LINE__"; //$NON-NLS-1$
4143                 case TokenNameCLASS_C:
4144                         return "__CLASS__"; //$NON-NLS-1$
4145                 case TokenNameMETHOD_C:
4146                         return "__METHOD__"; //$NON-NLS-1$
4147                 case TokenNameFUNC_C:
4148                         return "__FUNCTION__"; //$NON-NLS-1
4149                 case TokenNameboolCAST:
4150                         return "( bool )"; //$NON-NLS-1$
4151                 case TokenNameintCAST:
4152                         return "( int )"; //$NON-NLS-1$
4153                 case TokenNamedoubleCAST:
4154                         return "( double )"; //$NON-NLS-1$
4155                 case TokenNameobjectCAST:
4156                         return "( object )"; //$NON-NLS-1$
4157                 case TokenNamestringCAST:
4158                         return "( string )"; //$NON-NLS-1$
4159                 default:
4160                         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
4161                 }
4162         }
4163
4164         public Scanner() {
4165                 this(false, false);
4166         }
4167
4168         public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
4169                 this(tokenizeComments, tokenizeWhiteSpace, false);
4170         }
4171
4172         public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals) {
4173                 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
4174         }
4175
4176         public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
4177                         boolean assertMode) {
4178                 this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, assertMode, false, null, null, true);
4179         }
4180
4181         public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace, boolean checkNonExternalizedStringLiterals,
4182                         boolean assertMode, boolean tokenizeStrings, char[][] taskTags, char[][] taskPriorities, boolean isTaskCaseSensitive) {
4183                 this.eofPosition = Integer.MAX_VALUE;
4184                 this.tokenizeComments = tokenizeComments;
4185                 this.tokenizeWhiteSpace = tokenizeWhiteSpace;
4186                 this.tokenizeStrings = tokenizeStrings;
4187                 this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
4188                 // this.assertMode = assertMode;
4189                 // this.encapsedStringStack = null;
4190                 this.taskTags = taskTags;
4191                 this.taskPriorities = taskPriorities;
4192         }
4193
4194         private void checkNonExternalizeString() throws InvalidInputException {
4195                 if (currentLine == null)
4196                         return;
4197                 parseTags(currentLine);
4198         }
4199
4200         private void parseTags(NLSLine line) throws InvalidInputException {
4201                 String s = new String(getCurrentTokenSource());
4202                 int pos = s.indexOf(TAG_PREFIX);
4203                 int lineLength = line.size();
4204                 while (pos != -1) {
4205                         int start = pos + TAG_PREFIX_LENGTH;
4206                         int end = s.indexOf(TAG_POSTFIX, start);
4207                         String index = s.substring(start, end);
4208                         int i = 0;
4209                         try {
4210                                 i = Integer.parseInt(index) - 1;
4211                                 // Tags are one based not zero based.
4212                         } catch (NumberFormatException e) {
4213                                 i = -1; // we don't want to consider this as a valid NLS tag
4214                         }
4215                         if (line.exists(i)) {
4216                                 line.set(i, null);
4217                         }
4218                         pos = s.indexOf(TAG_PREFIX, start);
4219                 }
4220                 this.nonNLSStrings = new StringLiteral[lineLength];
4221                 int nonNLSCounter = 0;
4222                 for (Iterator iterator = line.iterator(); iterator.hasNext();) {
4223                         StringLiteral literal = (StringLiteral) iterator.next();
4224                         if (literal != null) {
4225                                 this.nonNLSStrings[nonNLSCounter++] = literal;
4226                         }
4227                 }
4228                 if (nonNLSCounter == 0) {
4229                         this.nonNLSStrings = null;
4230                         currentLine = null;
4231                         return;
4232                 }
4233                 this.wasNonExternalizedStringLiteral = true;
4234                 if (nonNLSCounter != lineLength) {
4235                         System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
4236                 }
4237                 currentLine = null;
4238         }
4239
4240         public final void scanEscapeCharacter() throws InvalidInputException {
4241                 // the string with "\\u" is a legal string of two chars \ and u
4242                 // thus we use a direct access to the source (for regular cases).
4243                 if (unicodeAsBackSlash) {
4244                         // consume next character
4245                         unicodeAsBackSlash = false;
4246                         // if (((currentCharacter = source[currentPosition++]) == '\\') &&
4247                         // (source[currentPosition] == 'u')) {
4248                         // getNextUnicodeChar();
4249                         // } else {
4250                         if (withoutUnicodePtr != 0) {
4251                                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
4252                                 // }
4253                         }
4254                 } else
4255                         currentCharacter = source[currentPosition++];
4256                 switch (currentCharacter) {
4257                 case 'b':
4258                         currentCharacter = '\b';
4259                         break;
4260                 case 't':
4261                         currentCharacter = '\t';
4262                         break;
4263                 case 'n':
4264                         currentCharacter = '\n';
4265                         break;
4266                 case 'f':
4267                         currentCharacter = '\f';
4268                         break;
4269                 case 'r':
4270                         currentCharacter = '\r';
4271                         break;
4272                 case '\"':
4273                         currentCharacter = '\"';
4274                         break;
4275                 case '\'':
4276                         currentCharacter = '\'';
4277                         break;
4278                 case '\\':
4279                         currentCharacter = '\\';
4280                         break;
4281                 default:
4282                         // -----------octal escape--------------
4283                         // OctalDigit
4284                         // OctalDigit OctalDigit
4285                         // ZeroToThree OctalDigit OctalDigit
4286                         int number = Character.getNumericValue(currentCharacter);
4287                         if (number >= 0 && number <= 7) {
4288                                 boolean zeroToThreeNot = number > 3;
4289                                 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4290                                         int digit = Character.getNumericValue(currentCharacter);
4291                                         if (digit >= 0 && digit <= 7) {
4292                                                 number = (number * 8) + digit;
4293                                                 if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4294                                                         if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4295                                                                 // Digit --> ignore last character
4296                                                                 currentPosition--;
4297                                                         } else {
4298                                                                 digit = Character.getNumericValue(currentCharacter);
4299                                                                 if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4300                                                                         // OctalDigit OctalDigit
4301                                                                         number = (number * 8) + digit;
4302                                                                 } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4303                                                                         // --> ignore last character
4304                                                                         currentPosition--;
4305                                                                 }
4306                                                         }
4307                                                 } else { // has read \OctalDigit NonDigit--> ignore last
4308                                                         // character
4309                                                         currentPosition--;
4310                                                 }
4311                                         } else { // has read \OctalDigit NonOctalDigit--> ignore last
4312                                                 // character
4313                                                 currentPosition--;
4314                                         }
4315                                 } else { // has read \OctalDigit --> ignore last character
4316                                         currentPosition--;
4317                                 }
4318                                 if (number > 255)
4319                                         throw new InvalidInputException(INVALID_ESCAPE);
4320                                 currentCharacter = (char) number;
4321                         } else
4322                                 throw new InvalidInputException(INVALID_ESCAPE);
4323                 }
4324         }
4325
        // check presence of task: tags
4327         // TODO (frederic) see if we need to take unicode characters into account...
4328         public void checkTaskTag(int commentStart, int commentEnd) {
4329                 char[] src = this.source;
4330
4331                 // only look for newer task: tags
4332                 if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4333                         return;
4334                 }
4335                 int foundTaskIndex = this.foundTaskCount;
4336                 char previous = src[commentStart + 1]; // should be '*' or '/'
4337                 nextChar: for (int i = commentStart + 2; i < commentEnd && i < this.eofPosition; i++) {
4338                         char[] tag = null;
4339                         char[] priority = null;
4340                         // check for tag occurrence only if not ambiguous with javadoc tag
4341                         if (previous != '@') {
4342                                 nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4343                                         tag = this.taskTags[itag];
4344                                         int tagLength = tag.length;
4345                                         if (tagLength == 0)
4346                                                 continue nextTag;
4347
4348                                         // ensure tag is not leaded with letter if tag starts with a letter
4349                                         if (Scanner.isPHPIdentifierStart(tag[0])) {
4350                                                 if (Scanner.isPHPIdentifierPart(previous)) {
4351                                                         continue nextTag;
4352                                                 }
4353                                         }
4354
4355                                         for (int t = 0; t < tagLength; t++) {
4356                                                 char sc, tc;
4357                                                 int x = i + t;
4358                                                 if (x >= this.eofPosition || x >= commentEnd)
4359                                                         continue nextTag;
4360                                                 if ((sc = src[i + t]) != (tc = tag[t])) { // case sensitive check
4361                                                         if (this.isTaskCaseSensitive || (Character.toLowerCase(sc) != Character.toLowerCase(tc))) { // case
4362                                                                 // insensitive
4363                                                                 // check
4364                                                                 continue nextTag;
4365                                                         }
4366                                                 }
4367                                         }
4368                                         // ensure tag is not followed with letter if tag finishes with a
4369                                         // letter
4370                                         if (i + tagLength < commentEnd && Scanner.isPHPIdentifierPart(src[i + tagLength - 1])) {
4371                                                 if (Scanner.isPHPIdentifierPart(src[i + tagLength]))
4372                                                         continue nextTag;
4373                                         }
4374                                         if (this.foundTaskTags == null) {
4375                                                 this.foundTaskTags = new char[5][];
4376                                                 this.foundTaskMessages = new char[5][];
4377                                                 this.foundTaskPriorities = new char[5][];
4378                                                 this.foundTaskPositions = new int[5][];
4379                                         } else if (this.foundTaskCount == this.foundTaskTags.length) {
4380                                                 System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4381                                                                 this.foundTaskCount);
4382                                                 System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4383                                                                 this.foundTaskCount);
4384                                                 System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities = new char[this.foundTaskCount * 2][], 0,
4385                                                                 this.foundTaskCount);
4386                                                 System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4387                                                                 this.foundTaskCount);
4388                                         }
4389
4390                                         priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
4391
4392                                         this.foundTaskTags[this.foundTaskCount] = tag;
4393                                         this.foundTaskPriorities[this.foundTaskCount] = priority;
4394                                         this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
4395                                         this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4396                                         this.foundTaskCount++;
4397                                         i += tagLength - 1; // will be incremented when looping
4398                                         break nextTag;
4399                                 }
4400                         }
4401                         previous = src[i];
4402                 }
4403                 for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4404                         // retrieve message start and end positions
4405                         int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
4406                         int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
4407                         // at most beginning of next task
4408                         if (max_value < msgStart) {
4409                                 max_value = msgStart; // would only occur if tag is before EOF.
4410                         }
4411                         int end = -1;
4412                         char c;
4413                         for (int j = msgStart; j < max_value; j++) {
4414                                 if ((c = src[j]) == '\n' || c == '\r') {
4415                                         end = j - 1;
4416                                         break;
4417                                 }
4418                         }
4419                         if (end == -1) {
4420                                 for (int j = max_value; j > msgStart; j--) {
4421                                         if ((c = src[j]) == '*') {
4422                                                 end = j - 1;
4423                                                 break;
4424                                         }
4425                                 }
4426                                 if (end == -1)
4427                                         end = max_value;
4428                         }
4429                         if (msgStart == end)
4430                                 continue; // empty
4431                         // trim the message
4432                         while (CharOperation.isWhitespace(src[end]) && msgStart <= end)
4433                                 end--;
4434                         while (CharOperation.isWhitespace(src[msgStart]) && msgStart <= end)
4435                                 msgStart++;
4436                         // update the end position of the task
4437                         this.foundTaskPositions[i][1] = end;
4438                         // get the message source
4439                         final int messageLength = end - msgStart + 1;
4440                         char[] message = new char[messageLength];
4441                         System.arraycopy(src, msgStart, message, 0, messageLength);
4442                         this.foundTaskMessages[i] = message;
4443                 }
4444         }
4445
        // check presence of task: tags
4447         // public void checkTaskTag(int commentStart, int commentEnd) {
4448         // // only look for newer task: tags
4449         // if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount
4450         // - 1][0] >= commentStart) {
4451         // return;
4452         // }
4453         // int foundTaskIndex = this.foundTaskCount;
4454         // nextChar: for (int i = commentStart; i < commentEnd && i <
4455         // this.eofPosition; i++) {
4456         // char[] tag = null;
4457         // char[] priority = null;
4458         // // check for tag occurrence
4459         // nextTag: for (int itag = 0; itag < this.taskTags.length; itag++) {
4460         // tag = this.taskTags[itag];
4461         // priority = this.taskPriorities != null && itag < this.taskPriorities.length
4462         // ? this.taskPriorities[itag] : null;
4463         // int tagLength = tag.length;
4464         // for (int t = 0; t < tagLength; t++) {
4465         // if (this.source[i + t] != tag[t])
4466         // continue nextTag;
4467         // }
4468         // if (this.foundTaskTags == null) {
4469         // this.foundTaskTags = new char[5][];
4470         // this.foundTaskMessages = new char[5][];
4471         // this.foundTaskPriorities = new char[5][];
4472         // this.foundTaskPositions = new int[5][];
4473         // } else if (this.foundTaskCount == this.foundTaskTags.length) {
4474         // System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new
4475         // char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
4476         // System.arraycopy(this.foundTaskMessages, 0, this.foundTaskMessages = new
4477         // char[this.foundTaskCount * 2][], 0,
4478         // this.foundTaskCount);
4479         // System.arraycopy(this.foundTaskPriorities, 0, this.foundTaskPriorities =
4480         // new char[this.foundTaskCount * 2][], 0,
4481         // this.foundTaskCount);
4482         // System.arraycopy(this.foundTaskPositions, 0, this.foundTaskPositions = new
4483         // int[this.foundTaskCount * 2][], 0,
4484         // this.foundTaskCount);
4485         // }
4486         // this.foundTaskTags[this.foundTaskCount] = tag;
4487         // this.foundTaskPriorities[this.foundTaskCount] = priority;
4488         // this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength
4489         // - 1 };
4490         // this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4491         // this.foundTaskCount++;
4492         // i += tagLength - 1; // will be incremented when looping
4493         // }
4494         // }
4495         // for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4496         // // retrieve message start and end positions
4497         // int msgStart = this.foundTaskPositions[i][0] +
4498         // this.foundTaskTags[i].length;
4499         // int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i +
4500         // 1][0] - 1 : commentEnd - 1;
4501         // // at most beginning of next task
4502         // if (max_value < msgStart)
4503         // max_value = msgStart; // only when no text follows the tag (it reaches the comment end or abuts the next tag).
4504         // int end = -1;
4505         // char c;
4506         // for (int j = msgStart; j < max_value; j++) {
4507         // if ((c = this.source[j]) == '\n' || c == '\r') {
4508         // end = j - 1;
4509         // break;
4510         // }
4511         // }
4512         // if (end == -1) {
4513         // for (int j = max_value; j > msgStart; j--) {
4514         // if ((c = this.source[j]) == '*') {
4515         // end = j - 1;
4516         // break;
4517         // }
4518         // }
4519         // if (end == -1)
4520         // end = max_value;
4521         // }
4522         // if (msgStart == end)
4523         // continue; // empty
4524         // // trim the message
4525         // while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4526         // end--;
4527         // while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4528         // msgStart++;
4529         // // update the end position of the task
4530         // this.foundTaskPositions[i][1] = end;
4531         // // get the message source
4532         // final int messageLength = end - msgStart + 1;
4533         // char[] message = new char[messageLength];
4534         // System.arraycopy(source, msgStart, message, 0, messageLength);
4535         // this.foundTaskMessages[i] = message;
4536         // }
4537         // }
4538 }