New version 1.0.8
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v0.5 
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v05.html
7  * 
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
16
17 import net.sourceforge.phpdt.core.compiler.*;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
19
20 public class Scanner implements IScanner, ITerminalSymbols {
21
  /* APIs are:
   - getNextToken(), which returns the type of the current token
     (this value is not memorized by the scanner)
   - getCurrentTokenSource(), which provides the token "REAL" source
     (aka all unicode have been transformed into a correct char)
   - sourceStart gives the position into the stream
   - currentPosition-1 gives the sourceEnd position into the stream
  */
30
31   // 1.4 feature 
32   private boolean assertMode;
33   public boolean useAssertAsAnIndentifier = false;
34   //flag indicating if processed source contains occurrences of keyword assert 
35   public boolean containsAssertKeyword = false;
36
37   public boolean recordLineSeparator;
38   public boolean phpMode = false;
39
40   public char currentCharacter;
41   public int startPosition;
42   public int currentPosition;
43   public int initialPosition, eofPosition;
44   // after this position eof are generated instead of real token from the source
45
46   public boolean tokenizeComments;
47   public boolean tokenizeWhiteSpace;
48
49   //source should be viewed as a window (aka a part)
50   //of a entire very large stream
51   public char source[];
52
53   //unicode support
54   public char[] withoutUnicodeBuffer;
55   public int withoutUnicodePtr;
56   //when == 0 ==> no unicode in the current token
57   public boolean unicodeAsBackSlash = false;
58
59   public boolean scanningFloatLiteral = false;
60
61   //support for /** comments
62   //public char[][] comments = new char[10][];
63   public int[] commentStops = new int[10];
64   public int[] commentStarts = new int[10];
65   public int commentPtr = -1; // no comment test with commentPtr value -1
66
67   //diet parsing support - jump over some method body when requested
68   public boolean diet = false;
69
70   //support for the  poor-line-debuggers ....
71   //remember the position of the cr/lf
72   public int[] lineEnds = new int[250];
73   public int linePtr = -1;
74   public boolean wasAcr = false;
75
76   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
77
78   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
79   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
80   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
81   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
82   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
83   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
84   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
85
86   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
87   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
88   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
89   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
90
91   //----------------optimized identifier managment------------------
92   static final char[] charArray_a = new char[] { 'a' },
93     charArray_b = new char[] { 'b' },
94     charArray_c = new char[] { 'c' },
95     charArray_d = new char[] { 'd' },
96     charArray_e = new char[] { 'e' },
97     charArray_f = new char[] { 'f' },
98     charArray_g = new char[] { 'g' },
99     charArray_h = new char[] { 'h' },
100     charArray_i = new char[] { 'i' },
101     charArray_j = new char[] { 'j' },
102     charArray_k = new char[] { 'k' },
103     charArray_l = new char[] { 'l' },
104     charArray_m = new char[] { 'm' },
105     charArray_n = new char[] { 'n' },
106     charArray_o = new char[] { 'o' },
107     charArray_p = new char[] { 'p' },
108     charArray_q = new char[] { 'q' },
109     charArray_r = new char[] { 'r' },
110     charArray_s = new char[] { 's' },
111     charArray_t = new char[] { 't' },
112     charArray_u = new char[] { 'u' },
113     charArray_v = new char[] { 'v' },
114     charArray_w = new char[] { 'w' },
115     charArray_x = new char[] { 'x' },
116     charArray_y = new char[] { 'y' },
117     charArray_z = new char[] { 'z' };
118
119   static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
120   static final int TableSize = 30, InternalTableSize = 6;
121   //30*6 = 180 entries
122   public static final int OptimizedLength = 6;
123   public /*static*/
124   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
125   // support for detecting non-externalized string literals
126   int currentLineNr = -1;
127   int previousLineNr = -1;
128   NLSLine currentLine = null;
129   List lines = new ArrayList();
130   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
131   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
132   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
133   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
134   public StringLiteral[] nonNLSStrings = null;
135   public boolean checkNonExternalizedStringLiterals = true;
136   public boolean wasNonExternalizedStringLiteral = false;
137
138   /*static*/ {
139     for (int i = 0; i < 6; i++) {
140       for (int j = 0; j < TableSize; j++) {
141         for (int k = 0; k < InternalTableSize; k++) {
142           charArray_length[i][j][k] = initCharArray;
143         }
144       }
145     }
146   }
147   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
148
149   public static final int RoundBracket = 0;
150   public static final int SquareBracket = 1;
151   public static final int CurlyBracket = 2;
152   public static final int BracketKinds = 3;
153
154   public static final boolean DEBUG = false;
155
156   public Scanner() {
157     this(false, false);
158   }
159   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
160     this(tokenizeComments, tokenizeWhiteSpace, false);
161   }
162
163   /**
164    * Determines if the specified character is
165    * permissible as the first character in a PHP identifier
166    */
167   public static boolean isPHPIdentifierStart(char ch) {
168     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
169   }
170
171   /**
172    * Determines if the specified character may be part of a PHP identifier as
173    * other than the first character
174    */
175   public static boolean isPHPIdentifierPart(char ch) {
176     return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
177   }
178
179   public final boolean atEnd() {
180     // This code is not relevant if source is 
181     // Only a part of the real stream input
182
183     return source.length == currentPosition;
184   }
185   public char[] getCurrentIdentifierSource() {
186     //return the token REAL source (aka unicodes are precomputed)
187
188     char[] result;
189     //    if (withoutUnicodePtr != 0)
190     //      //0 is used as a fast test flag so the real first char is in position 1
191     //      System.arraycopy(
192     //        withoutUnicodeBuffer,
193     //        1,
194     //        result = new char[withoutUnicodePtr],
195     //        0,
196     //        withoutUnicodePtr);
197     //    else {
198     int length = currentPosition - startPosition;
199     switch (length) { // see OptimizedLength
200       case 1 :
201         return optimizedCurrentTokenSource1();
202       case 2 :
203         return optimizedCurrentTokenSource2();
204       case 3 :
205         return optimizedCurrentTokenSource3();
206       case 4 :
207         return optimizedCurrentTokenSource4();
208       case 5 :
209         return optimizedCurrentTokenSource5();
210       case 6 :
211         return optimizedCurrentTokenSource6();
212     }
213     //no optimization
214     System.arraycopy(source, startPosition, result = new char[length], 0, length);
215     //   }
216     return result;
217   }
218   public int getCurrentTokenEndPosition() {
219     return this.currentPosition - 1;
220   }
221   
222   
223   public final char[] getCurrentTokenSource() {
224     // Return the token REAL source (aka unicodes are precomputed)
225
226     char[] result;
227     //    if (withoutUnicodePtr != 0)
228     //      // 0 is used as a fast test flag so the real first char is in position 1
229     //      System.arraycopy(
230     //        withoutUnicodeBuffer,
231     //        1,
232     //        result = new char[withoutUnicodePtr],
233     //        0,
234     //        withoutUnicodePtr);
235     //    else {
236     int length;
237     System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
238     //    }
239     return result;
240   }
241
242   public final char[] getCurrentTokenSource(int startPos) {
243     // Return the token REAL source (aka unicodes are precomputed)
244
245     char[] result;
246     //    if (withoutUnicodePtr != 0)
247     //      // 0 is used as a fast test flag so the real first char is in position 1
248     //      System.arraycopy(
249     //        withoutUnicodeBuffer,
250     //        1,
251     //        result = new char[withoutUnicodePtr],
252     //        0,
253     //        withoutUnicodePtr);
254     //    else {
255     int length;
256     System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
257     //  }
258     return result;
259   }
260
261   public final char[] getCurrentTokenSourceString() {
262     //return the token REAL source (aka unicodes are precomputed).
263     //REMOVE the two " that are at the beginning and the end.
264
265     char[] result;
266     if (withoutUnicodePtr != 0)
267       //0 is used as a fast test flag so the real first char is in position 1
268       System.arraycopy(withoutUnicodeBuffer, 2,
269       //2 is 1 (real start) + 1 (to jump over the ")
270       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
271     else {
272       int length;
273       System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
274     }
275     return result;
276   }
277   public int getCurrentTokenStartPosition() {
278     return this.startPosition;
279   }
280   
281         public final char[] getCurrentStringLiteralSource() {
282                 // Return the token REAL source (aka unicodes are precomputed)
283
284                 char[] result;
285
286                 int length;
287                 System.arraycopy(source, startPosition+1, result = new char[length = currentPosition - startPosition - 2], 0, length);
288                 //    }
289                 return result;
290         }
291
292   /*
293    * Search the source position corresponding to the end of a given line number
294    *
295    * Line numbers are 1-based, and relative to the scanner initialPosition. 
296    * Character positions are 0-based.
297    *
298    * In case the given line number is inconsistent, answers -1.
299    */
300   public final int getLineEnd(int lineNumber) {
301
302     if (lineEnds == null)
303       return -1;
304     if (lineNumber >= lineEnds.length)
305       return -1;
306     if (lineNumber <= 0)
307       return -1;
308
309     if (lineNumber == lineEnds.length - 1)
310       return eofPosition;
311     return lineEnds[lineNumber - 1];
312     // next line start one character behind the lineEnd of the previous line
313   }
314   /**
315    * Search the source position corresponding to the beginning of a given line number
316    *
317    * Line numbers are 1-based, and relative to the scanner initialPosition. 
318    * Character positions are 0-based.
319    *
320    * e.g.       getLineStart(1) --> 0   i.e. first line starts at character 0.
321    *
322    * In case the given line number is inconsistent, answers -1.
323    */
324   public final int getLineStart(int lineNumber) {
325
326     if (lineEnds == null)
327       return -1;
328     if (lineNumber >= lineEnds.length)
329       return -1;
330     if (lineNumber <= 0)
331       return -1;
332
333     if (lineNumber == 1)
334       return initialPosition;
335     return lineEnds[lineNumber - 2] + 1;
336     // next line start one character behind the lineEnd of the previous line
337   }
338   public final boolean getNextChar(char testedChar) {
339     //BOOLEAN
340     //handle the case of unicode.
341     //when a unicode appears then we must use a buffer that holds char internal values
342     //At the end of this method currentCharacter holds the new visited char
343     //and currentPosition points right next after it
344     //Both previous lines are true if the currentCharacter is == to the testedChar
345     //On false, no side effect has occured.
346
347     //ALL getNextChar.... ARE OPTIMIZED COPIES 
348
349     int temp = currentPosition;
350     try {
351       currentCharacter = source[currentPosition++];
352       //      if (((currentCharacter = source[currentPosition++]) == '\\')
353       //        && (source[currentPosition] == 'u')) {
354       //        //-------------unicode traitement ------------
355       //        int c1, c2, c3, c4;
356       //        int unicodeSize = 6;
357       //        currentPosition++;
358       //        while (source[currentPosition] == 'u') {
359       //          currentPosition++;
360       //          unicodeSize++;
361       //        }
362       //
363       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
364       //          || c1 < 0)
365       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
366       //            || c2 < 0)
367       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
368       //            || c3 < 0)
369       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
370       //            || c4 < 0)) {
371       //          currentPosition = temp;
372       //          return false;
373       //        }
374       //
375       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
376       //        if (currentCharacter != testedChar) {
377       //          currentPosition = temp;
378       //          return false;
379       //        }
380       //        unicodeAsBackSlash = currentCharacter == '\\';
381       //
382       //        //need the unicode buffer
383       //        if (withoutUnicodePtr == 0) {
384       //          //buffer all the entries that have been left aside....
385       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
386       //          System.arraycopy(
387       //            source,
388       //            startPosition,
389       //            withoutUnicodeBuffer,
390       //            1,
391       //            withoutUnicodePtr);
392       //        }
393       //        //fill the buffer with the char
394       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
395       //        return true;
396       //
397       //      } //-------------end unicode traitement--------------
398       //      else {
399       if (currentCharacter != testedChar) {
400         currentPosition = temp;
401         return false;
402       }
403       unicodeAsBackSlash = false;
404       //        if (withoutUnicodePtr != 0)
405       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
406       return true;
407       //      }
408     } catch (IndexOutOfBoundsException e) {
409       unicodeAsBackSlash = false;
410       currentPosition = temp;
411       return false;
412     }
413   }
414   public final int getNextChar(char testedChar1, char testedChar2) {
415     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
416     //test can be done with (x==0) for the first and (x>0) for the second
417     //handle the case of unicode.
418     //when a unicode appears then we must use a buffer that holds char internal values
419     //At the end of this method currentCharacter holds the new visited char
420     //and currentPosition points right next after it
421     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
422     //On false, no side effect has occured.
423
424     //ALL getNextChar.... ARE OPTIMIZED COPIES 
425
426     int temp = currentPosition;
427     try {
428       int result;
429       currentCharacter = source[currentPosition++];
430       //      if (((currentCharacter = source[currentPosition++]) == '\\')
431       //        && (source[currentPosition] == 'u')) {
432       //        //-------------unicode traitement ------------
433       //        int c1, c2, c3, c4;
434       //        int unicodeSize = 6;
435       //        currentPosition++;
436       //        while (source[currentPosition] == 'u') {
437       //          currentPosition++;
438       //          unicodeSize++;
439       //        }
440       //
441       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
442       //          || c1 < 0)
443       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
444       //            || c2 < 0)
445       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
446       //            || c3 < 0)
447       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
448       //            || c4 < 0)) {
449       //          currentPosition = temp;
450       //          return 2;
451       //        }
452       //
453       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
454       //        if (currentCharacter == testedChar1)
455       //          result = 0;
456       //        else if (currentCharacter == testedChar2)
457       //          result = 1;
458       //        else {
459       //          currentPosition = temp;
460       //          return -1;
461       //        }
462       //
463       //        //need the unicode buffer
464       //        if (withoutUnicodePtr == 0) {
465       //          //buffer all the entries that have been left aside....
466       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
467       //          System.arraycopy(
468       //            source,
469       //            startPosition,
470       //            withoutUnicodeBuffer,
471       //            1,
472       //            withoutUnicodePtr);
473       //        }
474       //        //fill the buffer with the char
475       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
476       //        return result;
477       //      } //-------------end unicode traitement--------------
478       //      else {
479       if (currentCharacter == testedChar1)
480         result = 0;
481       else if (currentCharacter == testedChar2)
482         result = 1;
483       else {
484         currentPosition = temp;
485         return -1;
486       }
487
488       //        if (withoutUnicodePtr != 0)
489       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
490       return result;
491       //     }
492     } catch (IndexOutOfBoundsException e) {
493       currentPosition = temp;
494       return -1;
495     }
496   }
497   public final boolean getNextCharAsDigit() {
498     //BOOLEAN
499     //handle the case of unicode.
500     //when a unicode appears then we must use a buffer that holds char internal values
501     //At the end of this method currentCharacter holds the new visited char
502     //and currentPosition points right next after it
503     //Both previous lines are true if the currentCharacter is a digit
504     //On false, no side effect has occured.
505
506     //ALL getNextChar.... ARE OPTIMIZED COPIES 
507
508     int temp = currentPosition;
509     try {
510       currentCharacter = source[currentPosition++];
511       //      if (((currentCharacter = source[currentPosition++]) == '\\')
512       //        && (source[currentPosition] == 'u')) {
513       //        //-------------unicode traitement ------------
514       //        int c1, c2, c3, c4;
515       //        int unicodeSize = 6;
516       //        currentPosition++;
517       //        while (source[currentPosition] == 'u') {
518       //          currentPosition++;
519       //          unicodeSize++;
520       //        }
521       //
522       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
523       //          || c1 < 0)
524       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
525       //            || c2 < 0)
526       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
527       //            || c3 < 0)
528       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
529       //            || c4 < 0)) {
530       //          currentPosition = temp;
531       //          return false;
532       //        }
533       //
534       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
535       //        if (!Character.isDigit(currentCharacter)) {
536       //          currentPosition = temp;
537       //          return false;
538       //        }
539       //
540       //        //need the unicode buffer
541       //        if (withoutUnicodePtr == 0) {
542       //          //buffer all the entries that have been left aside....
543       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
544       //          System.arraycopy(
545       //            source,
546       //            startPosition,
547       //            withoutUnicodeBuffer,
548       //            1,
549       //            withoutUnicodePtr);
550       //        }
551       //        //fill the buffer with the char
552       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
553       //        return true;
554       //      } //-------------end unicode traitement--------------
555       //      else {
556       if (!Character.isDigit(currentCharacter)) {
557         currentPosition = temp;
558         return false;
559       }
560       //        if (withoutUnicodePtr != 0)
561       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
562       return true;
563       //      }
564     } catch (IndexOutOfBoundsException e) {
565       currentPosition = temp;
566       return false;
567     }
568   }
569   public final boolean getNextCharAsDigit(int radix) {
570     //BOOLEAN
571     //handle the case of unicode.
572     //when a unicode appears then we must use a buffer that holds char internal values
573     //At the end of this method currentCharacter holds the new visited char
574     //and currentPosition points right next after it
575     //Both previous lines are true if the currentCharacter is a digit base on radix
576     //On false, no side effect has occured.
577
578     //ALL getNextChar.... ARE OPTIMIZED COPIES 
579
580     int temp = currentPosition;
581     try {
582       currentCharacter = source[currentPosition++];
583       //      if (((currentCharacter = source[currentPosition++]) == '\\')
584       //        && (source[currentPosition] == 'u')) {
585       //        //-------------unicode traitement ------------
586       //        int c1, c2, c3, c4;
587       //        int unicodeSize = 6;
588       //        currentPosition++;
589       //        while (source[currentPosition] == 'u') {
590       //          currentPosition++;
591       //          unicodeSize++;
592       //        }
593       //
594       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
595       //          || c1 < 0)
596       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
597       //            || c2 < 0)
598       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
599       //            || c3 < 0)
600       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
601       //            || c4 < 0)) {
602       //          currentPosition = temp;
603       //          return false;
604       //        }
605       //
606       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
607       //        if (Character.digit(currentCharacter, radix) == -1) {
608       //          currentPosition = temp;
609       //          return false;
610       //        }
611       //
612       //        //need the unicode buffer
613       //        if (withoutUnicodePtr == 0) {
614       //          //buffer all the entries that have been left aside....
615       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
616       //          System.arraycopy(
617       //            source,
618       //            startPosition,
619       //            withoutUnicodeBuffer,
620       //            1,
621       //            withoutUnicodePtr);
622       //        }
623       //        //fill the buffer with the char
624       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
625       //        return true;
626       //      } //-------------end unicode traitement--------------
627       //      else {
628       if (Character.digit(currentCharacter, radix) == -1) {
629         currentPosition = temp;
630         return false;
631       }
632       //        if (withoutUnicodePtr != 0)
633       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
634       return true;
635       //      }
636     } catch (IndexOutOfBoundsException e) {
637       currentPosition = temp;
638       return false;
639     }
640   }
641   public boolean getNextCharAsJavaIdentifierPart() {
642     //BOOLEAN
643     //handle the case of unicode.
644     //when a unicode appears then we must use a buffer that holds char internal values
645     //At the end of this method currentCharacter holds the new visited char
646     //and currentPosition points right next after it
647     //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
648     //On false, no side effect has occured.
649
650     //ALL getNextChar.... ARE OPTIMIZED COPIES 
651
652     int temp = currentPosition;
653     try {
654       currentCharacter = source[currentPosition++];
655       //      if (((currentCharacter = source[currentPosition++]) == '\\')
656       //        && (source[currentPosition] == 'u')) {
657       //        //-------------unicode traitement ------------
658       //        int c1, c2, c3, c4;
659       //        int unicodeSize = 6;
660       //        currentPosition++;
661       //        while (source[currentPosition] == 'u') {
662       //          currentPosition++;
663       //          unicodeSize++;
664       //        }
665       //
666       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
667       //          || c1 < 0)
668       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
669       //            || c2 < 0)
670       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
671       //            || c3 < 0)
672       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
673       //            || c4 < 0)) {
674       //          currentPosition = temp;
675       //          return false;
676       //        }
677       //
678       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
679       //        if (!isPHPIdentifierPart(currentCharacter)) {
680       //          currentPosition = temp;
681       //          return false;
682       //        }
683       //
684       //        //need the unicode buffer
685       //        if (withoutUnicodePtr == 0) {
686       //          //buffer all the entries that have been left aside....
687       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
688       //          System.arraycopy(
689       //            source,
690       //            startPosition,
691       //            withoutUnicodeBuffer,
692       //            1,
693       //            withoutUnicodePtr);
694       //        }
695       //        //fill the buffer with the char
696       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
697       //        return true;
698       //      } //-------------end unicode traitement--------------
699       //      else {
700       if (!isPHPIdentifierPart(currentCharacter)) {
701         currentPosition = temp;
702         return false;
703       }
704
705       //        if (withoutUnicodePtr != 0)
706       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
707       return true;
708       //      }
709     } catch (IndexOutOfBoundsException e) {
710       currentPosition = temp;
711       return false;
712     }
713   }
714
715   public int getNextToken() throws InvalidInputException {
716     int htmlPosition = currentPosition;
717     try {
718       while (!phpMode) {
719         currentCharacter = source[currentPosition++];
720         if (currentCharacter == '<') {
721           if (getNextChar('?')) {
722             currentCharacter = source[currentPosition++];
723             if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
724               // <?
725               startPosition = currentPosition;
726               phpMode = true;
727               if (tokenizeWhiteSpace) {
728                 // && (whiteStart != currentPosition - 1)) {
729                 // reposition scanner in case we are interested by spaces as tokens
730                 startPosition = htmlPosition;
731                 return TokenNameHTML;
732               }
733             } else {
734               boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
735               if (phpStart) {
736                 int test = getNextChar('H', 'h');
737                 if (test >= 0) {
738                   test = getNextChar('P', 'p');
739                   if (test >= 0) {
740                     // <?PHP  <?php
741                     startPosition = currentPosition;
742                     phpMode = true;
743
744                     if (tokenizeWhiteSpace) {
745                       // && (whiteStart != currentPosition - 1)) {
746                       // reposition scanner in case we are interested by spaces as tokens
747                       startPosition = htmlPosition;
748                       return TokenNameHTML;
749                     }
750                   }
751                 }
752               }
753             }
754           }
755         }
756
757         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
758           if (recordLineSeparator) {
759             pushLineSeparator();
760           } else {
761             currentLine = null;
762           }
763         }
764       }
765     } //-----------------end switch while try--------------------
766     catch (IndexOutOfBoundsException e) {
767       if (tokenizeWhiteSpace) {
768         // && (whiteStart != currentPosition - 1)) {
769         // reposition scanner in case we are interested by spaces as tokens
770         startPosition = htmlPosition;
771       }
772       return TokenNameEOF;
773     }
774
775     if (phpMode) {
776       this.wasAcr = false;
777       if (diet) {
778         jumpOverMethodBody();
779         diet = false;
780         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
781       }
782       try {
783         while (true) { //loop for jumping over comments
784           withoutUnicodePtr = 0;
785           //start with a new token (even comment written with unicode )
786
787           // ---------Consume white space and handles startPosition---------
788           int whiteStart = currentPosition;
789           boolean isWhiteSpace;
790           do {
791             startPosition = currentPosition;
792             currentCharacter = source[currentPosition++];
793             //            if (((currentCharacter = source[currentPosition++]) == '\\')
794             //              && (source[currentPosition] == 'u')) {
795             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
796             //            } else {
797             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
798               checkNonExternalizeString();
799               if (recordLineSeparator) {
800                 pushLineSeparator();
801               } else {
802                 currentLine = null;
803               }
804             }
805             isWhiteSpace = (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
806             //            }
807           } while (isWhiteSpace);
808           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
809             // reposition scanner in case we are interested by spaces as tokens
810             currentPosition--;
811             startPosition = whiteStart;
812             return TokenNameWHITESPACE;
813           }
814           //little trick to get out in the middle of a source computation
815           if (currentPosition > eofPosition)
816             return TokenNameEOF;
817
818           // ---------Identify the next token-------------
819
820           switch (currentCharacter) {
821             case '(' :
822               return TokenNameLPAREN;
823             case ')' :
824               return TokenNameRPAREN;
825             case '{' :
826               return TokenNameLBRACE;
827             case '}' :
828               return TokenNameRBRACE;
829             case '[' :
830               return TokenNameLBRACKET;
831             case ']' :
832               return TokenNameRBRACKET;
833             case ';' :
834               return TokenNameSEMICOLON;
835             case ',' :
836               return TokenNameCOMMA;
837
838             case '.' :
839               if (getNextCharAsDigit())
840                 return scanNumber(true);
841               return TokenNameDOT;
842             case '+' :
843               {
844                 int test;
845                 if ((test = getNextChar('+', '=')) == 0)
846                   return TokenNamePLUS_PLUS;
847                 if (test > 0)
848                   return TokenNamePLUS_EQUAL;
849                 return TokenNamePLUS;
850               }
851             case '-' :
852               {
853                 int test;
854                 if ((test = getNextChar('-', '=')) == 0)
855                   return TokenNameMINUS_MINUS;
856                 if (test > 0)
857                   return TokenNameMINUS_EQUAL;
858                 if (getNextChar('>'))
859                   return TokenNameMINUS_GREATER;
860
861                 return TokenNameMINUS;
862               }
863             case '~' :
864               if (getNextChar('='))
865                 return TokenNameTWIDDLE_EQUAL;
866               return TokenNameTWIDDLE;
867             case '!' :
868               if (getNextChar('='))
869                 return TokenNameNOT_EQUAL;
870               return TokenNameNOT;
871             case '*' :
872               if (getNextChar('='))
873                 return TokenNameMULTIPLY_EQUAL;
874               return TokenNameMULTIPLY;
875             case '%' :
876               if (getNextChar('='))
877                 return TokenNameREMAINDER_EQUAL;
878               return TokenNameREMAINDER;
879             case '<' :
880               {
881                 int test;
882                 if ((test = getNextChar('=', '<')) == 0)
883                   return TokenNameLESS_EQUAL;
884                 if (test > 0) {
885                   if (getNextChar('='))
886                     return TokenNameLEFT_SHIFT_EQUAL;
887                   if (getNextChar('<')) {
888                     int heredocStart = currentPosition;
889                     int heredocLength = 0;
890                     currentCharacter = source[currentPosition++];
891                     if (isPHPIdentifierStart(currentCharacter)) {
892                       currentCharacter = source[currentPosition++];
893                     } else {
894                       return TokenNameERROR;
895                     }
896                     while (isPHPIdentifierPart(currentCharacter)) {
897                       currentCharacter = source[currentPosition++];
898                     }
899
900                     heredocLength = currentPosition - heredocStart - 1;
901
902                     // heredoc end-tag determination
903                     boolean endTag = true;
904                     char ch;
905                     do {
906                       ch = source[currentPosition++];
907                       if (ch == '\r' || ch == '\n') {
908                         if (recordLineSeparator) {
909                           pushLineSeparator();
910                         } else {
911                           currentLine = null;
912                         }
913                         for (int i = 0; i < heredocLength; i++) {
914                           if (source[currentPosition + i] != source[heredocStart + i]) {
915                             endTag = false;
916                             break;
917                           }
918                         }
919                         if (endTag) {
920                           currentPosition += heredocLength - 1;
921                           currentCharacter = source[currentPosition++];
922                           break; // do...while loop
923                         } else {
924                           endTag = true;
925                         }
926                       }
927
928                     } while (true);
929
930                     return TokenNameHEREDOC;
931                   }
932                   return TokenNameLEFT_SHIFT;
933                 }
934                 return TokenNameLESS;
935               }
936             case '>' :
937               {
938                 int test;
939                 if ((test = getNextChar('=', '>')) == 0)
940                   return TokenNameGREATER_EQUAL;
941                 if (test > 0) {
942                   if ((test = getNextChar('=', '>')) == 0)
943                     return TokenNameRIGHT_SHIFT_EQUAL;
944                   return TokenNameRIGHT_SHIFT;
945                 }
946                 return TokenNameGREATER;
947               }
948             case '=' :
949               if (getNextChar('='))
950                 return TokenNameEQUAL_EQUAL;
951               if (getNextChar('>'))
952                 return TokenNameEQUAL_GREATER;
953               return TokenNameEQUAL;
954             case '&' :
955               {
956                 int test;
957                 if ((test = getNextChar('&', '=')) == 0)
958                   return TokenNameAND_AND;
959                 if (test > 0)
960                   return TokenNameAND_EQUAL;
961                 return TokenNameAND;
962               }
963             case '|' :
964               {
965                 int test;
966                 if ((test = getNextChar('|', '=')) == 0)
967                   return TokenNameOR_OR;
968                 if (test > 0)
969                   return TokenNameOR_EQUAL;
970                 return TokenNameOR;
971               }
972             case '^' :
973               if (getNextChar('='))
974                 return TokenNameXOR_EQUAL;
975               return TokenNameXOR;
976             case '?' :
977               if (getNextChar('>')) {
978                 phpMode = false;
979                 return TokenNameStopPHP;
980               }
981               return TokenNameQUESTION;
982             case ':' :
983               if (getNextChar(':'))
984                 return TokenNameCOLON_COLON;
985               return TokenNameCOLON;
986             case '@' :
987               return TokenNameAT;
988               //                                        case '\'' :
989               //                                                {
990               //                                                        int test;
991               //                                                        if ((test = getNextChar('\n', '\r')) == 0) {
992               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
993               //                                                        }
994               //                                                        if (test > 0) {
995               //                                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
996               //                                                                for (int lookAhead = 0;
997               //                                                                        lookAhead < 3;
998               //                                                                        lookAhead++) {
999               //                                                                        if (currentPosition + lookAhead
1000               //                                                                                == source.length)
1001               //                                                                                break;
1002               //                                                                        if (source[currentPosition + lookAhead]
1003               //                                                                                == '\n')
1004               //                                                                                break;
1005               //                                                                        if (source[currentPosition + lookAhead]
1006               //                                                                                == '\'') {
1007               //                                                                                currentPosition += lookAhead + 1;
1008               //                                                                                break;
1009               //                                                                        }
1010               //                                                                }
1011               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1012               //                                                        }
1013               //                                                }
1014               //                                                if (getNextChar('\'')) {
1015               //                                                        // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1016               //                                                        for (int lookAhead = 0;
1017               //                                                                lookAhead < 3;
1018               //                                                                lookAhead++) {
1019               //                                                                if (currentPosition + lookAhead
1020               //                                                                        == source.length)
1021               //                                                                        break;
1022               //                                                                if (source[currentPosition + lookAhead]
1023               //                                                                        == '\n')
1024               //                                                                        break;
1025               //                                                                if (source[currentPosition + lookAhead]
1026               //                                                                        == '\'') {
1027               //                                                                        currentPosition += lookAhead + 1;
1028               //                                                                        break;
1029               //                                                                }
1030               //                                                        }
1031               //                                                        throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1032               //                                                }
1033               //                                                if (getNextChar('\\'))
1034               //                                                        scanEscapeCharacter();
1035               //                                                else { // consume next character
1036               //                                                        unicodeAsBackSlash = false;
1037               //                                                        if (((currentCharacter = source[currentPosition++])
1038               //                                                                == '\\')
1039               //                                                                && (source[currentPosition] == 'u')) {
1040               //                                                                getNextUnicodeChar();
1041               //                                                        } else {
1042               //                                                                if (withoutUnicodePtr != 0) {
1043               //                                                                        withoutUnicodeBuffer[++withoutUnicodePtr] =
1044               //                                                                                currentCharacter;
1045               //                                                                }
1046               //                                                        }
1047               //                                                }
1048               //                                                //            if (getNextChar('\''))
1049               //                                                //              return TokenNameCharacterLiteral;
1050               //                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1051               //                                                for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1052               //                                                        if (currentPosition + lookAhead == source.length)
1053               //                                                                break;
1054               //                                                        if (source[currentPosition + lookAhead] == '\n')
1055               //                                                                break;
1056               //                                                        if (source[currentPosition + lookAhead] == '\'') {
1057               //                                                                currentPosition += lookAhead + 1;
1058               //                                                                break;
1059               //                                                        }
1060               //                                                }
1061               //                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1062             case '\'' :
1063               try {
1064                 // consume next character
1065                 unicodeAsBackSlash = false;
1066                 currentCharacter = source[currentPosition++];
1067                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1068                 //                  && (source[currentPosition] == 'u')) {
1069                 //                  getNextUnicodeChar();
1070                 //                } else {
1071                 //                  if (withoutUnicodePtr != 0) {
1072                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1073                 //                      currentCharacter;
1074                 //                  }
1075                 //                }
1076
1077                 while (currentCharacter != '\'') {
1078
1079                   /**** in PHP \r and \n are valid in string literals ****/
1080                   //                  if ((currentCharacter == '\n')
1081                   //                    || (currentCharacter == '\r')) {
1082                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1083                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1084                   //                      if (currentPosition + lookAhead == source.length)
1085                   //                        break;
1086                   //                      if (source[currentPosition + lookAhead] == '\n')
1087                   //                        break;
1088                   //                      if (source[currentPosition + lookAhead] == '\"') {
1089                   //                        currentPosition += lookAhead + 1;
1090                   //                        break;
1091                   //                      }
1092                   //                    }
1093                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1094                   //                  }
1095                   if (currentCharacter == '\\') {
1096                     int escapeSize = currentPosition;
1097                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1098                     //scanEscapeCharacter has a side effect on this value, and we need the previous value a few lines below
1099                     scanSingleQuotedEscapeCharacter();
1100                     escapeSize = currentPosition - escapeSize;
1101                     if (withoutUnicodePtr == 0) {
1102                       //buffer all the entries that have been left aside....
1103                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1104                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1105                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1106                     } else { //overwrite the / in the buffer
1107                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1108                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1109                         withoutUnicodePtr--;
1110                       }
1111                     }
1112                   }
1113                   // consume next character
1114                   unicodeAsBackSlash = false;
1115                   currentCharacter = source[currentPosition++];
1116                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1117                   //                    && (source[currentPosition] == 'u')) {
1118                   //                    getNextUnicodeChar();
1119                   //                  } else {
1120                   if (withoutUnicodePtr != 0) {
1121                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1122                   }
1123                   //                  }
1124
1125                 }
1126               } catch (IndexOutOfBoundsException e) {
1127                 throw new InvalidInputException(UNTERMINATED_STRING);
1128               } catch (InvalidInputException e) {
1129                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1130                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1131                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1132                     if (currentPosition + lookAhead == source.length)
1133                       break;
1134                     if (source[currentPosition + lookAhead] == '\n')
1135                       break;
1136                     if (source[currentPosition + lookAhead] == '\'') {
1137                       currentPosition += lookAhead + 1;
1138                       break;
1139                     }
1140                   }
1141
1142                 }
1143                 throw e; // rethrow
1144               }
1145               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1146                 if (currentLine == null) {
1147                   currentLine = new NLSLine();
1148                   lines.add(currentLine);
1149                 }
1150                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1151               }
1152               return TokenNameStringConstant;
1153             case '"' :
1154               try {
1155                 // consume next character
1156                 unicodeAsBackSlash = false;
1157                 currentCharacter = source[currentPosition++];
1158                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1159                 //                  && (source[currentPosition] == 'u')) {
1160                 //                  getNextUnicodeChar();
1161                 //                } else {
1162                 //                  if (withoutUnicodePtr != 0) {
1163                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1164                 //                      currentCharacter;
1165                 //                  }
1166                 //                }
1167
1168                 while (currentCharacter != '"') {
1169
1170                   /**** in PHP \r and \n are valid in string literals ****/
1171                   //                  if ((currentCharacter == '\n')
1172                   //                    || (currentCharacter == '\r')) {
1173                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1174                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1175                   //                      if (currentPosition + lookAhead == source.length)
1176                   //                        break;
1177                   //                      if (source[currentPosition + lookAhead] == '\n')
1178                   //                        break;
1179                   //                      if (source[currentPosition + lookAhead] == '\"') {
1180                   //                        currentPosition += lookAhead + 1;
1181                   //                        break;
1182                   //                      }
1183                   //                    }
1184                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1185                   //                  }
1186                   if (currentCharacter == '\\') {
1187                     int escapeSize = currentPosition;
1188                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1189                     //scanEscapeCharacter has a side effect on this value, and we need the previous value a few lines below
1190                     scanDoubleQuotedEscapeCharacter();
1191                     escapeSize = currentPosition - escapeSize;
1192                     if (withoutUnicodePtr == 0) {
1193                       //buffer all the entries that have been left aside....
1194                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1195                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1196                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1197                     } else { //overwrite the / in the buffer
1198                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1199                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1200                         withoutUnicodePtr--;
1201                       }
1202                     }
1203                   }
1204                   // consume next character
1205                   unicodeAsBackSlash = false;
1206                   currentCharacter = source[currentPosition++];
1207                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1208                   //                    && (source[currentPosition] == 'u')) {
1209                   //                    getNextUnicodeChar();
1210                   //                  } else {
1211                   if (withoutUnicodePtr != 0) {
1212                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1213                   }
1214                   //                  }
1215
1216                 }
1217               } catch (IndexOutOfBoundsException e) {
1218                 throw new InvalidInputException(UNTERMINATED_STRING);
1219               } catch (InvalidInputException e) {
1220                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1221                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1222                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1223                     if (currentPosition + lookAhead == source.length)
1224                       break;
1225                     if (source[currentPosition + lookAhead] == '\n')
1226                       break;
1227                     if (source[currentPosition + lookAhead] == '\"') {
1228                       currentPosition += lookAhead + 1;
1229                       break;
1230                     }
1231                   }
1232
1233                 }
1234                 throw e; // rethrow
1235               }
1236               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1237                 if (currentLine == null) {
1238                   currentLine = new NLSLine();
1239                   lines.add(currentLine);
1240                 }
1241                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1242               }
1243               return TokenNameStringLiteral;
1244             case '`' :
1245               try {
1246                 // consume next character
1247                 unicodeAsBackSlash = false;
1248                 currentCharacter = source[currentPosition++];
1249                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1250                 //                  && (source[currentPosition] == 'u')) {
1251                 //                  getNextUnicodeChar();
1252                 //                } else {
1253                 //                  if (withoutUnicodePtr != 0) {
1254                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1255                 //                      currentCharacter;
1256                 //                  }
1257                 //                }
1258
1259                 while (currentCharacter != '`') {
1260
1261                   /**** in PHP \r and \n are valid in string literals ****/
1262                   //                if ((currentCharacter == '\n')
1263                   //                  || (currentCharacter == '\r')) {
1264                   //                  // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1265                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1266                   //                    if (currentPosition + lookAhead == source.length)
1267                   //                      break;
1268                   //                    if (source[currentPosition + lookAhead] == '\n')
1269                   //                      break;
1270                   //                    if (source[currentPosition + lookAhead] == '\"') {
1271                   //                      currentPosition += lookAhead + 1;
1272                   //                      break;
1273                   //                    }
1274                   //                  }
1275                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1276                   //                }
1277                   if (currentCharacter == '\\') {
1278                     int escapeSize = currentPosition;
1279                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1280                     //scanEscapeCharacter has a side effect on this value, and we need the previous value a few lines below
1281                     scanDoubleQuotedEscapeCharacter();
1282                     escapeSize = currentPosition - escapeSize;
1283                     if (withoutUnicodePtr == 0) {
1284                       //buffer all the entries that have been left aside....
1285                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1286                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1287                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1288                     } else { //overwrite the / in the buffer
1289                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1290                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1291                         withoutUnicodePtr--;
1292                       }
1293                     }
1294                   }
1295                   // consume next character
1296                   unicodeAsBackSlash = false;
1297                   currentCharacter = source[currentPosition++];
1298                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1299                   //                    && (source[currentPosition] == 'u')) {
1300                   //                    getNextUnicodeChar();
1301                   //                  } else {
1302                   if (withoutUnicodePtr != 0) {
1303                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1304                   }
1305                   //                  }
1306
1307                 }
1308               } catch (IndexOutOfBoundsException e) {
1309                 throw new InvalidInputException(UNTERMINATED_STRING);
1310               } catch (InvalidInputException e) {
1311                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1312                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1313                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1314                     if (currentPosition + lookAhead == source.length)
1315                       break;
1316                     if (source[currentPosition + lookAhead] == '\n')
1317                       break;
1318                     if (source[currentPosition + lookAhead] == '`') {
1319                       currentPosition += lookAhead + 1;
1320                       break;
1321                     }
1322                   }
1323
1324                 }
1325                 throw e; // rethrow
1326               }
1327               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1328                 if (currentLine == null) {
1329                   currentLine = new NLSLine();
1330                   lines.add(currentLine);
1331                 }
1332                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1333               }
1334               return TokenNameStringInterpolated;
1335             case '#' :
1336             case '/' :
1337               {
1338                 int test;
1339                 if ((currentCharacter == '#') || (test = getNextChar('/', '*')) == 0) {
1340                   //line comment 
1341                   int endPositionForLineComment = 0;
1342                   try { //get the next char 
1343                     currentCharacter = source[currentPosition++];
1344                     //                    if (((currentCharacter = source[currentPosition++])
1345                     //                      == '\\')
1346                     //                      && (source[currentPosition] == 'u')) {
1347                     //                      //-------------unicode traitement ------------
1348                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1349                     //                      currentPosition++;
1350                     //                      while (source[currentPosition] == 'u') {
1351                     //                        currentPosition++;
1352                     //                      }
1353                     //                      if ((c1 =
1354                     //                        Character.getNumericValue(source[currentPosition++]))
1355                     //                        > 15
1356                     //                        || c1 < 0
1357                     //                        || (c2 =
1358                     //                          Character.getNumericValue(source[currentPosition++]))
1359                     //                          > 15
1360                     //                        || c2 < 0
1361                     //                        || (c3 =
1362                     //                          Character.getNumericValue(source[currentPosition++]))
1363                     //                          > 15
1364                     //                        || c3 < 0
1365                     //                        || (c4 =
1366                     //                          Character.getNumericValue(source[currentPosition++]))
1367                     //                          > 15
1368                     //                        || c4 < 0) {
1369                     //                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1370                     //                      } else {
1371                     //                        currentCharacter =
1372                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1373                     //                      }
1374                     //                    }
1375
1376                     //handle the \\u case manually into comment
1377                     //                    if (currentCharacter == '\\') {
1378                     //                      if (source[currentPosition] == '\\')
1379                     //                        currentPosition++;
1380                     //                    } //jump over the \\
1381                     boolean isUnicode = false;
1382                     while (currentCharacter != '\r' && currentCharacter != '\n') {
1383                       if (currentCharacter == '?') {
1384                         if (getNextChar('>')) {
1385                           startPosition = currentPosition - 2;
1386                           phpMode = false;
1387                           return TokenNameStopPHP;
1388                         }
1389                       }
1390
1391                       //get the next char
1392                       isUnicode = false;
1393                       currentCharacter = source[currentPosition++];
1394                       //                      if (((currentCharacter = source[currentPosition++])
1395                       //                        == '\\')
1396                       //                        && (source[currentPosition] == 'u')) {
1397                       //                        isUnicode = true;
1398                       //                        //-------------unicode traitement ------------
1399                       //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1400                       //                        currentPosition++;
1401                       //                        while (source[currentPosition] == 'u') {
1402                       //                          currentPosition++;
1403                       //                        }
1404                       //                        if ((c1 =
1405                       //                          Character.getNumericValue(source[currentPosition++]))
1406                       //                          > 15
1407                       //                          || c1 < 0
1408                       //                          || (c2 =
1409                       //                            Character.getNumericValue(
1410                       //                              source[currentPosition++]))
1411                       //                            > 15
1412                       //                          || c2 < 0
1413                       //                          || (c3 =
1414                       //                            Character.getNumericValue(
1415                       //                              source[currentPosition++]))
1416                       //                            > 15
1417                       //                          || c3 < 0
1418                       //                          || (c4 =
1419                       //                            Character.getNumericValue(
1420                       //                              source[currentPosition++]))
1421                       //                            > 15
1422                       //                          || c4 < 0) {
1423                       //                          throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1424                       //                        } else {
1425                       //                          currentCharacter =
1426                       //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1427                       //                        }
1428                       //                      }
1429                       //handle the \\u case manually into comment
1430                       //                      if (currentCharacter == '\\') {
1431                       //                        if (source[currentPosition] == '\\')
1432                       //                          currentPosition++;
1433                       //                      } //jump over the \\
1434                     }
1435                     if (isUnicode) {
1436                       endPositionForLineComment = currentPosition - 6;
1437                     } else {
1438                       endPositionForLineComment = currentPosition - 1;
1439                     }
1440                     recordComment(false);
1441                     if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1442                       checkNonExternalizeString();
1443                       if (recordLineSeparator) {
1444                         if (isUnicode) {
1445                           pushUnicodeLineSeparator();
1446                         } else {
1447                           pushLineSeparator();
1448                         }
1449                       } else {
1450                         currentLine = null;
1451                       }
1452                     }
1453                     if (tokenizeComments) {
1454                       if (!isUnicode) {
1455                         currentPosition = endPositionForLineComment;
1456                         // reset one character behind
1457                       }
1458                       return TokenNameCOMMENT_LINE;
1459                     }
1460                   } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1461                     if (tokenizeComments) {
1462                       currentPosition--;
1463                       // reset one character behind
1464                       return TokenNameCOMMENT_LINE;
1465                     }
1466                   }
1467                   break;
1468                 }
1469                 if (test > 0) {
1470                   //traditional and annotation comment
1471                   boolean isJavadoc = false, star = false;
1472                   // consume next character
1473                   unicodeAsBackSlash = false;
1474                   currentCharacter = source[currentPosition++];
1475                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1476                   //                    && (source[currentPosition] == 'u')) {
1477                   //                    getNextUnicodeChar();
1478                   //                  } else {
1479                   //                    if (withoutUnicodePtr != 0) {
1480                   //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1481                   //                        currentCharacter;
1482                   //                    }
1483                   //                  }
1484
1485                   if (currentCharacter == '*') {
1486                     isJavadoc = true;
1487                     star = true;
1488                   }
1489                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1490                     checkNonExternalizeString();
1491                     if (recordLineSeparator) {
1492                       pushLineSeparator();
1493                     } else {
1494                       currentLine = null;
1495                     }
1496                   }
1497                   try { //get the next char 
1498                     currentCharacter = source[currentPosition++];
1499                     //                    if (((currentCharacter = source[currentPosition++])
1500                     //                      == '\\')
1501                     //                      && (source[currentPosition] == 'u')) {
1502                     //                      //-------------unicode traitement ------------
1503                     //                      getNextUnicodeChar();
1504                     //                    }
1505                     //handle the \\u case manually into comment
1506                     //                    if (currentCharacter == '\\') {
1507                     //                      if (source[currentPosition] == '\\')
1508                     //                        currentPosition++;
1509                     //                      //jump over the \\
1510                     //                    }
1511                     // empty comment is not a javadoc /**/
1512                     if (currentCharacter == '/') {
1513                       isJavadoc = false;
1514                     }
1515                     //loop until end of comment */
1516                     while ((currentCharacter != '/') || (!star)) {
1517                       if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1518                         checkNonExternalizeString();
1519                         if (recordLineSeparator) {
1520                           pushLineSeparator();
1521                         } else {
1522                           currentLine = null;
1523                         }
1524                       }
1525                       star = currentCharacter == '*';
1526                       //get next char
1527                       currentCharacter = source[currentPosition++];
1528                       //                      if (((currentCharacter = source[currentPosition++])
1529                       //                        == '\\')
1530                       //                        && (source[currentPosition] == 'u')) {
1531                       //                        //-------------unicode traitement ------------
1532                       //                        getNextUnicodeChar();
1533                       //                      }
1534                       //handle the \\u case manually into comment
1535                       //                      if (currentCharacter == '\\') {
1536                       //                        if (source[currentPosition] == '\\')
1537                       //                          currentPosition++;
1538                       //                      } //jump over the \\
1539                     }
1540                     recordComment(isJavadoc);
1541                     if (tokenizeComments) {
1542                       if (isJavadoc)
1543                         return TokenNameCOMMENT_PHPDOC;
1544                       return TokenNameCOMMENT_BLOCK;
1545                     }
1546                   } catch (IndexOutOfBoundsException e) {
1547                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1548                   }
1549                   break;
1550                 }
1551                 if (getNextChar('='))
1552                   return TokenNameDIVIDE_EQUAL;
1553                 return TokenNameDIVIDE;
1554               }
1555             case '\u001a' :
1556               if (atEnd())
1557                 return TokenNameEOF;
1558               //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1559               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1560
1561             default :
1562               if (currentCharacter == '$') {
1563                 while ((currentCharacter = source[currentPosition++]) == '$') {
1564                 }
1565                 if (currentCharacter == '{')
1566                   return TokenNameDOLLAR_LBRACE;
1567                 if (isPHPIdentifierStart(currentCharacter))
1568                   return scanIdentifierOrKeyword(true);
1569                 return TokenNameERROR;
1570               }
1571               if (isPHPIdentifierStart(currentCharacter))
1572                 return scanIdentifierOrKeyword(false);
1573               if (Character.isDigit(currentCharacter))
1574                 return scanNumber(false);
1575               return TokenNameERROR;
1576           }
1577         }
1578       } //-----------------end switch while try--------------------
1579       catch (IndexOutOfBoundsException e) {
1580       }
1581     }
1582     return TokenNameEOF;
1583   }
1584
1585   //  public final void getNextUnicodeChar()
1586   //    throws IndexOutOfBoundsException, InvalidInputException {
1587   //    //VOID
1588   //    //handle the case of unicode.
1589   //    //when a unicode appears then we must use a buffer that holds char internal values
1590   //    //At the end of this method currentCharacter holds the new visited char
1591   //    //and currentPosition points right next after it
1592   //
1593   //    //ALL getNextChar.... ARE OPTIMIZED COPIES 
1594   //
1595   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1596   //    currentPosition++;
1597   //    while (source[currentPosition] == 'u') {
1598   //      currentPosition++;
1599   //      unicodeSize++;
1600   //    }
1601   //
1602   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1603   //      || c1 < 0
1604   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1605   //      || c2 < 0
1606   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1607   //      || c3 < 0
1608   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1609   //      || c4 < 0) {
1610   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1611   //    } else {
1612   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1613   //      //need the unicode buffer
1614   //      if (withoutUnicodePtr == 0) {
1615   //        //buffer all the entries that have been left aside....
1616   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1617   //        System.arraycopy(
1618   //          source,
1619   //          startPosition,
1620   //          withoutUnicodeBuffer,
1621   //          1,
1622   //          withoutUnicodePtr);
1623   //      }
1624   //      //fill the buffer with the char
1625   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1626   //    }
1627   //    unicodeAsBackSlash = currentCharacter == '\\';
1628   //  }
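1629   /* Jump over a method body: scan forward counting '{' and '}' (the nesting count starts at 1)
1630    * and return at the '}' that brings it back to 0. Assumes that curly brackets are properly balanced. */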
1631   public final void jumpOverMethodBody() {
1632
1633     this.wasAcr = false;
1634     int found = 1;
1635     try {
1636       while (true) { //loop for jumping over comments
1637         // ---------Consume white space and handles startPosition---------
1638         boolean isWhiteSpace;
1639         do {
1640           startPosition = currentPosition;
1641           currentCharacter = source[currentPosition++];
1642           //          if (((currentCharacter = source[currentPosition++]) == '\\')
1643           //            && (source[currentPosition] == 'u')) {
1644           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1645           //          } else {
1646           if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1647             pushLineSeparator();
1648           isWhiteSpace = Character.isWhitespace(currentCharacter);
1649           //          }
1650         } while (isWhiteSpace);
1651
1652         // -------consume token until } is found---------
1653         switch (currentCharacter) {
1654           case '{' :
1655             found++;
1656             break;
1657           case '}' :
1658             found--;
1659             if (found == 0)
1660               return;
1661             break;
1662           case '\'' :
1663             {
1664               boolean test;
1665               test = getNextChar('\\');
1666               if (test) {
1667                 try {
1668                   scanDoubleQuotedEscapeCharacter();
1669                 } catch (InvalidInputException ex) {
1670                 };
1671               } else {
1672                 //                try { // consume next character
1673                 unicodeAsBackSlash = false;
1674                 currentCharacter = source[currentPosition++];
1675                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1676                 //                    && (source[currentPosition] == 'u')) {
1677                 //                    getNextUnicodeChar();
1678                 //                  } else {
1679                 if (withoutUnicodePtr != 0) {
1680                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1681                 }
1682                 //                  }
1683                 //                } catch (InvalidInputException ex) {
1684                 //                };
1685               }
1686               getNextChar('\'');
1687               break;
1688             }
1689           case '"' :
1690             try {
1691               //              try { // consume next character
1692               unicodeAsBackSlash = false;
1693               currentCharacter = source[currentPosition++];
1694               //                if (((currentCharacter = source[currentPosition++]) == '\\')
1695               //                  && (source[currentPosition] == 'u')) {
1696               //                  getNextUnicodeChar();
1697               //                } else {
1698               if (withoutUnicodePtr != 0) {
1699                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1700               }
1701               //                }
1702               //              } catch (InvalidInputException ex) {
1703               //              };
1704               while (currentCharacter != '"') {
1705                 if (currentCharacter == '\r') {
1706                   if (source[currentPosition] == '\n')
1707                     currentPosition++;
1708                   break;
1709                   // the string cannot go further that the line
1710                 }
1711                 if (currentCharacter == '\n') {
1712                   break;
1713                   // the string cannot go further that the line
1714                 }
1715                 if (currentCharacter == '\\') {
1716                   try {
1717                     scanDoubleQuotedEscapeCharacter();
1718                   } catch (InvalidInputException ex) {
1719                   };
1720                 }
1721                 //                try { // consume next character
1722                 unicodeAsBackSlash = false;
1723                 currentCharacter = source[currentPosition++];
1724                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1725                 //                    && (source[currentPosition] == 'u')) {
1726                 //                    getNextUnicodeChar();
1727                 //                  } else {
1728                 if (withoutUnicodePtr != 0) {
1729                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1730                 }
1731                 //                  }
1732                 //                } catch (InvalidInputException ex) {
1733                 //                };
1734               }
1735             } catch (IndexOutOfBoundsException e) {
1736               return;
1737             }
1738             break;
1739           case '/' :
1740             {
1741               int test;
1742               if ((test = getNextChar('/', '*')) == 0) {
1743                 //line comment 
1744                 try {
1745                   //get the next char 
1746                   currentCharacter = source[currentPosition++];
1747                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1748                   //                    && (source[currentPosition] == 'u')) {
1749                   //                    //-------------unicode traitement ------------
1750                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1751                   //                    currentPosition++;
1752                   //                    while (source[currentPosition] == 'u') {
1753                   //                      currentPosition++;
1754                   //                    }
1755                   //                    if ((c1 =
1756                   //                      Character.getNumericValue(source[currentPosition++]))
1757                   //                      > 15
1758                   //                      || c1 < 0
1759                   //                      || (c2 =
1760                   //                        Character.getNumericValue(source[currentPosition++]))
1761                   //                        > 15
1762                   //                      || c2 < 0
1763                   //                      || (c3 =
1764                   //                        Character.getNumericValue(source[currentPosition++]))
1765                   //                        > 15
1766                   //                      || c3 < 0
1767                   //                      || (c4 =
1768                   //                        Character.getNumericValue(source[currentPosition++]))
1769                   //                        > 15
1770                   //                      || c4 < 0) {
1771                   //                      //error don't care of the value
1772                   //                      currentCharacter = 'A';
1773                   //                    } //something different from \n and \r
1774                   //                    else {
1775                   //                      currentCharacter =
1776                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1777                   //                    }
1778                   //                  }
1779
1780                   while (currentCharacter != '\r' && currentCharacter != '\n') {
1781                     //get the next char 
1782                     currentCharacter = source[currentPosition++];
1783                     //                    if (((currentCharacter = source[currentPosition++])
1784                     //                      == '\\')
1785                     //                      && (source[currentPosition] == 'u')) {
1786                     //                      //-------------unicode traitement ------------
1787                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1788                     //                      currentPosition++;
1789                     //                      while (source[currentPosition] == 'u') {
1790                     //                        currentPosition++;
1791                     //                      }
1792                     //                      if ((c1 =
1793                     //                        Character.getNumericValue(source[currentPosition++]))
1794                     //                        > 15
1795                     //                        || c1 < 0
1796                     //                        || (c2 =
1797                     //                          Character.getNumericValue(source[currentPosition++]))
1798                     //                          > 15
1799                     //                        || c2 < 0
1800                     //                        || (c3 =
1801                     //                          Character.getNumericValue(source[currentPosition++]))
1802                     //                          > 15
1803                     //                        || c3 < 0
1804                     //                        || (c4 =
1805                     //                          Character.getNumericValue(source[currentPosition++]))
1806                     //                          > 15
1807                     //                        || c4 < 0) {
1808                     //                        //error don't care of the value
1809                     //                        currentCharacter = 'A';
1810                     //                      } //something different from \n and \r
1811                     //                      else {
1812                     //                        currentCharacter =
1813                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1814                     //                      }
1815                     //                    }
1816                   }
1817                   if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1818                     pushLineSeparator();
1819                 } catch (IndexOutOfBoundsException e) {
1820                 } //an eof will them be generated
1821                 break;
1822               }
1823               if (test > 0) {
1824                 //traditional and annotation comment
1825                 boolean star = false;
1826                 //                try { // consume next character
1827                 unicodeAsBackSlash = false;
1828                 currentCharacter = source[currentPosition++];
1829                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1830                 //                    && (source[currentPosition] == 'u')) {
1831                 //                    getNextUnicodeChar();
1832                 //                  } else {
1833                 if (withoutUnicodePtr != 0) {
1834                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1835                 }
1836                 //                  };
1837                 //                } catch (InvalidInputException ex) {
1838                 //                };
1839                 if (currentCharacter == '*') {
1840                   star = true;
1841                 }
1842                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1843                   pushLineSeparator();
1844                 try { //get the next char 
1845                   currentCharacter = source[currentPosition++];
1846                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1847                   //                    && (source[currentPosition] == 'u')) {
1848                   //                    //-------------unicode traitement ------------
1849                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1850                   //                    currentPosition++;
1851                   //                    while (source[currentPosition] == 'u') {
1852                   //                      currentPosition++;
1853                   //                    }
1854                   //                    if ((c1 =
1855                   //                      Character.getNumericValue(source[currentPosition++]))
1856                   //                      > 15
1857                   //                      || c1 < 0
1858                   //                      || (c2 =
1859                   //                        Character.getNumericValue(source[currentPosition++]))
1860                   //                        > 15
1861                   //                      || c2 < 0
1862                   //                      || (c3 =
1863                   //                        Character.getNumericValue(source[currentPosition++]))
1864                   //                        > 15
1865                   //                      || c3 < 0
1866                   //                      || (c4 =
1867                   //                        Character.getNumericValue(source[currentPosition++]))
1868                   //                        > 15
1869                   //                      || c4 < 0) {
1870                   //                      //error don't care of the value
1871                   //                      currentCharacter = 'A';
1872                   //                    } //something different from * and /
1873                   //                    else {
1874                   //                      currentCharacter =
1875                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1876                   //                    }
1877                   //                  }
1878                   //loop until end of comment */ 
1879                   while ((currentCharacter != '/') || (!star)) {
1880                     if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1881                       pushLineSeparator();
1882                     star = currentCharacter == '*';
1883                     //get next char
1884                     currentCharacter = source[currentPosition++];
1885                     //                    if (((currentCharacter = source[currentPosition++])
1886                     //                      == '\\')
1887                     //                      && (source[currentPosition] == 'u')) {
1888                     //                      //-------------unicode traitement ------------
1889                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1890                     //                      currentPosition++;
1891                     //                      while (source[currentPosition] == 'u') {
1892                     //                        currentPosition++;
1893                     //                      }
1894                     //                      if ((c1 =
1895                     //                        Character.getNumericValue(source[currentPosition++]))
1896                     //                        > 15
1897                     //                        || c1 < 0
1898                     //                        || (c2 =
1899                     //                          Character.getNumericValue(source[currentPosition++]))
1900                     //                          > 15
1901                     //                        || c2 < 0
1902                     //                        || (c3 =
1903                     //                          Character.getNumericValue(source[currentPosition++]))
1904                     //                          > 15
1905                     //                        || c3 < 0
1906                     //                        || (c4 =
1907                     //                          Character.getNumericValue(source[currentPosition++]))
1908                     //                          > 15
1909                     //                        || c4 < 0) {
1910                     //                        //error don't care of the value
1911                     //                        currentCharacter = 'A';
1912                     //                      } //something different from * and /
1913                     //                      else {
1914                     //                        currentCharacter =
1915                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1916                     //                      }
1917                     //                    }
1918                   }
1919                 } catch (IndexOutOfBoundsException e) {
1920                   return;
1921                 }
1922                 break;
1923               }
1924               break;
1925             }
1926
1927           default :
1928             if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
1929               try {
1930                 scanIdentifierOrKeyword((currentCharacter == '$'));
1931               } catch (InvalidInputException ex) {
1932               };
1933               break;
1934             }
1935             if (Character.isDigit(currentCharacter)) {
1936               try {
1937                 scanNumber(false);
1938               } catch (InvalidInputException ex) {
1939               };
1940               break;
1941             }
1942         }
1943       }
1944       //-----------------end switch while try--------------------
1945     } catch (IndexOutOfBoundsException e) {
1946     } catch (InvalidInputException e) {
1947     }
1948     return;
1949   }
1950   //  public final boolean jumpOverUnicodeWhiteSpace()
1951   //    throws InvalidInputException {
1952   //    //BOOLEAN
1953   //    //handle the case of unicode. Jump over the next whiteSpace
1954   //    //making startPosition pointing on the next available char
1955   //    //On false, the currentCharacter is filled up with a potential
1956   //    //correct char
1957   //
1958   //    try {
1959   //      this.wasAcr = false;
1960   //      int c1, c2, c3, c4;
1961   //      int unicodeSize = 6;
1962   //      currentPosition++;
1963   //      while (source[currentPosition] == 'u') {
1964   //        currentPosition++;
1965   //        unicodeSize++;
1966   //      }
1967   //
1968   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1969   //        || c1 < 0)
1970   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1971   //          || c2 < 0)
1972   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1973   //          || c3 < 0)
1974   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1975   //          || c4 < 0)) {
1976   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1977   //      }
1978   //
1979   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1980   //      if (recordLineSeparator
1981   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1982   //        pushLineSeparator();
1983   //      if (Character.isWhitespace(currentCharacter))
1984   //        return true;
1985   //
1986   //      //buffer the new char which is not a white space
1987   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1988   //      //withoutUnicodePtr == 1 is true here
1989   //      return false;
1990   //    } catch (IndexOutOfBoundsException e) {
1991   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1992   //    }
1993   //  }
1994   public final int[] getLineEnds() {
1995     //return a bounded copy of this.lineEnds 
1996
1997     int[] copy;
1998     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
1999     return copy;
2000   }
2001
2002   public char[] getSource() {
2003     return this.source;
2004   }
2005   final char[] optimizedCurrentTokenSource1() {
2006     //return always the same char[] build only once
2007
2008     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2009     char charOne = source[startPosition];
2010     switch (charOne) {
2011       case 'a' :
2012         return charArray_a;
2013       case 'b' :
2014         return charArray_b;
2015       case 'c' :
2016         return charArray_c;
2017       case 'd' :
2018         return charArray_d;
2019       case 'e' :
2020         return charArray_e;
2021       case 'f' :
2022         return charArray_f;
2023       case 'g' :
2024         return charArray_g;
2025       case 'h' :
2026         return charArray_h;
2027       case 'i' :
2028         return charArray_i;
2029       case 'j' :
2030         return charArray_j;
2031       case 'k' :
2032         return charArray_k;
2033       case 'l' :
2034         return charArray_l;
2035       case 'm' :
2036         return charArray_m;
2037       case 'n' :
2038         return charArray_n;
2039       case 'o' :
2040         return charArray_o;
2041       case 'p' :
2042         return charArray_p;
2043       case 'q' :
2044         return charArray_q;
2045       case 'r' :
2046         return charArray_r;
2047       case 's' :
2048         return charArray_s;
2049       case 't' :
2050         return charArray_t;
2051       case 'u' :
2052         return charArray_u;
2053       case 'v' :
2054         return charArray_v;
2055       case 'w' :
2056         return charArray_w;
2057       case 'x' :
2058         return charArray_x;
2059       case 'y' :
2060         return charArray_y;
2061       case 'z' :
2062         return charArray_z;
2063       default :
2064         return new char[] { charOne };
2065     }
2066   }
2067
2068   final char[] optimizedCurrentTokenSource2() {
2069     //try to return the same char[] build only once
2070
2071     char c0, c1;
2072     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2073     char[][] table = charArray_length[0][hash];
2074     int i = newEntry2;
2075     while (++i < InternalTableSize) {
2076       char[] charArray = table[i];
2077       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2078         return charArray;
2079     }
2080     //---------other side---------
2081     i = -1;
2082     int max = newEntry2;
2083     while (++i <= max) {
2084       char[] charArray = table[i];
2085       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2086         return charArray;
2087     }
2088     //--------add the entry-------
2089     if (++max >= InternalTableSize)
2090       max = 0;
2091     char[] r;
2092     table[max] = (r = new char[] { c0, c1 });
2093     newEntry2 = max;
2094     return r;
2095   }
2096
2097   final char[] optimizedCurrentTokenSource3() {
2098     //try to return the same char[] build only once
2099
2100     char c0, c1, c2;
2101     int hash =
2102       (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2103         % TableSize;
2104     char[][] table = charArray_length[1][hash];
2105     int i = newEntry3;
2106     while (++i < InternalTableSize) {
2107       char[] charArray = table[i];
2108       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2109         return charArray;
2110     }
2111     //---------other side---------
2112     i = -1;
2113     int max = newEntry3;
2114     while (++i <= max) {
2115       char[] charArray = table[i];
2116       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2117         return charArray;
2118     }
2119     //--------add the entry-------
2120     if (++max >= InternalTableSize)
2121       max = 0;
2122     char[] r;
2123     table[max] = (r = new char[] { c0, c1, c2 });
2124     newEntry3 = max;
2125     return r;
2126   }
2127
2128   final char[] optimizedCurrentTokenSource4() {
2129     //try to return the same char[] build only once
2130
2131     char c0, c1, c2, c3;
2132     long hash =
2133       ((((long) (c0 = source[startPosition])) << 18)
2134         + ((c1 = source[startPosition + 1]) << 12)
2135         + ((c2 = source[startPosition + 2]) << 6)
2136         + (c3 = source[startPosition + 3]))
2137         % TableSize;
2138     char[][] table = charArray_length[2][(int) hash];
2139     int i = newEntry4;
2140     while (++i < InternalTableSize) {
2141       char[] charArray = table[i];
2142       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2143         return charArray;
2144     }
2145     //---------other side---------
2146     i = -1;
2147     int max = newEntry4;
2148     while (++i <= max) {
2149       char[] charArray = table[i];
2150       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2151         return charArray;
2152     }
2153     //--------add the entry-------
2154     if (++max >= InternalTableSize)
2155       max = 0;
2156     char[] r;
2157     table[max] = (r = new char[] { c0, c1, c2, c3 });
2158     newEntry4 = max;
2159     return r;
2160
2161   }
2162
2163   final char[] optimizedCurrentTokenSource5() {
2164     //try to return the same char[] build only once
2165
2166     char c0, c1, c2, c3, c4;
2167     long hash =
2168       ((((long) (c0 = source[startPosition])) << 24)
2169         + (((long) (c1 = source[startPosition + 1])) << 18)
2170         + ((c2 = source[startPosition + 2]) << 12)
2171         + ((c3 = source[startPosition + 3]) << 6)
2172         + (c4 = source[startPosition + 4]))
2173         % TableSize;
2174     char[][] table = charArray_length[3][(int) hash];
2175     int i = newEntry5;
2176     while (++i < InternalTableSize) {
2177       char[] charArray = table[i];
2178       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2179         return charArray;
2180     }
2181     //---------other side---------
2182     i = -1;
2183     int max = newEntry5;
2184     while (++i <= max) {
2185       char[] charArray = table[i];
2186       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2187         return charArray;
2188     }
2189     //--------add the entry-------
2190     if (++max >= InternalTableSize)
2191       max = 0;
2192     char[] r;
2193     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2194     newEntry5 = max;
2195     return r;
2196
2197   }
2198
2199   final char[] optimizedCurrentTokenSource6() {
2200     //try to return the same char[] build only once
2201
2202     char c0, c1, c2, c3, c4, c5;
2203     long hash =
2204       ((((long) (c0 = source[startPosition])) << 32)
2205         + (((long) (c1 = source[startPosition + 1])) << 24)
2206         + (((long) (c2 = source[startPosition + 2])) << 18)
2207         + ((c3 = source[startPosition + 3]) << 12)
2208         + ((c4 = source[startPosition + 4]) << 6)
2209         + (c5 = source[startPosition + 5]))
2210         % TableSize;
2211     char[][] table = charArray_length[4][(int) hash];
2212     int i = newEntry6;
2213     while (++i < InternalTableSize) {
2214       char[] charArray = table[i];
2215       if ((c0 == charArray[0])
2216         && (c1 == charArray[1])
2217         && (c2 == charArray[2])
2218         && (c3 == charArray[3])
2219         && (c4 == charArray[4])
2220         && (c5 == charArray[5]))
2221         return charArray;
2222     }
2223     //---------other side---------
2224     i = -1;
2225     int max = newEntry6;
2226     while (++i <= max) {
2227       char[] charArray = table[i];
2228       if ((c0 == charArray[0])
2229         && (c1 == charArray[1])
2230         && (c2 == charArray[2])
2231         && (c3 == charArray[3])
2232         && (c4 == charArray[4])
2233         && (c5 == charArray[5]))
2234         return charArray;
2235     }
2236     //--------add the entry-------
2237     if (++max >= InternalTableSize)
2238       max = 0;
2239     char[] r;
2240     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2241     newEntry6 = max;
2242     return r;
2243   }
2244
2245   public final void pushLineSeparator() throws InvalidInputException {
2246     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2247     final int INCREMENT = 250;
2248
2249     if (this.checkNonExternalizedStringLiterals) {
2250       // reinitialize the current line for non externalize strings purpose
2251       currentLine = null;
2252     }
2253     //currentCharacter is at position currentPosition-1
2254
2255     // cr 000D
2256     if (currentCharacter == '\r') {
2257       int separatorPos = currentPosition - 1;
2258       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2259         return;
2260       //System.out.println("CR-" + separatorPos);
2261       try {
2262         lineEnds[++linePtr] = separatorPos;
2263       } catch (IndexOutOfBoundsException e) {
2264         //linePtr value is correct
2265         int oldLength = lineEnds.length;
2266         int[] old = lineEnds;
2267         lineEnds = new int[oldLength + INCREMENT];
2268         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2269         lineEnds[linePtr] = separatorPos;
2270       }
2271       // look-ahead for merged cr+lf
2272       try {
2273         if (source[currentPosition] == '\n') {
2274           //System.out.println("look-ahead LF-" + currentPosition);                     
2275           lineEnds[linePtr] = currentPosition;
2276           currentPosition++;
2277           wasAcr = false;
2278         } else {
2279           wasAcr = true;
2280         }
2281       } catch (IndexOutOfBoundsException e) {
2282         wasAcr = true;
2283       }
2284     } else {
2285       // lf 000A
2286       if (currentCharacter == '\n') {
2287         //must merge eventual cr followed by lf
2288         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2289           //System.out.println("merge LF-" + (currentPosition - 1));                                                    
2290           lineEnds[linePtr] = currentPosition - 1;
2291         } else {
2292           int separatorPos = currentPosition - 1;
2293           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2294             return;
2295           // System.out.println("LF-" + separatorPos);                                                  
2296           try {
2297             lineEnds[++linePtr] = separatorPos;
2298           } catch (IndexOutOfBoundsException e) {
2299             //linePtr value is correct
2300             int oldLength = lineEnds.length;
2301             int[] old = lineEnds;
2302             lineEnds = new int[oldLength + INCREMENT];
2303             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2304             lineEnds[linePtr] = separatorPos;
2305           }
2306         }
2307         wasAcr = false;
2308       }
2309     }
2310   }
2311   public final void pushUnicodeLineSeparator() {
2312     // isUnicode means that the \r or \n has been read as a unicode character
2313
2314     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2315
2316     final int INCREMENT = 250;
2317     //currentCharacter is at position currentPosition-1
2318
2319     if (this.checkNonExternalizedStringLiterals) {
2320       // reinitialize the current line for non externalize strings purpose
2321       currentLine = null;
2322     }
2323
2324     // cr 000D
2325     if (currentCharacter == '\r') {
2326       int separatorPos = currentPosition - 6;
2327       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2328         return;
2329       //System.out.println("CR-" + separatorPos);
2330       try {
2331         lineEnds[++linePtr] = separatorPos;
2332       } catch (IndexOutOfBoundsException e) {
2333         //linePtr value is correct
2334         int oldLength = lineEnds.length;
2335         int[] old = lineEnds;
2336         lineEnds = new int[oldLength + INCREMENT];
2337         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2338         lineEnds[linePtr] = separatorPos;
2339       }
2340       // look-ahead for merged cr+lf
2341       if (source[currentPosition] == '\n') {
2342         //System.out.println("look-ahead LF-" + currentPosition);                       
2343         lineEnds[linePtr] = currentPosition;
2344         currentPosition++;
2345         wasAcr = false;
2346       } else {
2347         wasAcr = true;
2348       }
2349     } else {
2350       // lf 000A
2351       if (currentCharacter == '\n') {
2352         //must merge eventual cr followed by lf
2353         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2354           //System.out.println("merge LF-" + (currentPosition - 1));                                                    
2355           lineEnds[linePtr] = currentPosition - 6;
2356         } else {
2357           int separatorPos = currentPosition - 6;
2358           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2359             return;
2360           // System.out.println("LF-" + separatorPos);                                                  
2361           try {
2362             lineEnds[++linePtr] = separatorPos;
2363           } catch (IndexOutOfBoundsException e) {
2364             //linePtr value is correct
2365             int oldLength = lineEnds.length;
2366             int[] old = lineEnds;
2367             lineEnds = new int[oldLength + INCREMENT];
2368             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2369             lineEnds[linePtr] = separatorPos;
2370           }
2371         }
2372         wasAcr = false;
2373       }
2374     }
2375   }
2376   public final void recordComment(boolean isJavadoc) {
2377
2378     // a new annotation comment is recorded
2379     try {
2380       commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2381     } catch (IndexOutOfBoundsException e) {
2382       int oldStackLength = commentStops.length;
2383       int[] oldStack = commentStops;
2384       commentStops = new int[oldStackLength + 30];
2385       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2386       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2387       //grows the positions buffers too
2388       int[] old = commentStarts;
2389       commentStarts = new int[oldStackLength + 30];
2390       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2391     }
2392
2393     //the buffer is of a correct size here
2394     commentStarts[commentPtr] = startPosition;
2395   }
2396   public void resetTo(int begin, int end) {
2397     //reset the scanner to a given position where it may rescan again
2398
2399     diet = false;
2400     initialPosition = startPosition = currentPosition = begin;
2401     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2402     commentPtr = -1; // reset comment stack
2403   }
2404
2405   public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2406     // the string with "\\u" is a legal string of two chars \ and u
2407     //thus we use a direct access to the source (for regular cases).
2408
2409     //    if (unicodeAsBackSlash) {
2410     //      // consume next character
2411     //      unicodeAsBackSlash = false;
2412     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2413     //        && (source[currentPosition] == 'u')) {
2414     //        getNextUnicodeChar();
2415     //      } else {
2416     //        if (withoutUnicodePtr != 0) {
2417     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2418     //        }
2419     //      }
2420     //    } else
2421     currentCharacter = source[currentPosition++];
2422     switch (currentCharacter) {
2423       case '\'' :
2424         currentCharacter = '\'';
2425         break;
2426       case '\\' :
2427         currentCharacter = '\\';
2428         break;
2429       default :
2430         currentCharacter = '\\';
2431         currentPosition--;
2432     }
2433   }
2434
2435   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2436     // the string with "\\u" is a legal string of two chars \ and u
2437     //thus we use a direct access to the source (for regular cases).
2438
2439     //    if (unicodeAsBackSlash) {
2440     //      // consume next character
2441     //      unicodeAsBackSlash = false;
2442     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2443     //        && (source[currentPosition] == 'u')) {
2444     //        getNextUnicodeChar();
2445     //      } else {
2446     //        if (withoutUnicodePtr != 0) {
2447     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2448     //        }
2449     //      }
2450     //    } else
2451     currentCharacter = source[currentPosition++];
2452     switch (currentCharacter) {
2453       //      case 'b' :
2454       //        currentCharacter = '\b';
2455       //        break;
2456       case 't' :
2457         currentCharacter = '\t';
2458         break;
2459       case 'n' :
2460         currentCharacter = '\n';
2461         break;
2462         //      case 'f' :
2463         //        currentCharacter = '\f';
2464         //        break;
2465       case 'r' :
2466         currentCharacter = '\r';
2467         break;
2468       case '\"' :
2469         currentCharacter = '\"';
2470         break;
2471       case '\'' :
2472         currentCharacter = '\'';
2473         break;
2474       case '\\' :
2475         currentCharacter = '\\';
2476         break;
2477       case '$' :
2478         currentCharacter = '$';
2479         break;
2480       default :
2481         // -----------octal escape--------------
2482         // OctalDigit
2483         // OctalDigit OctalDigit
2484         // ZeroToThree OctalDigit OctalDigit
2485
2486         int number = Character.getNumericValue(currentCharacter);
2487         if (number >= 0 && number <= 7) {
2488           boolean zeroToThreeNot = number > 3;
2489           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2490             int digit = Character.getNumericValue(currentCharacter);
2491             if (digit >= 0 && digit <= 7) {
2492               number = (number * 8) + digit;
2493               if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2494                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2495                   currentPosition--;
2496                 } else {
2497                   digit = Character.getNumericValue(currentCharacter);
2498                   if (digit >= 0 && digit <= 7) {
2499                     // has read \ZeroToThree OctalDigit OctalDigit
2500                     number = (number * 8) + digit;
2501                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2502                     currentPosition--;
2503                   }
2504                 }
2505               } else { // has read \OctalDigit NonDigit--> ignore last character
2506                 currentPosition--;
2507               }
2508             } else { // has read \OctalDigit NonOctalDigit--> ignore last character                                             
2509               currentPosition--;
2510             }
2511           } else { // has read \OctalDigit --> ignore last character
2512             currentPosition--;
2513           }
2514           if (number > 255)
2515             throw new InvalidInputException(INVALID_ESCAPE);
2516           currentCharacter = (char) number;
2517         }
2518         //else
2519         //     throw new InvalidInputException(INVALID_ESCAPE);
2520     }
2521   }
2522
2523   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2524   //    return scanIdentifierOrKeyword( false );
2525   //  }
2526
2527   public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2528     //test keywords
2529
2530     //first dispatch on the first char.
2531     //then the length. If there are several
2532     //keywors with the same length AND the same first char, then do another
2533     //disptach on the second char :-)...cool....but fast !
2534
2535     useAssertAsAnIndentifier = false;
2536
2537     while (getNextCharAsJavaIdentifierPart()) {
2538     };
2539
2540     if (isVariable) {
2541       return TokenNameVariable;
2542     }
2543     int index, length;
2544     char[] data;
2545     char firstLetter;
2546     //    if (withoutUnicodePtr == 0)
2547
2548     //quick test on length == 1 but not on length > 12 while most identifier
2549     //have a length which is <= 12...but there are lots of identifier with
2550     //only one char....
2551
2552     //      {
2553     if ((length = currentPosition - startPosition) == 1)
2554       return TokenNameIdentifier;
2555     //  data = source;
2556     data = new char[length];
2557     index = startPosition;
2558     for (int i = 0; i < length; i++) {
2559       data[i] = Character.toLowerCase(source[index + i]);
2560     }
2561     index = 0;
2562     //    } else {
2563     //      if ((length = withoutUnicodePtr) == 1)
2564     //        return TokenNameIdentifier;
2565     //      // data = withoutUnicodeBuffer;
2566     //      data = new char[withoutUnicodeBuffer.length];
2567     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2568     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2569     //      }
2570     //      index = 1;
2571     //    }
2572
2573     firstLetter = data[index];
2574     switch (firstLetter) {
2575
2576       case 'a' : // as and array
2577         switch (length) {
2578           case 2 : //as
2579             if ((data[++index] == 's')) {
2580               return TokenNameas;
2581             } else {
2582               return TokenNameIdentifier;
2583             }
2584           case 3 : //and
2585             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2586               return TokenNameAND;
2587             } else {
2588               return TokenNameIdentifier;
2589             }
2590             //          case 5 :
2591             //            if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2592             //              return TokenNamearray;
2593             //            else
2594             //              return TokenNameIdentifier;
2595           default :
2596             return TokenNameIdentifier;
2597         }
2598       case 'b' : //break
2599         switch (length) {
2600           case 5 :
2601             if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
2602               return TokenNamebreak;
2603             else
2604               return TokenNameIdentifier;
2605           default :
2606             return TokenNameIdentifier;
2607         }
2608
2609       case 'c' : //case class continue
2610         switch (length) {
2611           case 4 :
2612             if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
2613               return TokenNamecase;
2614             else
2615               return TokenNameIdentifier;
2616           case 5 :
2617             if ((data[++index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
2618               return TokenNameclass;
2619             else
2620               return TokenNameIdentifier;
2621           case 8 :
2622             if ((data[++index] == 'o')
2623               && (data[++index] == 'n')
2624               && (data[++index] == 't')
2625               && (data[++index] == 'i')
2626               && (data[++index] == 'n')
2627               && (data[++index] == 'u')
2628               && (data[++index] == 'e'))
2629               return TokenNamecontinue;
2630             else
2631               return TokenNameIdentifier;
2632           default :
2633             return TokenNameIdentifier;
2634         }
2635
2636       case 'd' : //define default do 
2637         switch (length) {
2638           case 2 :
2639             if ((data[++index] == 'o'))
2640               return TokenNamedo;
2641             else
2642               return TokenNameIdentifier;
2643           case 6 :
2644             if ((data[++index] == 'e')
2645               && (data[++index] == 'f')
2646               && (data[++index] == 'i')
2647               && (data[++index] == 'n')
2648               && (data[++index] == 'e'))
2649               return TokenNamedefine;
2650             else
2651               return TokenNameIdentifier;
2652           case 7 :
2653             if ((data[++index] == 'e')
2654               && (data[++index] == 'f')
2655               && (data[++index] == 'a')
2656               && (data[++index] == 'u')
2657               && (data[++index] == 'l')
2658               && (data[++index] == 't'))
2659               return TokenNamedefault;
2660             else
2661               return TokenNameIdentifier;
2662           default :
2663             return TokenNameIdentifier;
2664         }
2665       case 'e' : //echo else elseif extends
2666         switch (length) {
2667           case 4 :
2668             if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
2669               return TokenNameecho;
2670             else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2671               return TokenNameelse;
2672             else
2673               return TokenNameIdentifier;
2674           case 5 : // endif
2675             if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
2676               return TokenNameendif;
2677             else
2678               return TokenNameIdentifier;
2679           case 6 : // endfor
2680             if ((data[++index] == 'n')
2681               && (data[++index] == 'd')
2682               && (data[++index] == 'f')
2683               && (data[++index] == 'o')
2684               && (data[++index] == 'r'))
2685               return TokenNameendfor;
2686             else if (
2687               (data[index] == 'l')
2688                 && (data[++index] == 's')
2689                 && (data[++index] == 'e')
2690                 && (data[++index] == 'i')
2691                 && (data[++index] == 'f'))
2692               return TokenNameelseif;
2693             else
2694               return TokenNameIdentifier;
2695           case 7 :
2696             if ((data[++index] == 'x')
2697               && (data[++index] == 't')
2698               && (data[++index] == 'e')
2699               && (data[++index] == 'n')
2700               && (data[++index] == 'd')
2701               && (data[++index] == 's'))
2702               return TokenNameextends;
2703             else
2704               return TokenNameIdentifier;
2705           case 8 : // endwhile
2706             if ((data[++index] == 'n')
2707               && (data[++index] == 'd')
2708               && (data[++index] == 'w')
2709               && (data[++index] == 'h')
2710               && (data[++index] == 'i')
2711               && (data[++index] == 'l')
2712               && (data[++index] == 'e'))
2713               return TokenNameendwhile;
2714             else
2715               return TokenNameIdentifier;
2716           case 9 : // endswitch
2717             if ((data[++index] == 'n')
2718               && (data[++index] == 'd')
2719               && (data[++index] == 's')
2720               && (data[++index] == 'w')
2721               && (data[++index] == 'i')
2722               && (data[++index] == 't')
2723               && (data[++index] == 'c')
2724               && (data[++index] == 'h'))
2725               return TokenNameendswitch;
2726             else
2727               return TokenNameIdentifier;
2728           case 10 : // endforeach
2729             if ((data[++index] == 'n')
2730               && (data[++index] == 'd')
2731               && (data[++index] == 'f')
2732               && (data[++index] == 'o')
2733               && (data[++index] == 'r')
2734               && (data[++index] == 'e')
2735               && (data[++index] == 'a')
2736               && (data[++index] == 'c')
2737               && (data[++index] == 'h'))
2738               return TokenNameendforeach;
2739             else
2740               return TokenNameIdentifier;
2741
2742           default :
2743             return TokenNameIdentifier;
2744         }
2745
2746       case 'f' : //for false function
2747         switch (length) {
2748           case 3 :
2749             if ((data[++index] == 'o') && (data[++index] == 'r'))
2750               return TokenNamefor;
2751             else
2752               return TokenNameIdentifier;
2753           case 5 :
2754             if ((data[++index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2755               return TokenNamefalse;
2756             else
2757               return TokenNameIdentifier;
2758           case 7 : // function
2759             if ((data[++index] == 'o')
2760               && (data[++index] == 'r')
2761               && (data[++index] == 'e')
2762               && (data[++index] == 'a')
2763               && (data[++index] == 'c')
2764               && (data[++index] == 'h'))
2765               return TokenNameforeach;
2766             else
2767               return TokenNameIdentifier;
2768           case 8 : // function
2769             if ((data[++index] == 'u')
2770               && (data[++index] == 'n')
2771               && (data[++index] == 'c')
2772               && (data[++index] == 't')
2773               && (data[++index] == 'i')
2774               && (data[++index] == 'o')
2775               && (data[++index] == 'n'))
2776               return TokenNamefunction;
2777             else
2778               return TokenNameIdentifier;
2779           default :
2780             return TokenNameIdentifier;
2781         }
2782       case 'g' : //global
2783         if (length == 6) {
2784           if ((data[++index] == 'l')
2785             && (data[++index] == 'o')
2786             && (data[++index] == 'b')
2787             && (data[++index] == 'a')
2788             && (data[++index] == 'l')) {
2789             return TokenNameglobal;
2790           }
2791         }
2792         return TokenNameIdentifier;
2793
2794       case 'i' : //if int 
2795         switch (length) {
2796           case 2 :
2797             if (data[++index] == 'f')
2798               return TokenNameif;
2799             else
2800               return TokenNameIdentifier;
2801             //          case 3 :
2802             //            if ((data[++index] == 'n') && (data[++index] == 't'))
2803             //              return TokenNameint;
2804             //            else
2805             //              return TokenNameIdentifier;
2806           case 7 :
2807             if ((data[++index] == 'n')
2808               && (data[++index] == 'c')
2809               && (data[++index] == 'l')
2810               && (data[++index] == 'u')
2811               && (data[++index] == 'd')
2812               && (data[++index] == 'e'))
2813               return TokenNameinclude;
2814             else
2815               return TokenNameIdentifier;
2816           case 12 :
2817             if ((data[++index] == 'n')
2818               && (data[++index] == 'c')
2819               && (data[++index] == 'l')
2820               && (data[++index] == 'u')
2821               && (data[++index] == 'd')
2822               && (data[++index] == 'e')
2823               && (data[++index] == '_')
2824               && (data[++index] == 'o')
2825               && (data[++index] == 'n')
2826               && (data[++index] == 'c')
2827               && (data[++index] == 'e'))
2828               return TokenNameinclude_once;
2829             else
2830               return TokenNameIdentifier;
2831           default :
2832             return TokenNameIdentifier;
2833         }
2834
2835       case 'l' : //list
2836         if (length == 4) {
2837           if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
2838             return TokenNamelist;
2839           }
2840         }
2841         return TokenNameIdentifier;
2842
2843       case 'n' : // new null
2844         switch (length) {
2845           case 3 :
2846             if ((data[++index] == 'e') && (data[++index] == 'w'))
2847               return TokenNamenew;
2848             else
2849               return TokenNameIdentifier;
2850           case 4 :
2851             if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l'))
2852               return TokenNamenull;
2853             else
2854               return TokenNameIdentifier;
2855
2856           default :
2857             return TokenNameIdentifier;
2858         }
2859       case 'o' : // or old_function
2860         if (length == 2) {
2861           if (data[++index] == 'r') {
2862             return TokenNameOR;
2863           }
2864         }
2865         //        if (length == 12) {
2866         //          if ((data[++index] == 'l')
2867         //            && (data[++index] == 'd')
2868         //            && (data[++index] == '_')
2869         //            && (data[++index] == 'f')
2870         //            && (data[++index] == 'u')
2871         //            && (data[++index] == 'n')
2872         //            && (data[++index] == 'c')
2873         //            && (data[++index] == 't')
2874         //            && (data[++index] == 'i')
2875         //            && (data[++index] == 'o')
2876         //            && (data[++index] == 'n')) {
2877         //            return TokenNameold_function;
2878         //          }
2879         //        }
2880         return TokenNameIdentifier;
2881
2882       case 'p' : // print
2883         if (length == 5) {
2884           if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
2885             return TokenNameprint;
2886           }
2887         }
2888         return TokenNameIdentifier;
2889       case 'r' : //return require require_once
2890         if (length == 6) {
2891           if ((data[++index] == 'e')
2892             && (data[++index] == 't')
2893             && (data[++index] == 'u')
2894             && (data[++index] == 'r')
2895             && (data[++index] == 'n')) {
2896             return TokenNamereturn;
2897           }
2898         } else if (length == 7) {
2899           if ((data[++index] == 'e')
2900             && (data[++index] == 'q')
2901             && (data[++index] == 'u')
2902             && (data[++index] == 'i')
2903             && (data[++index] == 'r')
2904             && (data[++index] == 'e')) {
2905             return TokenNamerequire;
2906           }
2907         } else if (length == 12) {
2908           if ((data[++index] == 'e')
2909             && (data[++index] == 'q')
2910             && (data[++index] == 'u')
2911             && (data[++index] == 'i')
2912             && (data[++index] == 'r')
2913             && (data[++index] == 'e')
2914             && (data[++index] == '_')
2915             && (data[++index] == 'o')
2916             && (data[++index] == 'n')
2917             && (data[++index] == 'c')
2918             && (data[++index] == 'e')) {
2919             return TokenNamerequire_once;
2920           }
2921         } else
2922           return TokenNameIdentifier;
2923
2924       case 's' : //static switch 
2925         switch (length) {
2926           case 6 :
2927             if (data[++index] == 't')
2928               if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
2929                 return TokenNamestatic;
2930               } else
2931                 return TokenNameIdentifier;
2932             else if (
2933               (data[index] == 'w')
2934                 && (data[++index] == 'i')
2935                 && (data[++index] == 't')
2936                 && (data[++index] == 'c')
2937                 && (data[++index] == 'h'))
2938               return TokenNameswitch;
2939             else
2940               return TokenNameIdentifier;
2941           default :
2942             return TokenNameIdentifier;
2943         }
2944
2945       case 't' : // true
2946         switch (length) {
2947
2948           case 4 :
2949             if ((data[++index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e'))
2950               return TokenNametrue;
2951             else
2952               return TokenNameIdentifier;
2953             //            if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
2954             //              return TokenNamethis;
2955
2956           default :
2957             return TokenNameIdentifier;
2958         }
2959
2960       case 'v' : //var 
2961         switch (length) {
2962           case 3 :
2963             if ((data[++index] == 'a') && (data[++index] == 'r'))
2964               return TokenNamevar;
2965             else
2966               return TokenNameIdentifier;
2967
2968           default :
2969             return TokenNameIdentifier;
2970         }
2971
2972       case 'w' : //while 
2973         switch (length) {
2974           case 5 :
2975             if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
2976               return TokenNamewhile;
2977             else
2978               return TokenNameIdentifier;
2979             //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
2980             //return TokenNamewidefp ;
2981             //else
2982             //return TokenNameIdentifier;
2983           default :
2984             return TokenNameIdentifier;
2985         }
2986
2987       case 'x' : //xor
2988         switch (length) {
2989           case 3 :
2990             if ((data[++index] == 'o') && (data[++index] == 'r'))
2991               return TokenNameXOR;
2992             else
2993               return TokenNameIdentifier;
2994
2995           default :
2996             return TokenNameIdentifier;
2997         }
2998       default :
2999         return TokenNameIdentifier;
3000     }
3001   }
3002   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3003
3004     //when entering this method the currentCharacter is the firt
3005     //digit of the number , i.e. it may be preceeded by a . when
3006     //dotPrefix is true
3007
3008     boolean floating = dotPrefix;
3009     if ((!dotPrefix) && (currentCharacter == '0')) {
3010       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3011         //force the first char of the hexa number do exist...
3012         // consume next character
3013         unicodeAsBackSlash = false;
3014         currentCharacter = source[currentPosition++];
3015         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3016         //          && (source[currentPosition] == 'u')) {
3017         //          getNextUnicodeChar();
3018         //        } else {
3019         //          if (withoutUnicodePtr != 0) {
3020         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3021         //          }
3022         //        }
3023         if (Character.digit(currentCharacter, 16) == -1)
3024           throw new InvalidInputException(INVALID_HEXA);
3025         //---end forcing--
3026         while (getNextCharAsDigit(16)) {
3027         };
3028         //        if (getNextChar('l', 'L') >= 0)
3029         //          return TokenNameLongLiteral;
3030         //        else
3031         return TokenNameIntegerLiteral;
3032       }
3033
3034       //there is x or X in the number
3035       //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3036       if (getNextCharAsDigit()) {
3037         //-------------potential octal-----------------
3038         while (getNextCharAsDigit()) {
3039         };
3040
3041         //        if (getNextChar('l', 'L') >= 0) {
3042         //          return TokenNameLongLiteral;
3043         //        }
3044         //
3045         //        if (getNextChar('f', 'F') >= 0) {
3046         //          return TokenNameFloatingPointLiteral;
3047         //        }
3048
3049         if (getNextChar('d', 'D') >= 0) {
3050           return TokenNameDoubleLiteral;
3051         } else { //make the distinction between octal and float ....
3052           if (getNextChar('.')) { //bingo ! ....
3053             while (getNextCharAsDigit()) {
3054             };
3055             if (getNextChar('e', 'E') >= 0) {
3056               // consume next character
3057               unicodeAsBackSlash = false;
3058               currentCharacter = source[currentPosition++];
3059               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3060               //                && (source[currentPosition] == 'u')) {
3061               //                getNextUnicodeChar();
3062               //              } else {
3063               //                if (withoutUnicodePtr != 0) {
3064               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3065               //                }
3066               //              }
3067
3068               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3069                 // consume next character
3070                 unicodeAsBackSlash = false;
3071                 currentCharacter = source[currentPosition++];
3072                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3073                 //                  && (source[currentPosition] == 'u')) {
3074                 //                  getNextUnicodeChar();
3075                 //                } else {
3076                 //                  if (withoutUnicodePtr != 0) {
3077                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3078                 //                      currentCharacter;
3079                 //                  }
3080                 //                }
3081               }
3082               if (!Character.isDigit(currentCharacter))
3083                 throw new InvalidInputException(INVALID_FLOAT);
3084               while (getNextCharAsDigit()) {
3085               };
3086             }
3087             //            if (getNextChar('f', 'F') >= 0)
3088             //              return TokenNameFloatingPointLiteral;
3089             getNextChar('d', 'D'); //jump over potential d or D
3090             return TokenNameDoubleLiteral;
3091           } else {
3092             return TokenNameIntegerLiteral;
3093           }
3094         }
3095       } else {
3096         /* carry on */
3097       }
3098     }
3099
3100     while (getNextCharAsDigit()) {
3101     };
3102
3103     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3104     //      return TokenNameLongLiteral;
3105
3106     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3107       while (getNextCharAsDigit()) {
3108       };
3109       floating = true;
3110     }
3111
3112     //if floating is true both exponant and suffix may be optional
3113
3114     if (getNextChar('e', 'E') >= 0) {
3115       floating = true;
3116       // consume next character
3117       unicodeAsBackSlash = false;
3118       currentCharacter = source[currentPosition++];
3119       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3120       //        && (source[currentPosition] == 'u')) {
3121       //        getNextUnicodeChar();
3122       //      } else {
3123       //        if (withoutUnicodePtr != 0) {
3124       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3125       //        }
3126       //      }
3127
3128       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
3129         unicodeAsBackSlash = false;
3130         currentCharacter = source[currentPosition++];
3131         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3132         //          && (source[currentPosition] == 'u')) {
3133         //          getNextUnicodeChar();
3134         //        } else {
3135         //          if (withoutUnicodePtr != 0) {
3136         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3137         //          }
3138         //        }
3139       }
3140       if (!Character.isDigit(currentCharacter))
3141         throw new InvalidInputException(INVALID_FLOAT);
3142       while (getNextCharAsDigit()) {
3143       };
3144     }
3145
3146     if (getNextChar('d', 'D') >= 0)
3147       return TokenNameDoubleLiteral;
3148     //    if (getNextChar('f', 'F') >= 0)
3149     //      return TokenNameFloatingPointLiteral;
3150
3151     //the long flag has been tested before
3152
3153     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3154   }
3155   /**
3156    * Search the line number corresponding to a specific position
3157    *
3158    */
3159   public final int getLineNumber(int position) {
3160
3161     if (lineEnds == null)
3162       return 1;
3163     int length = linePtr + 1;
3164     if (length == 0)
3165       return 1;
3166     int g = 0, d = length - 1;
3167     int m = 0;
3168     while (g <= d) {
3169       m = (g + d) / 2;
3170       if (position < lineEnds[m]) {
3171         d = m - 1;
3172       } else if (position > lineEnds[m]) {
3173         g = m + 1;
3174       } else {
3175         return m + 1;
3176       }
3177     }
3178     if (position < lineEnds[m]) {
3179       return m + 1;
3180     }
3181     return m + 2;
3182   }
3183
3184   public void setPHPMode(boolean mode) {
3185     phpMode = mode;
3186   }
3187
3188   public final void setSource(char[] source) {
3189     //the source-buffer is set to sourceString
3190
3191     if (source == null) {
3192       this.source = new char[0];
3193     } else {
3194       this.source = source;
3195     }
3196     startPosition = -1;
3197     initialPosition = currentPosition = 0;
3198     containsAssertKeyword = false;
3199     withoutUnicodeBuffer = new char[this.source.length];
3200
3201   }
3202
3203   public String toString() {
3204     if (startPosition == source.length)
3205       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3206     if (currentPosition > source.length)
3207       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3208
3209     char front[] = new char[startPosition];
3210     System.arraycopy(source, 0, front, 0, startPosition);
3211
3212     int middleLength = (currentPosition - 1) - startPosition + 1;
3213     char middle[];
3214     if (middleLength > -1) {
3215       middle = new char[middleLength];
3216       System.arraycopy(source, startPosition, middle, 0, middleLength);
3217     } else {
3218       middle = new char[0];
3219     }
3220
3221     char end[] = new char[source.length - (currentPosition - 1)];
3222     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3223
3224     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3225     + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3226     + new String(end);
3227   }
3228   public final String toStringAction(int act) {
3229
3230     switch (act) {
3231       case TokenNameERROR :
3232         return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3233       case TokenNameStopPHP :
3234         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3235       case TokenNameIdentifier :
3236         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3237       case TokenNameVariable :
3238         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3239       case TokenNameas :
3240         return "as"; //$NON-NLS-1$
3241       case TokenNamebreak :
3242         return "break"; //$NON-NLS-1$
3243       case TokenNamecase :
3244         return "case"; //$NON-NLS-1$
3245       case TokenNameclass :
3246         return "class"; //$NON-NLS-1$
3247       case TokenNamecontinue :
3248         return "continue"; //$NON-NLS-1$
3249       case TokenNamedefault :
3250         return "default"; //$NON-NLS-1$
3251       case TokenNamedefine :
3252         return "define"; //$NON-NLS-1$
3253       case TokenNamedo :
3254         return "do"; //$NON-NLS-1$
3255       case TokenNameecho :
3256         return "echo"; //$NON-NLS-1$
3257       case TokenNameelse :
3258         return "else"; //$NON-NLS-1$
3259       case TokenNameelseif :
3260         return "elseif"; //$NON-NLS-1$
3261       case TokenNameendfor :
3262         return "endfor"; //$NON-NLS-1$
3263       case TokenNameendforeach :
3264         return "endforeach"; //$NON-NLS-1$
3265       case TokenNameendif :
3266         return "endif"; //$NON-NLS-1$
3267       case TokenNameendswitch :
3268         return "endswitch"; //$NON-NLS-1$
3269       case TokenNameendwhile :
3270         return "endwhile"; //$NON-NLS-1$
3271       case TokenNameextends :
3272         return "extends"; //$NON-NLS-1$
3273       case TokenNamefalse :
3274         return "false"; //$NON-NLS-1$
3275       case TokenNamefor :
3276         return "for"; //$NON-NLS-1$
3277       case TokenNameforeach :
3278         return "foreach"; //$NON-NLS-1$
3279       case TokenNamefunction :
3280         return "function"; //$NON-NLS-1$
3281       case TokenNameglobal :
3282         return "global"; //$NON-NLS-1$
3283       case TokenNameif :
3284         return "if"; //$NON-NLS-1$
3285       case TokenNameinclude :
3286         return "include"; //$NON-NLS-1$
3287       case TokenNameinclude_once :
3288         return "include_once"; //$NON-NLS-1$
3289       case TokenNamelist :
3290         return "list"; //$NON-NLS-1$
3291       case TokenNamenew :
3292         return "new"; //$NON-NLS-1$
3293       case TokenNamenull :
3294         return "null"; //$NON-NLS-1$
3295       case TokenNameprint :
3296         return "print"; //$NON-NLS-1$
3297       case TokenNamerequire :
3298         return "require"; //$NON-NLS-1$
3299       case TokenNamerequire_once :
3300         return "require_once"; //$NON-NLS-1$
3301       case TokenNamereturn :
3302         return "return"; //$NON-NLS-1$
3303       case TokenNamestatic :
3304         return "static"; //$NON-NLS-1$
3305       case TokenNameswitch :
3306         return "switch"; //$NON-NLS-1$
3307       case TokenNametrue :
3308         return "true"; //$NON-NLS-1$
3309       case TokenNamevar :
3310         return "var"; //$NON-NLS-1$
3311       case TokenNamewhile :
3312         return "while"; //$NON-NLS-1$
3313       case TokenNameIntegerLiteral :
3314         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3315       case TokenNameDoubleLiteral :
3316         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3317       case TokenNameStringLiteral :
3318         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3319       case TokenNameStringConstant :
3320         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3321       case TokenNameStringInterpolated :
3322         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3323       case TokenNameHEREDOC :
3324         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3325
3326       case TokenNamePLUS_PLUS :
3327         return "++"; //$NON-NLS-1$
3328       case TokenNameMINUS_MINUS :
3329         return "--"; //$NON-NLS-1$
3330       case TokenNameEQUAL_EQUAL :
3331         return "=="; //$NON-NLS-1$
3332       case TokenNameEQUAL_GREATER :
3333         return "=>"; //$NON-NLS-1$
3334       case TokenNameLESS_EQUAL :
3335         return "<="; //$NON-NLS-1$
3336       case TokenNameGREATER_EQUAL :
3337         return ">="; //$NON-NLS-1$
3338       case TokenNameNOT_EQUAL :
3339         return "!="; //$NON-NLS-1$
3340       case TokenNameLEFT_SHIFT :
3341         return "<<"; //$NON-NLS-1$
3342       case TokenNameRIGHT_SHIFT :
3343         return ">>"; //$NON-NLS-1$
3344       case TokenNamePLUS_EQUAL :
3345         return "+="; //$NON-NLS-1$
3346       case TokenNameMINUS_EQUAL :
3347         return "-="; //$NON-NLS-1$
3348       case TokenNameMULTIPLY_EQUAL :
3349         return "*="; //$NON-NLS-1$
3350       case TokenNameDIVIDE_EQUAL :
3351         return "/="; //$NON-NLS-1$
3352       case TokenNameAND_EQUAL :
3353         return "&="; //$NON-NLS-1$
3354       case TokenNameOR_EQUAL :
3355         return "|="; //$NON-NLS-1$
3356       case TokenNameXOR_EQUAL :
3357         return "^="; //$NON-NLS-1$
3358       case TokenNameREMAINDER_EQUAL :
3359         return "%="; //$NON-NLS-1$
3360       case TokenNameLEFT_SHIFT_EQUAL :
3361         return "<<="; //$NON-NLS-1$
3362       case TokenNameRIGHT_SHIFT_EQUAL :
3363         return ">>="; //$NON-NLS-1$
3364       case TokenNameOR_OR :
3365         return "||"; //$NON-NLS-1$
3366       case TokenNameAND_AND :
3367         return "&&"; //$NON-NLS-1$
3368       case TokenNamePLUS :
3369         return "+"; //$NON-NLS-1$
3370       case TokenNameMINUS :
3371         return "-"; //$NON-NLS-1$
3372       case TokenNameMINUS_GREATER :
3373         return "->";
3374       case TokenNameNOT :
3375         return "!"; //$NON-NLS-1$
3376       case TokenNameREMAINDER :
3377         return "%"; //$NON-NLS-1$
3378       case TokenNameXOR :
3379         return "^"; //$NON-NLS-1$
3380       case TokenNameAND :
3381         return "&"; //$NON-NLS-1$
3382       case TokenNameMULTIPLY :
3383         return "*"; //$NON-NLS-1$
3384       case TokenNameOR :
3385         return "|"; //$NON-NLS-1$
3386       case TokenNameTWIDDLE :
3387         return "~"; //$NON-NLS-1$
3388       case TokenNameTWIDDLE_EQUAL :
3389         return "~="; //$NON-NLS-1$
3390       case TokenNameDIVIDE :
3391         return "/"; //$NON-NLS-1$
3392       case TokenNameGREATER :
3393         return ">"; //$NON-NLS-1$
3394       case TokenNameLESS :
3395         return "<"; //$NON-NLS-1$
3396       case TokenNameLPAREN :
3397         return "("; //$NON-NLS-1$
3398       case TokenNameRPAREN :
3399         return ")"; //$NON-NLS-1$
3400       case TokenNameLBRACE :
3401         return "{"; //$NON-NLS-1$
3402       case TokenNameRBRACE :
3403         return "}"; //$NON-NLS-1$
3404       case TokenNameLBRACKET :
3405         return "["; //$NON-NLS-1$
3406       case TokenNameRBRACKET :
3407         return "]"; //$NON-NLS-1$
3408       case TokenNameSEMICOLON :
3409         return ";"; //$NON-NLS-1$
3410       case TokenNameQUESTION :
3411         return "?"; //$NON-NLS-1$
3412       case TokenNameCOLON :
3413         return ":"; //$NON-NLS-1$
3414       case TokenNameCOMMA :
3415         return ","; //$NON-NLS-1$
3416       case TokenNameDOT :
3417         return "."; //$NON-NLS-1$
3418       case TokenNameEQUAL :
3419         return "="; //$NON-NLS-1$
3420       case TokenNameAT :
3421         return "@";
3422       case TokenNameDOLLAR_LBRACE :
3423         return "${";
3424       case TokenNameEOF :
3425         return "EOF"; //$NON-NLS-1$
3426       case TokenNameWHITESPACE :
3427         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3428       case TokenNameCOMMENT_LINE :
3429         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3430       case TokenNameCOMMENT_BLOCK :
3431         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3432       case TokenNameCOMMENT_PHPDOC :
3433         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3434       case TokenNameHTML :
3435         return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3436       default :
3437         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3438     }
3439   }
3440
/**
 * Creates a scanner with assert mode switched off.
 * <p>
 * Simply forwards to the four-argument constructor, supplying
 * <code>false</code> for its <code>assertMode</code> parameter.
 *
 * @param tokenizeComments passed through unchanged
 * @param tokenizeWhiteSpace passed through unchanged
 * @param checkNonExternalizedStringLiterals passed through unchanged
 */
public Scanner(
  boolean tokenizeComments,
  boolean tokenizeWhiteSpace,
  boolean checkNonExternalizedStringLiterals) {
  this(
    tokenizeComments,
    tokenizeWhiteSpace,
    checkNonExternalizedStringLiterals,
    false);
}
3444
/**
 * Creates a scanner and records its option flags.
 * <p>
 * Each argument is stored in the field of the same name. In addition
 * <code>eofPosition</code> is initialised to <code>Integer.MAX_VALUE</code>.
 *
 * @param tokenizeComments value stored in <code>this.tokenizeComments</code>
 * @param tokenizeWhiteSpace value stored in <code>this.tokenizeWhiteSpace</code>
 * @param checkNonExternalizedStringLiterals value stored in
 *        <code>this.checkNonExternalizedStringLiterals</code>
 * @param assertMode value stored in <code>this.assertMode</code>
 */
public Scanner(
  boolean tokenizeComments,
  boolean tokenizeWhiteSpace,
  boolean checkNonExternalizedStringLiterals,
  boolean assertMode) {
  // Option flags are copied verbatim; the assignments are independent of each other.
  this.assertMode = assertMode;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeComments = tokenizeComments;

  // Largest possible value until something else assigns a real end position.
  this.eofPosition = Integer.MAX_VALUE;
}
3456
3457   private void checkNonExternalizeString() throws InvalidInputException {
3458     if (currentLine == null)
3459       return;
3460     parseTags(currentLine);
3461   }
3462
3463   private void parseTags(NLSLine line) throws InvalidInputException {
3464     String s = new String(getCurrentTokenSource());
3465     int pos = s.indexOf(TAG_PREFIX);
3466     int lineLength = line.size();
3467     while (pos != -1) {
3468       int start = pos + TAG_PREFIX_LENGTH;
3469       int end = s.indexOf(TAG_POSTFIX, start);
3470       String index = s.substring(start, end);
3471       int i = 0;
3472       try {
3473         i = Integer.parseInt(index) - 1;
3474         // Tags are one based not zero based.
3475       } catch (NumberFormatException e) {
3476         i = -1; // we don't want to consider this as a valid NLS tag
3477       }
3478       if (line.exists(i)) {
3479         line.set(i, null);
3480       }
3481       pos = s.indexOf(TAG_PREFIX, start);
3482     }
3483
3484     this.nonNLSStrings = new StringLiteral[lineLength];
3485     int nonNLSCounter = 0;
3486     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3487       StringLiteral literal = (StringLiteral) iterator.next();
3488       if (literal != null) {
3489         this.nonNLSStrings[nonNLSCounter++] = literal;
3490       }
3491     }
3492     if (nonNLSCounter == 0) {
3493       this.nonNLSStrings = null;
3494       currentLine = null;
3495       return;
3496     }
3497     this.wasNonExternalizedStringLiteral = true;
3498     if (nonNLSCounter != lineLength) {
3499       System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
3500     }
3501     currentLine = null;
3502   }
3503 }