bef873affe58b9924406f1a1f278a1200adb63ff
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v0.5 
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v05.html
7  * 
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12 import java.util.ArrayList;
13 import java.util.Iterator;
14 import java.util.List;
15 import net.sourceforge.phpdt.core.compiler.CharOperation;
16 import net.sourceforge.phpdt.core.compiler.IScanner;
17 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
18 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
19 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
20 public class Scanner implements IScanner, ITerminalSymbols {
21   /*
22    * APIs are: - getNextToken(), which returns the type of the current token
23    * (this value is not memorized by the scanner); - getCurrentTokenSource(),
24    * which provides the token's "REAL" source (i.e. all unicode escapes have been
25    * transformed into the correct char); - startPosition gives the position into the
26    * stream; - currentPosition-1 gives the sourceEnd position into the stream
27    */
28   // 1.4 feature (assert keyword support)
29   private boolean assertMode;
30   public boolean useAssertAsAnIndentifier = false;
31   // flag indicating whether the processed source contains occurrences of the keyword assert
32   public boolean containsAssertKeyword = false;
33   public boolean recordLineSeparator; // consulted by getNextToken() before it calls pushLineSeparator() on a '\r' or '\n'
34   public boolean phpMode = false; // false while skipping HTML; getNextToken() sets it once "<?" + whitespace or "<?php" has been read
35   public char currentCharacter; // last character read from source
36   public int startPosition; // index in source of the first character of the current token
37   public int currentPosition; // index of the next character to read; currentPosition - 1 is the current token's end
38   public int initialPosition, eofPosition;
39   // after eofPosition, eof is generated instead of real tokens from the
40   // source
41   public boolean tokenizeComments;
42   public boolean tokenizeWhiteSpace; // when true, getNextToken() returns TokenNameHTML for the text that precedes a PHP open tag
43   // source should be viewed as a window (aka a part)
44   // of an entire, very large stream
45   public char source[];
46   // unicode support: buffer holding the current token with unicode escapes already decoded
47   public char[] withoutUnicodeBuffer;
48   public int withoutUnicodePtr; // 0 is used as a fast test flag, so the real first char of the buffer is at index 1
49   // withoutUnicodePtr == 0 ==> no unicode in the current token
50   public boolean unicodeAsBackSlash = false;
51   public boolean scanningFloatLiteral = false;
52   //support for /** comments
53   //public char[][] comments = new char[10][];
54   public int[] commentStops = new int[10];
55   public int[] commentStarts = new int[10];
56   public int commentPtr = -1; // commentPtr == -1 is the test for "no comment"
57   // diet parsing support - when set, the next getNextToken() call in PHP mode runs jumpOverMethodBody() and clears the flag
58   public boolean diet = false;
59   //support for the poor-line-debuggers ....
60   // remember the position of the cr/lf
61   public int[] lineEnds = new int[250];
62   public int linePtr = -1; // presumably the index of the last used entry of lineEnds (-1 when none) -- confirm against pushLineSeparator()
63   public boolean wasAcr = false; // reset to false by getNextToken() on entry to PHP-mode scanning; presumably "was a carriage return" -- confirm
     // Error identifiers; presumably used as InvalidInputException messages (their use is not visible here -- confirm).
64   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
65   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
66   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
67   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
68   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
69   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
70   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
71   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
72   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
73   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
74   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
75   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
76   //----------------optimized identifier management------------------
77   static final char[] charArray_a = new char[]{'a'},
78       charArray_b = new char[]{'b'}, charArray_c = new char[]{'c'},
79       charArray_d = new char[]{'d'}, charArray_e = new char[]{'e'},
80       charArray_f = new char[]{'f'}, charArray_g = new char[]{'g'},
81       charArray_h = new char[]{'h'}, charArray_i = new char[]{'i'},
82       charArray_j = new char[]{'j'}, charArray_k = new char[]{'k'},
83       charArray_l = new char[]{'l'}, charArray_m = new char[]{'m'},
84       charArray_n = new char[]{'n'}, charArray_o = new char[]{'o'},
85       charArray_p = new char[]{'p'}, charArray_q = new char[]{'q'},
86       charArray_r = new char[]{'r'}, charArray_s = new char[]{'s'},
87       charArray_t = new char[]{'t'}, charArray_u = new char[]{'u'},
88       charArray_v = new char[]{'v'}, charArray_w = new char[]{'w'},
89       charArray_x = new char[]{'x'}, charArray_y = new char[]{'y'},
90       charArray_z = new char[]{'z'}; // one shared single-character array per lowercase ASCII letter
91   static final char[] initCharArray = new char[]{'\u0000', '\u0000', '\u0000',
92       '\u0000', '\u0000', '\u0000'}; // six NUL chars; the instance initializer stores this array in every charArray_length slot
93   static final int TableSize = 30, InternalTableSize = 6;
94   // TableSize * InternalTableSize = 30*6 = 180 entries for each token length
95   public static final int OptimizedLength = 6; // first dimension of charArray_length; getCurrentIdentifierSource() special-cases lengths 1..6
96   public/* static */
97   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
98   // support for detecting non-externalized string literals
99   int currentLineNr = -1;
100   int previousLineNr = -1;
101   NLSLine currentLine = null; // set to null by getNextToken() on a line break when recordLineSeparator is false
102   List lines = new ArrayList();
103   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
104   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length(); // cached length of TAG_PREFIX
105   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
106   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length(); // cached length of TAG_POSTFIX
107   public StringLiteral[] nonNLSStrings = null;
108   public boolean checkNonExternalizedStringLiterals = true; // enabled by default
109   public boolean wasNonExternalizedStringLiteral = false;
110   /* static */{
111     for (int i = 0; i < 6; i++) {
112       for (int j = 0; j < TableSize; j++) {
113         for (int k = 0; k < InternalTableSize; k++) {
114           charArray_length[i][j][k] = initCharArray;
115         }
116       }
117     }
118   }
119   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
120       newEntry6 = 0; // presumably per-length insertion cursors for charArray_length (their users are not visible here) -- confirm
121   public static final int RoundBracket = 0; // bracket-kind codes: 0, 1, 2
122   public static final int SquareBracket = 1;
123   public static final int CurlyBracket = 2;
124   public static final int BracketKinds = 3; // number of bracket kinds declared above
125   // task tag support
126   public char[][] foundTaskTags = null;
127   public char[][] foundTaskMessages;
128   public char[][] foundTaskPriorities = null;
129   public int[][] foundTaskPositions;
130   public int foundTaskCount = 0;
131   public char[][] taskTags = null;
132   public char[][] taskPriorities = null;
133   public static final boolean DEBUG = false; // compile-time debug switch, off
134   public Scanner() { // default scanner: passes false for both tokenizeComments and tokenizeWhiteSpace
135     this(false, false);
136   }
137   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) { // convenience overload
138     this(tokenizeComments, tokenizeWhiteSpace, false); // delegates to the three-argument constructor (not shown here) with false as its third argument
139   }
140   /**
141    * Determines if the specified character is permissible as the first
142    * character in a PHP identifier
143    */
144   public static boolean isPHPIdentifierStart(char ch) {
145     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
146   }
147   /**
148    * Determines if the specified character may be part of a PHP identifier as
149    * other than the first character
150    */
151   public static boolean isPHPIdentifierPart(char ch) {
152     return Character.isLetterOrDigit(ch) || (ch == '_')
153         || (0x7F <= ch && ch <= 0xFF);
154   }
155   public final boolean atEnd() {
156     // This code is not relevant if source is
157     // Only a part of the real stream input
158     return source.length == currentPosition;
159   }
160   public char[] getCurrentIdentifierSource() {
161     //return the token REAL source (aka unicodes are precomputed)
162     char[] result;
163     //    if (withoutUnicodePtr != 0)
164     //      //0 is used as a fast test flag so the real first char is in position 1
165     //      System.arraycopy(
166     //        withoutUnicodeBuffer,
167     //        1,
168     //        result = new char[withoutUnicodePtr],
169     //        0,
170     //        withoutUnicodePtr);
171     //    else {
172     int length = currentPosition - startPosition;
173     switch (length) { // see OptimizedLength
174       case 1 :
175         return optimizedCurrentTokenSource1();
176       case 2 :
177         return optimizedCurrentTokenSource2();
178       case 3 :
179         return optimizedCurrentTokenSource3();
180       case 4 :
181         return optimizedCurrentTokenSource4();
182       case 5 :
183         return optimizedCurrentTokenSource5();
184       case 6 :
185         return optimizedCurrentTokenSource6();
186     }
187     //no optimization
188     System.arraycopy(source, startPosition, result = new char[length], 0,
189         length);
190     //   }
191     return result;
192   }
193   public int getCurrentTokenEndPosition() {
194     return this.currentPosition - 1;
195   }
196   public final char[] getCurrentTokenSource() {
197     // Return the token REAL source (aka unicodes are precomputed)
198     char[] result;
199     //    if (withoutUnicodePtr != 0)
200     //      // 0 is used as a fast test flag so the real first char is in position 1
201     //      System.arraycopy(
202     //        withoutUnicodeBuffer,
203     //        1,
204     //        result = new char[withoutUnicodePtr],
205     //        0,
206     //        withoutUnicodePtr);
207     //    else {
208     int length;
209     System.arraycopy(source, startPosition,
210         result = new char[length = currentPosition - startPosition], 0, length);
211     //    }
212     return result;
213   }
214   public final char[] getCurrentTokenSource(int startPos) {
215     // Return the token REAL source (aka unicodes are precomputed)
216     char[] result;
217     //    if (withoutUnicodePtr != 0)
218     //      // 0 is used as a fast test flag so the real first char is in position 1
219     //      System.arraycopy(
220     //        withoutUnicodeBuffer,
221     //        1,
222     //        result = new char[withoutUnicodePtr],
223     //        0,
224     //        withoutUnicodePtr);
225     //    else {
226     int length;
227     System.arraycopy(source, startPos,
228         result = new char[length = currentPosition - startPos], 0, length);
229     //  }
230     return result;
231   }
232   public final char[] getCurrentTokenSourceString() {
233     //return the token REAL source (aka unicodes are precomputed).
234     //REMOVE the two " that are at the beginning and the end.
235     char[] result;
236     if (withoutUnicodePtr != 0)
237       //0 is used as a fast test flag so the real first char is in position 1
238       System.arraycopy(withoutUnicodeBuffer, 2, 
239       //2 is 1 (real start) + 1 (to jump over the ")
240           result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
241     else {
242       int length;
243       System.arraycopy(source, startPosition + 1,
244           result = new char[length = currentPosition - startPosition - 2], 0,
245           length);
246     }
247     return result;
248   }
249   public int getCurrentTokenStartPosition() {
250     return this.startPosition;
251   }
252   public final char[] getCurrentStringLiteralSource() {
253     // Return the token REAL source (aka unicodes are precomputed)
254     char[] result;
255     int length;
256     System.arraycopy(source, startPosition + 1,
257         result = new char[length = currentPosition - startPosition - 2], 0,
258         length);
259     //    }
260     return result;
261   }
262   /*
263    * Search the source position corresponding to the end of a given line number
264    * 
265    * Line numbers are 1-based, and relative to the scanner initialPosition.
266    * Character positions are 0-based.
267    * 
268    * In case the given line number is inconsistent, answers -1.
269    */
270   public final int getLineEnd(int lineNumber) {
271     if (lineEnds == null)
272       return -1;
273     if (lineNumber >= lineEnds.length)
274       return -1;
275     if (lineNumber <= 0)
276       return -1;
277     if (lineNumber == lineEnds.length - 1)
278       return eofPosition;
279     return lineEnds[lineNumber - 1];
280     // next line start one character behind the lineEnd of the previous line
281   }
282   /**
283    * Search the source position corresponding to the beginning of a given line
284    * number
285    * 
286    * Line numbers are 1-based, and relative to the scanner initialPosition.
287    * Character positions are 0-based.
288    * 
289    * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
290    * 
291    * In case the given line number is inconsistent, answers -1.
292    */
293   public final int getLineStart(int lineNumber) {
294     if (lineEnds == null)
295       return -1;
296     if (lineNumber >= lineEnds.length)
297       return -1;
298     if (lineNumber <= 0)
299       return -1;
300     if (lineNumber == 1)
301       return initialPosition;
302     return lineEnds[lineNumber - 2] + 1;
303     // next line start one character behind the lineEnd of the previous line
304   }
305   public final boolean getNextChar(char testedChar) {
306     //BOOLEAN
307     //handle the case of unicode.
308     //when a unicode appears then we must use a buffer that holds char
309     // internal values
310     //At the end of this method currentCharacter holds the new visited char
311     //and currentPosition points right next after it
312     //Both previous lines are true if the currentCharacter is == to the
313     // testedChar
314     //On false, no side effect has occured.
315     //ALL getNextChar.... ARE OPTIMIZED COPIES
316     int temp = currentPosition;
317     try {
318       currentCharacter = source[currentPosition++];
319       //      if (((currentCharacter = source[currentPosition++]) == '\\')
320       //        && (source[currentPosition] == 'u')) {
321       //        //-------------unicode traitement ------------
322       //        int c1, c2, c3, c4;
323       //        int unicodeSize = 6;
324       //        currentPosition++;
325       //        while (source[currentPosition] == 'u') {
326       //          currentPosition++;
327       //          unicodeSize++;
328       //        }
329       //
330       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
331       //          || c1 < 0)
332       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
333       //            || c2 < 0)
334       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
335       //            || c3 < 0)
336       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
337       //            || c4 < 0)) {
338       //          currentPosition = temp;
339       //          return false;
340       //        }
341       //
342       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
343       //        if (currentCharacter != testedChar) {
344       //          currentPosition = temp;
345       //          return false;
346       //        }
347       //        unicodeAsBackSlash = currentCharacter == '\\';
348       //
349       //        //need the unicode buffer
350       //        if (withoutUnicodePtr == 0) {
351       //          //buffer all the entries that have been left aside....
352       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
353       //          System.arraycopy(
354       //            source,
355       //            startPosition,
356       //            withoutUnicodeBuffer,
357       //            1,
358       //            withoutUnicodePtr);
359       //        }
360       //        //fill the buffer with the char
361       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
362       //        return true;
363       //
364       //      } //-------------end unicode traitement--------------
365       //      else {
366       if (currentCharacter != testedChar) {
367         currentPosition = temp;
368         return false;
369       }
370       unicodeAsBackSlash = false;
371       //        if (withoutUnicodePtr != 0)
372       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
373       return true;
374       //      }
375     } catch (IndexOutOfBoundsException e) {
376       unicodeAsBackSlash = false;
377       currentPosition = temp;
378       return false;
379     }
380   }
381   public final int getNextChar(char testedChar1, char testedChar2) {
382     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
383     //test can be done with (x==0) for the first and (x>0) for the second
384     //handle the case of unicode.
385     //when a unicode appears then we must use a buffer that holds char
386     // internal values
387     //At the end of this method currentCharacter holds the new visited char
388     //and currentPosition points right next after it
389     //Both previous lines are true if the currentCharacter is == to the
390     // testedChar1/2
391     //On false, no side effect has occured.
392     //ALL getNextChar.... ARE OPTIMIZED COPIES
393     int temp = currentPosition;
394     try {
395       int result;
396       currentCharacter = source[currentPosition++];
397       //      if (((currentCharacter = source[currentPosition++]) == '\\')
398       //        && (source[currentPosition] == 'u')) {
399       //        //-------------unicode traitement ------------
400       //        int c1, c2, c3, c4;
401       //        int unicodeSize = 6;
402       //        currentPosition++;
403       //        while (source[currentPosition] == 'u') {
404       //          currentPosition++;
405       //          unicodeSize++;
406       //        }
407       //
408       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
409       //          || c1 < 0)
410       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
411       //            || c2 < 0)
412       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
413       //            || c3 < 0)
414       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
415       //            || c4 < 0)) {
416       //          currentPosition = temp;
417       //          return 2;
418       //        }
419       //
420       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
421       //        if (currentCharacter == testedChar1)
422       //          result = 0;
423       //        else if (currentCharacter == testedChar2)
424       //          result = 1;
425       //        else {
426       //          currentPosition = temp;
427       //          return -1;
428       //        }
429       //
430       //        //need the unicode buffer
431       //        if (withoutUnicodePtr == 0) {
432       //          //buffer all the entries that have been left aside....
433       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
434       //          System.arraycopy(
435       //            source,
436       //            startPosition,
437       //            withoutUnicodeBuffer,
438       //            1,
439       //            withoutUnicodePtr);
440       //        }
441       //        //fill the buffer with the char
442       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
443       //        return result;
444       //      } //-------------end unicode traitement--------------
445       //      else {
446       if (currentCharacter == testedChar1)
447         result = 0;
448       else if (currentCharacter == testedChar2)
449         result = 1;
450       else {
451         currentPosition = temp;
452         return -1;
453       }
454       //        if (withoutUnicodePtr != 0)
455       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
456       return result;
457       //     }
458     } catch (IndexOutOfBoundsException e) {
459       currentPosition = temp;
460       return -1;
461     }
462   }
463   public final boolean getNextCharAsDigit() {
464     //BOOLEAN
465     //handle the case of unicode.
466     //when a unicode appears then we must use a buffer that holds char
467     // internal values
468     //At the end of this method currentCharacter holds the new visited char
469     //and currentPosition points right next after it
470     //Both previous lines are true if the currentCharacter is a digit
471     //On false, no side effect has occured.
472     //ALL getNextChar.... ARE OPTIMIZED COPIES
473     int temp = currentPosition;
474     try {
475       currentCharacter = source[currentPosition++];
476       //      if (((currentCharacter = source[currentPosition++]) == '\\')
477       //        && (source[currentPosition] == 'u')) {
478       //        //-------------unicode traitement ------------
479       //        int c1, c2, c3, c4;
480       //        int unicodeSize = 6;
481       //        currentPosition++;
482       //        while (source[currentPosition] == 'u') {
483       //          currentPosition++;
484       //          unicodeSize++;
485       //        }
486       //
487       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
488       //          || c1 < 0)
489       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
490       //            || c2 < 0)
491       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
492       //            || c3 < 0)
493       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
494       //            || c4 < 0)) {
495       //          currentPosition = temp;
496       //          return false;
497       //        }
498       //
499       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
500       //        if (!Character.isDigit(currentCharacter)) {
501       //          currentPosition = temp;
502       //          return false;
503       //        }
504       //
505       //        //need the unicode buffer
506       //        if (withoutUnicodePtr == 0) {
507       //          //buffer all the entries that have been left aside....
508       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
509       //          System.arraycopy(
510       //            source,
511       //            startPosition,
512       //            withoutUnicodeBuffer,
513       //            1,
514       //            withoutUnicodePtr);
515       //        }
516       //        //fill the buffer with the char
517       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
518       //        return true;
519       //      } //-------------end unicode traitement--------------
520       //      else {
521       if (!Character.isDigit(currentCharacter)) {
522         currentPosition = temp;
523         return false;
524       }
525       //        if (withoutUnicodePtr != 0)
526       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
527       return true;
528       //      }
529     } catch (IndexOutOfBoundsException e) {
530       currentPosition = temp;
531       return false;
532     }
533   }
534   public final boolean getNextCharAsDigit(int radix) {
535     //BOOLEAN
536     //handle the case of unicode.
537     //when a unicode appears then we must use a buffer that holds char
538     // internal values
539     //At the end of this method currentCharacter holds the new visited char
540     //and currentPosition points right next after it
541     //Both previous lines are true if the currentCharacter is a digit base on
542     // radix
543     //On false, no side effect has occured.
544     //ALL getNextChar.... ARE OPTIMIZED COPIES
545     int temp = currentPosition;
546     try {
547       currentCharacter = source[currentPosition++];
548       //      if (((currentCharacter = source[currentPosition++]) == '\\')
549       //        && (source[currentPosition] == 'u')) {
550       //        //-------------unicode traitement ------------
551       //        int c1, c2, c3, c4;
552       //        int unicodeSize = 6;
553       //        currentPosition++;
554       //        while (source[currentPosition] == 'u') {
555       //          currentPosition++;
556       //          unicodeSize++;
557       //        }
558       //
559       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
560       //          || c1 < 0)
561       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
562       //            || c2 < 0)
563       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
564       //            || c3 < 0)
565       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
566       //            || c4 < 0)) {
567       //          currentPosition = temp;
568       //          return false;
569       //        }
570       //
571       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
572       //        if (Character.digit(currentCharacter, radix) == -1) {
573       //          currentPosition = temp;
574       //          return false;
575       //        }
576       //
577       //        //need the unicode buffer
578       //        if (withoutUnicodePtr == 0) {
579       //          //buffer all the entries that have been left aside....
580       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
581       //          System.arraycopy(
582       //            source,
583       //            startPosition,
584       //            withoutUnicodeBuffer,
585       //            1,
586       //            withoutUnicodePtr);
587       //        }
588       //        //fill the buffer with the char
589       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
590       //        return true;
591       //      } //-------------end unicode traitement--------------
592       //      else {
593       if (Character.digit(currentCharacter, radix) == -1) {
594         currentPosition = temp;
595         return false;
596       }
597       //        if (withoutUnicodePtr != 0)
598       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
599       return true;
600       //      }
601     } catch (IndexOutOfBoundsException e) {
602       currentPosition = temp;
603       return false;
604     }
605   }
606   public boolean getNextCharAsJavaIdentifierPart() {
607     //BOOLEAN
608     //handle the case of unicode.
609     //when a unicode appears then we must use a buffer that holds char
610     // internal values
611     //At the end of this method currentCharacter holds the new visited char
612     //and currentPosition points right next after it
613     //Both previous lines are true if the currentCharacter is a
614     // JavaIdentifierPart
615     //On false, no side effect has occured.
616     //ALL getNextChar.... ARE OPTIMIZED COPIES
617     int temp = currentPosition;
618     try {
619       currentCharacter = source[currentPosition++];
620       //      if (((currentCharacter = source[currentPosition++]) == '\\')
621       //        && (source[currentPosition] == 'u')) {
622       //        //-------------unicode traitement ------------
623       //        int c1, c2, c3, c4;
624       //        int unicodeSize = 6;
625       //        currentPosition++;
626       //        while (source[currentPosition] == 'u') {
627       //          currentPosition++;
628       //          unicodeSize++;
629       //        }
630       //
631       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
632       //          || c1 < 0)
633       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
634       //            || c2 < 0)
635       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
636       //            || c3 < 0)
637       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
638       //            || c4 < 0)) {
639       //          currentPosition = temp;
640       //          return false;
641       //        }
642       //
643       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
644       //        if (!isPHPIdentifierPart(currentCharacter)) {
645       //          currentPosition = temp;
646       //          return false;
647       //        }
648       //
649       //        //need the unicode buffer
650       //        if (withoutUnicodePtr == 0) {
651       //          //buffer all the entries that have been left aside....
652       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
653       //          System.arraycopy(
654       //            source,
655       //            startPosition,
656       //            withoutUnicodeBuffer,
657       //            1,
658       //            withoutUnicodePtr);
659       //        }
660       //        //fill the buffer with the char
661       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
662       //        return true;
663       //      } //-------------end unicode traitement--------------
664       //      else {
665       if (!isPHPIdentifierPart(currentCharacter)) {
666         currentPosition = temp;
667         return false;
668       }
669       //        if (withoutUnicodePtr != 0)
670       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
671       return true;
672       //      }
673     } catch (IndexOutOfBoundsException e) {
674       currentPosition = temp;
675       return false;
676     }
677   }
678   public int getNextToken() throws InvalidInputException {
679     int htmlPosition = currentPosition;
680     try {
681       while (!phpMode) {
682         currentCharacter = source[currentPosition++];
683         if (currentCharacter == '<') {
684           if (getNextChar('?')) {
685             currentCharacter = source[currentPosition++];
686             if ((currentCharacter == ' ')
687                 || Character.isWhitespace(currentCharacter)) {
688               // <?
689               startPosition = currentPosition;
690               phpMode = true;
691               if (tokenizeWhiteSpace) {
692                 // && (whiteStart != currentPosition - 1)) {
693                 // reposition scanner in case we are interested by spaces as
694                 // tokens
695                 startPosition = htmlPosition;
696                 return TokenNameHTML;
697               }
698             } else {
699               boolean phpStart = (currentCharacter == 'P')
700                   || (currentCharacter == 'p');
701               if (phpStart) {
702                 int test = getNextChar('H', 'h');
703                 if (test >= 0) {
704                   test = getNextChar('P', 'p');
705                   if (test >= 0) {
706                     // <?PHP <?php
707                     startPosition = currentPosition;
708                     phpMode = true;
709                     if (tokenizeWhiteSpace) {
710                       // && (whiteStart != currentPosition - 1)) {
711                       // reposition scanner in case we are interested by spaces
712                       // as tokens
713                       startPosition = htmlPosition;
714                       return TokenNameHTML;
715                     }
716                   }
717                 }
718               }
719             }
720           }
721         }
722         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
723           if (recordLineSeparator) {
724             pushLineSeparator();
725           } else {
726             currentLine = null;
727           }
728         }
729       }
730     } //-----------------end switch while try--------------------
731     catch (IndexOutOfBoundsException e) {
732       if (tokenizeWhiteSpace) {
733         // && (whiteStart != currentPosition - 1)) {
734         // reposition scanner in case we are interested by spaces as tokens
735         startPosition = htmlPosition;
736       }
737       return TokenNameEOF;
738     }
739     if (phpMode) {
740       this.wasAcr = false;
741       if (diet) {
742         jumpOverMethodBody();
743         diet = false;
744         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
745       }
746       try {
747         while (true) { //loop for jumping over comments
748           withoutUnicodePtr = 0;
749           //start with a new token (even comment written with unicode )
750           // ---------Consume white space and handles startPosition---------
751           int whiteStart = currentPosition;
752           boolean isWhiteSpace;
753           do {
754             startPosition = currentPosition;
755             currentCharacter = source[currentPosition++];
756             //            if (((currentCharacter = source[currentPosition++]) == '\\')
757             //              && (source[currentPosition] == 'u')) {
758             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
759             //            } else {
760             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
761               checkNonExternalizeString();
762               if (recordLineSeparator) {
763                 pushLineSeparator();
764               } else {
765                 currentLine = null;
766               }
767             }
768             isWhiteSpace = (currentCharacter == ' ')
769                 || Character.isWhitespace(currentCharacter);
770             //            }
771           } while (isWhiteSpace);
772           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
773             // reposition scanner in case we are interested by spaces as tokens
774             currentPosition--;
775             startPosition = whiteStart;
776             return TokenNameWHITESPACE;
777           }
778           //little trick to get out in the middle of a source computation
779           if (currentPosition > eofPosition)
780             return TokenNameEOF;
781           // ---------Identify the next token-------------
782           switch (currentCharacter) {
783             case '(' :
784               return TokenNameLPAREN;
785             case ')' :
786               return TokenNameRPAREN;
787             case '{' :
788               return TokenNameLBRACE;
789             case '}' :
790               return TokenNameRBRACE;
791             case '[' :
792               return TokenNameLBRACKET;
793             case ']' :
794               return TokenNameRBRACKET;
795             case ';' :
796               return TokenNameSEMICOLON;
797             case ',' :
798               return TokenNameCOMMA;
799             case '.' :
800               if (getNextCharAsDigit())
801                 return scanNumber(true);
802               return TokenNameDOT;
803             case '+' :
804               {
805                 int test;
806                 if ((test = getNextChar('+', '=')) == 0)
807                   return TokenNamePLUS_PLUS;
808                 if (test > 0)
809                   return TokenNamePLUS_EQUAL;
810                 return TokenNamePLUS;
811               }
812             case '-' :
813               {
814                 int test;
815                 if ((test = getNextChar('-', '=')) == 0)
816                   return TokenNameMINUS_MINUS;
817                 if (test > 0)
818                   return TokenNameMINUS_EQUAL;
819                 if (getNextChar('>'))
820                   return TokenNameMINUS_GREATER;
821                 return TokenNameMINUS;
822               }
823             case '~' :
824               if (getNextChar('='))
825                 return TokenNameTWIDDLE_EQUAL;
826               return TokenNameTWIDDLE;
827             case '!' :
828               if (getNextChar('=')) {
829                 if (getNextChar('=')) {
830                   return TokenNameNOT_EQUAL_EQUAL;
831                 }
832                 return TokenNameNOT_EQUAL;
833               }
834               return TokenNameNOT;
835             case '*' :
836               if (getNextChar('='))
837                 return TokenNameMULTIPLY_EQUAL;
838               return TokenNameMULTIPLY;
839             case '%' :
840               if (getNextChar('='))
841                 return TokenNameREMAINDER_EQUAL;
842               return TokenNameREMAINDER;
843             case '<' :
844               {
845                 int test;
846                 if ((test = getNextChar('=', '<')) == 0)
847                   return TokenNameLESS_EQUAL;
848                 if (test > 0) {
849                   if (getNextChar('='))
850                     return TokenNameLEFT_SHIFT_EQUAL;
851                   if (getNextChar('<')) {
852                     int heredocStart = currentPosition;
853                     int heredocLength = 0;
854                     currentCharacter = source[currentPosition++];
855                     if (isPHPIdentifierStart(currentCharacter)) {
856                       currentCharacter = source[currentPosition++];
857                     } else {
858                       return TokenNameERROR;
859                     }
860                     while (isPHPIdentifierPart(currentCharacter)) {
861                       currentCharacter = source[currentPosition++];
862                     }
863                     heredocLength = currentPosition - heredocStart - 1;
864                     // heredoc end-tag determination
865                     boolean endTag = true;
866                     char ch;
867                     do {
868                       ch = source[currentPosition++];
869                       if (ch == '\r' || ch == '\n') {
870                         if (recordLineSeparator) {
871                           pushLineSeparator();
872                         } else {
873                           currentLine = null;
874                         }
875                         for (int i = 0; i < heredocLength; i++) {
876                           if (source[currentPosition + i] != source[heredocStart
877                               + i]) {
878                             endTag = false;
879                             break;
880                           }
881                         }
882                         if (endTag) {
883                           currentPosition += heredocLength - 1;
884                           currentCharacter = source[currentPosition++];
885                           break; // do...while loop
886                         } else {
887                           endTag = true;
888                         }
889                       }
890                     } while (true);
891                     return TokenNameHEREDOC;
892                   }
893                   return TokenNameLEFT_SHIFT;
894                 }
895                 return TokenNameLESS;
896               }
897             case '>' :
898               {
899                 int test;
900                 if ((test = getNextChar('=', '>')) == 0)
901                   return TokenNameGREATER_EQUAL;
902                 if (test > 0) {
903                   if ((test = getNextChar('=', '>')) == 0)
904                     return TokenNameRIGHT_SHIFT_EQUAL;
905                   return TokenNameRIGHT_SHIFT;
906                 }
907                 return TokenNameGREATER;
908               }
909             case '=' :
910               if (getNextChar('=')) {
911                 if (getNextChar('=')) {
912                   return TokenNameEQUAL_EQUAL_EQUAL;
913                 }
914                 return TokenNameEQUAL_EQUAL;
915               }
916               if (getNextChar('>'))
917                 return TokenNameEQUAL_GREATER;
918               return TokenNameEQUAL;
919             case '&' :
920               {
921                 int test;
922                 if ((test = getNextChar('&', '=')) == 0)
923                   return TokenNameAND_AND;
924                 if (test > 0)
925                   return TokenNameAND_EQUAL;
926                 return TokenNameAND;
927               }
928             case '|' :
929               {
930                 int test;
931                 if ((test = getNextChar('|', '=')) == 0)
932                   return TokenNameOR_OR;
933                 if (test > 0)
934                   return TokenNameOR_EQUAL;
935                 return TokenNameOR;
936               }
937             case '^' :
938               if (getNextChar('='))
939                 return TokenNameXOR_EQUAL;
940               return TokenNameXOR;
941             case '?' :
942               if (getNextChar('>')) {
943                 phpMode = false;
944                 return TokenNameStopPHP;
945               }
946               return TokenNameQUESTION;
947             case ':' :
948               if (getNextChar(':'))
949                 return TokenNamePAAMAYIM_NEKUDOTAYIM;
950               return TokenNameCOLON;
951             case '@' :
952               return TokenNameAT;
953             //                                  case '\'' :
954             //                                          {
955             //                                                  int test;
956             //                                                  if ((test = getNextChar('\n', '\r')) == 0) {
957             //                                                          throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
958             //                                                  }
959             //                                                  if (test > 0) {
960             //                                                          // relocate if finding another quote fairly close: thus unicode
961             // '/u000D' will be fully consumed
962             //                                                          for (int lookAhead = 0;
963             //                                                                  lookAhead < 3;
964             //                                                                  lookAhead++) {
965             //                                                                  if (currentPosition + lookAhead
966             //                                                                          == source.length)
967             //                                                                          break;
968             //                                                                  if (source[currentPosition + lookAhead]
969             //                                                                          == '\n')
970             //                                                                          break;
971             //                                                                  if (source[currentPosition + lookAhead]
972             //                                                                          == '\'') {
973             //                                                                          currentPosition += lookAhead + 1;
974             //                                                                          break;
975             //                                                                  }
976             //                                                          }
977             //                                                          throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
978             //                                                  }
979             //                                          }
980             //                                          if (getNextChar('\'')) {
981             //                                                  // relocate if finding another quote fairly close: thus unicode
982             // '/u000D' will be fully consumed
983             //                                                  for (int lookAhead = 0;
984             //                                                          lookAhead < 3;
985             //                                                          lookAhead++) {
986             //                                                          if (currentPosition + lookAhead
987             //                                                                  == source.length)
988             //                                                                  break;
989             //                                                          if (source[currentPosition + lookAhead]
990             //                                                                  == '\n')
991             //                                                                  break;
992             //                                                          if (source[currentPosition + lookAhead]
993             //                                                                  == '\'') {
994             //                                                                  currentPosition += lookAhead + 1;
995             //                                                                  break;
996             //                                                          }
997             //                                                  }
998             //                                                  throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
999             //                                          }
1000             //                                          if (getNextChar('\\'))
1001             //                                                  scanEscapeCharacter();
1002             //                                          else { // consume next character
1003             //                                                  unicodeAsBackSlash = false;
1004             //                                                  if (((currentCharacter = source[currentPosition++])
1005             //                                                          == '\\')
1006             //                                                          && (source[currentPosition] == 'u')) {
1007             //                                                          getNextUnicodeChar();
1008             //                                                  } else {
1009             //                                                          if (withoutUnicodePtr != 0) {
1010             //                                                                  withoutUnicodeBuffer[++withoutUnicodePtr] =
1011             //                                                                          currentCharacter;
1012             //                                                          }
1013             //                                                  }
1014             //                                          }
1015             //                                          // if (getNextChar('\''))
1016             //                                          // return TokenNameCharacterLiteral;
1017             //                                          // relocate if finding another quote fairly close: thus unicode
1018             // '/u000D' will be fully consumed
1019             //                                          for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1020             //                                                  if (currentPosition + lookAhead == source.length)
1021             //                                                          break;
1022             //                                                  if (source[currentPosition + lookAhead] == '\n')
1023             //                                                          break;
1024             //                                                  if (source[currentPosition + lookAhead] == '\'') {
1025             //                                                          currentPosition += lookAhead + 1;
1026             //                                                          break;
1027             //                                                  }
1028             //                                          }
1029             //                                          throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1030             case '\'' :
1031               try {
1032                 // consume next character
1033                 unicodeAsBackSlash = false;
1034                 currentCharacter = source[currentPosition++];
1035                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1036                 //                  && (source[currentPosition] == 'u')) {
1037                 //                  getNextUnicodeChar();
1038                 //                } else {
1039                 //                  if (withoutUnicodePtr != 0) {
1040                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1041                 //                      currentCharacter;
1042                 //                  }
1043                 //                }
1044                 while (currentCharacter != '\'') {
1045                   /** ** in PHP \r and \n are valid in string literals *** */
1046                   //                  if ((currentCharacter == '\n')
1047                   //                    || (currentCharacter == '\r')) {
1048                   //                    // relocate if finding another quote fairly close: thus
1049                   // unicode '/u000D' will be fully consumed
1050                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1051                   //                      if (currentPosition + lookAhead == source.length)
1052                   //                        break;
1053                   //                      if (source[currentPosition + lookAhead] == '\n')
1054                   //                        break;
1055                   //                      if (source[currentPosition + lookAhead] == '\"') {
1056                   //                        currentPosition += lookAhead + 1;
1057                   //                        break;
1058                   //                      }
1059                   //                    }
1060                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1061                   //                  }
1062                   if (currentCharacter == '\\') {
1063                     int escapeSize = currentPosition;
1064                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1065                     //the escape-scanning call below has a side effect on this
1066                     // value, and we need the previous value a few lines further down
1067                     scanSingleQuotedEscapeCharacter();
1068                     escapeSize = currentPosition - escapeSize;
1069                     if (withoutUnicodePtr == 0) {
1070                       //buffer all the entries that have been left aside....
1071                       withoutUnicodePtr = currentPosition - escapeSize - 1
1072                           - startPosition;
1073                       System.arraycopy(source, startPosition,
1074                           withoutUnicodeBuffer, 1, withoutUnicodePtr);
1075                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1076                     } else { //overwrite the / in the buffer
1077                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1078                       if (backSlashAsUnicodeInString) { //there are TWO \ in
1079                         // the stream where
1080                         // only one is correct
1081                         withoutUnicodePtr--;
1082                       }
1083                     }
1084                   }
1085                   // consume next character
1086                   unicodeAsBackSlash = false;
1087                   currentCharacter = source[currentPosition++];
1088                   //                  if (((currentCharacter = source[currentPosition++]) ==
1089                   // '\\')
1090                   //                    && (source[currentPosition] == 'u')) {
1091                   //                    getNextUnicodeChar();
1092                   //                  } else {
1093                   if (withoutUnicodePtr != 0) {
1094                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1095                   }
1096                   //                  }
1097                 }
1098               } catch (IndexOutOfBoundsException e) {
1099                 throw new InvalidInputException(UNTERMINATED_STRING);
1100               } catch (InvalidInputException e) {
1101                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1102                   // relocate if finding another quote fairly close: thus
1103                   // unicode '/u000D' will be fully consumed
1104                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1105                     if (currentPosition + lookAhead == source.length)
1106                       break;
1107                     if (source[currentPosition + lookAhead] == '\n')
1108                       break;
1109                     if (source[currentPosition + lookAhead] == '\'') {
1110                       currentPosition += lookAhead + 1;
1111                       break;
1112                     }
1113                   }
1114                 }
1115                 throw e; // rethrow
1116               }
1117               if (checkNonExternalizedStringLiterals) { // check for presence
1118                 // of NLS tags
1119                 // //$NON-NLS-?$ where
1120                 // ? is an int.
1121                 if (currentLine == null) {
1122                   currentLine = new NLSLine();
1123                   lines.add(currentLine);
1124                 }
1125                 currentLine.add(new StringLiteral(
1126                     getCurrentTokenSourceString(), startPosition,
1127                     currentPosition - 1));
1128               }
1129               return TokenNameStringConstant;
1130             case '"' :
1131               try {
1132                 // consume next character
1133                 unicodeAsBackSlash = false;
1134                 currentCharacter = source[currentPosition++];
1135                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1136                 //                  && (source[currentPosition] == 'u')) {
1137                 //                  getNextUnicodeChar();
1138                 //                } else {
1139                 //                  if (withoutUnicodePtr != 0) {
1140                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1141                 //                      currentCharacter;
1142                 //                  }
1143                 //                }
1144                 while (currentCharacter != '"') {
1145                   /** ** in PHP \r and \n are valid in string literals *** */
1146                   //                  if ((currentCharacter == '\n')
1147                   //                    || (currentCharacter == '\r')) {
1148                   //                    // relocate if finding another quote fairly close: thus
1149                   // unicode '/u000D' will be fully consumed
1150                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1151                   //                      if (currentPosition + lookAhead == source.length)
1152                   //                        break;
1153                   //                      if (source[currentPosition + lookAhead] == '\n')
1154                   //                        break;
1155                   //                      if (source[currentPosition + lookAhead] == '\"') {
1156                   //                        currentPosition += lookAhead + 1;
1157                   //                        break;
1158                   //                      }
1159                   //                    }
1160                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1161                   //                  }
1162                   if (currentCharacter == '\\') {
1163                     int escapeSize = currentPosition;
1164                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1165                     //the escape-scanning call below has a side effect on this
1166                     // value, and we need the previous value a few lines further down
1167                     scanDoubleQuotedEscapeCharacter();
1168                     escapeSize = currentPosition - escapeSize;
1169                     if (withoutUnicodePtr == 0) {
1170                       //buffer all the entries that have been left aside....
1171                       withoutUnicodePtr = currentPosition - escapeSize - 1
1172                           - startPosition;
1173                       System.arraycopy(source, startPosition,
1174                           withoutUnicodeBuffer, 1, withoutUnicodePtr);
1175                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1176                     } else { //overwrite the / in the buffer
1177                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1178                       if (backSlashAsUnicodeInString) { //there are TWO \ in
1179                         // the stream where
1180                         // only one is correct
1181                         withoutUnicodePtr--;
1182                       }
1183                     }
1184                   }
1185                   // consume next character
1186                   unicodeAsBackSlash = false;
1187                   currentCharacter = source[currentPosition++];
1188                   //                  if (((currentCharacter = source[currentPosition++]) ==
1189                   // '\\')
1190                   //                    && (source[currentPosition] == 'u')) {
1191                   //                    getNextUnicodeChar();
1192                   //                  } else {
1193                   if (withoutUnicodePtr != 0) {
1194                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1195                   }
1196                   //                  }
1197                 }
1198               } catch (IndexOutOfBoundsException e) {
1199                 throw new InvalidInputException(UNTERMINATED_STRING);
1200               } catch (InvalidInputException e) {
1201                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1202                   // relocate if finding another quote fairly close: thus
1203                   // unicode '/u000D' will be fully consumed
1204                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1205                     if (currentPosition + lookAhead == source.length)
1206                       break;
1207                     if (source[currentPosition + lookAhead] == '\n')
1208                       break;
1209                     if (source[currentPosition + lookAhead] == '\"') {
1210                       currentPosition += lookAhead + 1;
1211                       break;
1212                     }
1213                   }
1214                 }
1215                 throw e; // rethrow
1216               }
1217               if (checkNonExternalizedStringLiterals) { // check for presence
1218                 // of NLS tags
1219                 // //$NON-NLS-?$ where
1220                 // ? is an int.
1221                 if (currentLine == null) {
1222                   currentLine = new NLSLine();
1223                   lines.add(currentLine);
1224                 }
1225                 currentLine.add(new StringLiteral(
1226                     getCurrentTokenSourceString(), startPosition,
1227                     currentPosition - 1));
1228               }
1229               return TokenNameStringLiteral;
1230             case '`' :
1231               try {
1232                 // consume next character
1233                 unicodeAsBackSlash = false;
1234                 currentCharacter = source[currentPosition++];
1235                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1236                 //                  && (source[currentPosition] == 'u')) {
1237                 //                  getNextUnicodeChar();
1238                 //                } else {
1239                 //                  if (withoutUnicodePtr != 0) {
1240                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1241                 //                      currentCharacter;
1242                 //                  }
1243                 //                }
1244                 while (currentCharacter != '`') {
1245                   /** ** in PHP \r and \n are valid in string literals *** */
1246                   //                if ((currentCharacter == '\n')
1247                   //                  || (currentCharacter == '\r')) {
1248                   //                  // relocate if finding another quote fairly close: thus
1249                   // unicode '/u000D' will be fully consumed
1250                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1251                   //                    if (currentPosition + lookAhead == source.length)
1252                   //                      break;
1253                   //                    if (source[currentPosition + lookAhead] == '\n')
1254                   //                      break;
1255                   //                    if (source[currentPosition + lookAhead] == '\"') {
1256                   //                      currentPosition += lookAhead + 1;
1257                   //                      break;
1258                   //                    }
1259                   //                  }
1260                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1261                   //                }
1262                   if (currentCharacter == '\\') {
1263                     int escapeSize = currentPosition;
1264                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1265                     //the escape-scanning call below has a side effect on this
1266                     // value, and we need the previous value a few lines further down
1267                     scanDoubleQuotedEscapeCharacter();
1268                     escapeSize = currentPosition - escapeSize;
1269                     if (withoutUnicodePtr == 0) {
1270                       //buffer all the entries that have been left aside....
1271                       withoutUnicodePtr = currentPosition - escapeSize - 1
1272                           - startPosition;
1273                       System.arraycopy(source, startPosition,
1274                           withoutUnicodeBuffer, 1, withoutUnicodePtr);
1275                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1276                     } else { //overwrite the / in the buffer
1277                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1278                       if (backSlashAsUnicodeInString) { //there are TWO \ in
1279                         // the stream where
1280                         // only one is correct
1281                         withoutUnicodePtr--;
1282                       }
1283                     }
1284                   }
1285                   // consume next character
1286                   unicodeAsBackSlash = false;
1287                   currentCharacter = source[currentPosition++];
1288                   //                  if (((currentCharacter = source[currentPosition++]) ==
1289                   // '\\')
1290                   //                    && (source[currentPosition] == 'u')) {
1291                   //                    getNextUnicodeChar();
1292                   //                  } else {
1293                   if (withoutUnicodePtr != 0) {
1294                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1295                   }
1296                   //                  }
1297                 }
1298               } catch (IndexOutOfBoundsException e) {
1299                 throw new InvalidInputException(UNTERMINATED_STRING);
1300               } catch (InvalidInputException e) {
1301                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1302                   // relocate if finding another quote fairly close: thus
1303                   // unicode '/u000D' will be fully consumed
1304                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1305                     if (currentPosition + lookAhead == source.length)
1306                       break;
1307                     if (source[currentPosition + lookAhead] == '\n')
1308                       break;
1309                     if (source[currentPosition + lookAhead] == '`') {
1310                       currentPosition += lookAhead + 1;
1311                       break;
1312                     }
1313                   }
1314                 }
1315                 throw e; // rethrow
1316               }
1317               if (checkNonExternalizedStringLiterals) { // check for presence
1318                 // of NLS tags
1319                 // //$NON-NLS-?$ where
1320                 // ? is an int.
1321                 if (currentLine == null) {
1322                   currentLine = new NLSLine();
1323                   lines.add(currentLine);
1324                 }
1325                 currentLine.add(new StringLiteral(
1326                     getCurrentTokenSourceString(), startPosition,
1327                     currentPosition - 1));
1328               }
1329               return TokenNameStringInterpolated;
1330             case '#' :
1331             case '/' :
1332               {
1333                 int test;
1334                 if ((currentCharacter == '#')
1335                     || (test = getNextChar('/', '*')) == 0) {
1336                   //line comment
1337                   int endPositionForLineComment = 0;
1338                   try { //get the next char
1339                     currentCharacter = source[currentPosition++];
1340                     //                    if (((currentCharacter = source[currentPosition++])
1341                     //                      == '\\')
1342                     //                      && (source[currentPosition] == 'u')) {
1343                     //                      //-------------unicode traitement ------------
1344                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1345                     //                      currentPosition++;
1346                     //                      while (source[currentPosition] == 'u') {
1347                     //                        currentPosition++;
1348                     //                      }
1349                     //                      if ((c1 =
1350                     //                        Character.getNumericValue(source[currentPosition++]))
1351                     //                        > 15
1352                     //                        || c1 < 0
1353                     //                        || (c2 =
1354                     //                          Character.getNumericValue(source[currentPosition++]))
1355                     //                          > 15
1356                     //                        || c2 < 0
1357                     //                        || (c3 =
1358                     //                          Character.getNumericValue(source[currentPosition++]))
1359                     //                          > 15
1360                     //                        || c3 < 0
1361                     //                        || (c4 =
1362                     //                          Character.getNumericValue(source[currentPosition++]))
1363                     //                          > 15
1364                     //                        || c4 < 0) {
1365                     //                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1366                     //                      } else {
1367                     //                        currentCharacter =
1368                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1369                     //                      }
1370                     //                    }
1371                     //handle the \\u case manually into comment
1372                     //                    if (currentCharacter == '\\') {
1373                     //                      if (source[currentPosition] == '\\')
1374                     //                        currentPosition++;
1375                     //                    } //jump over the \\
1376                     boolean isUnicode = false;
1377                     while (currentCharacter != '\r' && currentCharacter != '\n') {
1378                       if (currentCharacter == '?') {
1379                         if (getNextChar('>')) {
1380                           startPosition = currentPosition - 2;
1381                           phpMode = false;
1382                           return TokenNameStopPHP;
1383                         }
1384                       }
1385                       //get the next char
1386                       isUnicode = false;
1387                       currentCharacter = source[currentPosition++];
1388                       //                      if (((currentCharacter = source[currentPosition++])
1389                       //                        == '\\')
1390                       //                        && (source[currentPosition] == 'u')) {
1391                       //                        isUnicode = true;
1392                       //                        //-------------unicode traitement ------------
1393                       //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1394                       //                        currentPosition++;
1395                       //                        while (source[currentPosition] == 'u') {
1396                       //                          currentPosition++;
1397                       //                        }
1398                       //                        if ((c1 =
1399                       //                          Character.getNumericValue(source[currentPosition++]))
1400                       //                          > 15
1401                       //                          || c1 < 0
1402                       //                          || (c2 =
1403                       //                            Character.getNumericValue(
1404                       //                              source[currentPosition++]))
1405                       //                            > 15
1406                       //                          || c2 < 0
1407                       //                          || (c3 =
1408                       //                            Character.getNumericValue(
1409                       //                              source[currentPosition++]))
1410                       //                            > 15
1411                       //                          || c3 < 0
1412                       //                          || (c4 =
1413                       //                            Character.getNumericValue(
1414                       //                              source[currentPosition++]))
1415                       //                            > 15
1416                       //                          || c4 < 0) {
1417                       //                          throw new
1418                       // InvalidInputException(INVALID_UNICODE_ESCAPE);
1419                       //                        } else {
1420                       //                          currentCharacter =
1421                       //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1422                       //                        }
1423                       //                      }
1424                       //handle the \\u case manually into comment
1425                       //                      if (currentCharacter == '\\') {
1426                       //                        if (source[currentPosition] == '\\')
1427                       //                          currentPosition++;
1428                       //                      } //jump over the \\
1429                     }
1430                     if (isUnicode) {
1431                       endPositionForLineComment = currentPosition - 6;
1432                     } else {
1433                       endPositionForLineComment = currentPosition - 1;
1434                     }
1435                     recordComment(false);
1436                     if ((currentCharacter == '\r')
1437                         || (currentCharacter == '\n')) {
1438                       checkNonExternalizeString();
1439                       if (recordLineSeparator) {
1440                         if (isUnicode) {
1441                           pushUnicodeLineSeparator();
1442                         } else {
1443                           pushLineSeparator();
1444                         }
1445                       } else {
1446                         currentLine = null;
1447                       }
1448                     }
1449                     if (tokenizeComments) {
1450                       if (!isUnicode) {
1451                         currentPosition = endPositionForLineComment;
1452                         // reset one character behind
1453                       }
1454                       return TokenNameCOMMENT_LINE;
1455                     }
1456                   } catch (IndexOutOfBoundsException e) { //an eof will them
1457                     // be generated
1458                     if (tokenizeComments) {
1459                       currentPosition--;
1460                       // reset one character behind
1461                       return TokenNameCOMMENT_LINE;
1462                     }
1463                   }
1464                   break;
1465                 }
1466                 if (test > 0) {
1467                   //traditional and annotation comment
1468                   boolean isJavadoc = false, star = false;
1469                   // consume next character
1470                   unicodeAsBackSlash = false;
1471                   currentCharacter = source[currentPosition++];
1472                   //                  if (((currentCharacter = source[currentPosition++]) ==
1473                   // '\\')
1474                   //                    && (source[currentPosition] == 'u')) {
1475                   //                    getNextUnicodeChar();
1476                   //                  } else {
1477                   //                    if (withoutUnicodePtr != 0) {
1478                   //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1479                   //                        currentCharacter;
1480                   //                    }
1481                   //                  }
1482                   if (currentCharacter == '*') {
1483                     isJavadoc = true;
1484                     star = true;
1485                   }
1486                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1487                     checkNonExternalizeString();
1488                     if (recordLineSeparator) {
1489                       pushLineSeparator();
1490                     } else {
1491                       currentLine = null;
1492                     }
1493                   }
1494                   try { //get the next char
1495                     currentCharacter = source[currentPosition++];
1496                     //                    if (((currentCharacter = source[currentPosition++])
1497                     //                      == '\\')
1498                     //                      && (source[currentPosition] == 'u')) {
1499                     //                      //-------------unicode traitement ------------
1500                     //                      getNextUnicodeChar();
1501                     //                    }
1502                     //handle the \\u case manually into comment
1503                     //                    if (currentCharacter == '\\') {
1504                     //                      if (source[currentPosition] == '\\')
1505                     //                        currentPosition++;
1506                     //                      //jump over the \\
1507                     //                    }
1508                     // empty comment is not a javadoc /**/
1509                     if (currentCharacter == '/') {
1510                       isJavadoc = false;
1511                     }
1512                     //loop until end of comment */
1513                     while ((currentCharacter != '/') || (!star)) {
1514                       if ((currentCharacter == '\r')
1515                           || (currentCharacter == '\n')) {
1516                         checkNonExternalizeString();
1517                         if (recordLineSeparator) {
1518                           pushLineSeparator();
1519                         } else {
1520                           currentLine = null;
1521                         }
1522                       }
1523                       star = currentCharacter == '*';
1524                       //get next char
1525                       currentCharacter = source[currentPosition++];
1526                       //                      if (((currentCharacter = source[currentPosition++])
1527                       //                        == '\\')
1528                       //                        && (source[currentPosition] == 'u')) {
1529                       //                        //-------------unicode traitement ------------
1530                       //                        getNextUnicodeChar();
1531                       //                      }
1532                       //handle the \\u case manually into comment
1533                       //                      if (currentCharacter == '\\') {
1534                       //                        if (source[currentPosition] == '\\')
1535                       //                          currentPosition++;
1536                       //                      } //jump over the \\
1537                     }
1538                     recordComment(isJavadoc);
1539                     if (tokenizeComments) {
1540                       if (isJavadoc)
1541                         return TokenNameCOMMENT_PHPDOC;
1542                       return TokenNameCOMMENT_BLOCK;
1543                     }
1544                   } catch (IndexOutOfBoundsException e) {
1545                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1546                   }
1547                   break;
1548                 }
1549                 if (getNextChar('='))
1550                   return TokenNameDIVIDE_EQUAL;
1551                 return TokenNameDIVIDE;
1552               }
1553             case '\u001a' :
1554               if (atEnd())
1555                 return TokenNameEOF;
1556               //the atEnd may not be <currentPosition == source.length> if
1557               // source is only some part of a real (external) stream
1558               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1559             default :
1560               if (currentCharacter == '$') {
1561                 while ((currentCharacter = source[currentPosition++]) == '$') {
1562                 }
1563                 if (currentCharacter == '{')
1564                   return TokenNameDOLLAR_LBRACE;
1565                 if (isPHPIdentifierStart(currentCharacter))
1566                   return scanIdentifierOrKeyword(true);
1567                 return TokenNameERROR;
1568               }
1569               if (isPHPIdentifierStart(currentCharacter))
1570                 return scanIdentifierOrKeyword(false);
1571               if (Character.isDigit(currentCharacter))
1572                 return scanNumber(false);
1573               return TokenNameERROR;
1574           }
1575         }
1576       } //-----------------end switch while try--------------------
1577       catch (IndexOutOfBoundsException e) {
1578       }
1579     }
1580     return TokenNameEOF;
1581   }
1582   //  public final void getNextUnicodeChar()
1583   //    throws IndexOutOfBoundsException, InvalidInputException {
1584   //    //VOID
1585   //    //handle the case of unicode.
1586   //    //when a unicode appears then we must use a buffer that holds char
1587   // internal values
1588   //    //At the end of this method currentCharacter holds the new visited char
1589   //    //and currentPosition points right next after it
1590   //
1591   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
1592   //
1593   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1594   //    currentPosition++;
1595   //    while (source[currentPosition] == 'u') {
1596   //      currentPosition++;
1597   //      unicodeSize++;
1598   //    }
1599   //
1600   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1601   //      || c1 < 0
1602   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1603   //      || c2 < 0
1604   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1605   //      || c3 < 0
1606   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1607   //      || c4 < 0) {
1608   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1609   //    } else {
1610   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1611   //      //need the unicode buffer
1612   //      if (withoutUnicodePtr == 0) {
1613   //        //buffer all the entries that have been left aside....
1614   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1615   //        System.arraycopy(
1616   //          source,
1617   //          startPosition,
1618   //          withoutUnicodeBuffer,
1619   //          1,
1620   //          withoutUnicodePtr);
1621   //      }
1622   //      //fill the buffer with the char
1623   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1624   //    }
1625   //    unicodeAsBackSlash = currentCharacter == '\\';
1626   //  }
1627   /*
1628    * Skip over a method body up to its closing brace, assuming that curly
1629    * brackets are properly balanced.
1630    */
1631   public final void jumpOverMethodBody() {
1632     this.wasAcr = false;
1633     int found = 1;
1634     try {
1635       while (true) { //loop for jumping over comments
1636         // ---------Consume white space and handles startPosition---------
1637         boolean isWhiteSpace;
1638         do {
1639           startPosition = currentPosition;
1640           currentCharacter = source[currentPosition++];
1641           //          if (((currentCharacter = source[currentPosition++]) == '\\')
1642           //            && (source[currentPosition] == 'u')) {
1643           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1644           //          } else {
1645           if (recordLineSeparator
1646               && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1647             pushLineSeparator();
1648           isWhiteSpace = Character.isWhitespace(currentCharacter);
1649           //          }
1650         } while (isWhiteSpace);
1651         // -------consume token until } is found---------
1652         switch (currentCharacter) {
1653           case '{' :
1654             found++;
1655             break;
1656           case '}' :
1657             found--;
1658             if (found == 0)
1659               return;
1660             break;
1661           case '\'' :
1662             {
1663               boolean test;
1664               test = getNextChar('\\');
1665               if (test) {
1666                 try {
1667                   scanDoubleQuotedEscapeCharacter();
1668                 } catch (InvalidInputException ex) {
1669                 };
1670               } else {
1671                 //                try { // consume next character
1672                 unicodeAsBackSlash = false;
1673                 currentCharacter = source[currentPosition++];
1674                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1675                 //                    && (source[currentPosition] == 'u')) {
1676                 //                    getNextUnicodeChar();
1677                 //                  } else {
1678                 if (withoutUnicodePtr != 0) {
1679                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1680                 }
1681                 //                  }
1682                 //                } catch (InvalidInputException ex) {
1683                 //                };
1684               }
1685               getNextChar('\'');
1686               break;
1687             }
1688           case '"' :
1689             try {
1690               //              try { // consume next character
1691               unicodeAsBackSlash = false;
1692               currentCharacter = source[currentPosition++];
1693               //                if (((currentCharacter = source[currentPosition++]) == '\\')
1694               //                  && (source[currentPosition] == 'u')) {
1695               //                  getNextUnicodeChar();
1696               //                } else {
1697               if (withoutUnicodePtr != 0) {
1698                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1699               }
1700               //                }
1701               //              } catch (InvalidInputException ex) {
1702               //              };
1703               while (currentCharacter != '"') {
1704                 if (currentCharacter == '\r') {
1705                   if (source[currentPosition] == '\n')
1706                     currentPosition++;
1707                   break;
1708                   // the string cannot go further that the line
1709                 }
1710                 if (currentCharacter == '\n') {
1711                   break;
1712                   // the string cannot go further that the line
1713                 }
1714                 if (currentCharacter == '\\') {
1715                   try {
1716                     scanDoubleQuotedEscapeCharacter();
1717                   } catch (InvalidInputException ex) {
1718                   };
1719                 }
1720                 //                try { // consume next character
1721                 unicodeAsBackSlash = false;
1722                 currentCharacter = source[currentPosition++];
1723                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1724                 //                    && (source[currentPosition] == 'u')) {
1725                 //                    getNextUnicodeChar();
1726                 //                  } else {
1727                 if (withoutUnicodePtr != 0) {
1728                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1729                 }
1730                 //                  }
1731                 //                } catch (InvalidInputException ex) {
1732                 //                };
1733               }
1734             } catch (IndexOutOfBoundsException e) {
1735               return;
1736             }
1737             break;
1738           case '/' :
1739             {
1740               int test;
1741               if ((test = getNextChar('/', '*')) == 0) {
1742                 //line comment
1743                 try {
1744                   //get the next char
1745                   currentCharacter = source[currentPosition++];
1746                   //                  if (((currentCharacter = source[currentPosition++]) ==
1747                   // '\\')
1748                   //                    && (source[currentPosition] == 'u')) {
1749                   //                    //-------------unicode traitement ------------
1750                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1751                   //                    currentPosition++;
1752                   //                    while (source[currentPosition] == 'u') {
1753                   //                      currentPosition++;
1754                   //                    }
1755                   //                    if ((c1 =
1756                   //                      Character.getNumericValue(source[currentPosition++]))
1757                   //                      > 15
1758                   //                      || c1 < 0
1759                   //                      || (c2 =
1760                   //                        Character.getNumericValue(source[currentPosition++]))
1761                   //                        > 15
1762                   //                      || c2 < 0
1763                   //                      || (c3 =
1764                   //                        Character.getNumericValue(source[currentPosition++]))
1765                   //                        > 15
1766                   //                      || c3 < 0
1767                   //                      || (c4 =
1768                   //                        Character.getNumericValue(source[currentPosition++]))
1769                   //                        > 15
1770                   //                      || c4 < 0) {
1771                   //                      //error don't care of the value
1772                   //                      currentCharacter = 'A';
1773                   //                    } //something different from \n and \r
1774                   //                    else {
1775                   //                      currentCharacter =
1776                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1777                   //                    }
1778                   //                  }
1779                   while (currentCharacter != '\r' && currentCharacter != '\n') {
1780                     //get the next char
1781                     currentCharacter = source[currentPosition++];
1782                     //                    if (((currentCharacter = source[currentPosition++])
1783                     //                      == '\\')
1784                     //                      && (source[currentPosition] == 'u')) {
1785                     //                      //-------------unicode traitement ------------
1786                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1787                     //                      currentPosition++;
1788                     //                      while (source[currentPosition] == 'u') {
1789                     //                        currentPosition++;
1790                     //                      }
1791                     //                      if ((c1 =
1792                     //                        Character.getNumericValue(source[currentPosition++]))
1793                     //                        > 15
1794                     //                        || c1 < 0
1795                     //                        || (c2 =
1796                     //                          Character.getNumericValue(source[currentPosition++]))
1797                     //                          > 15
1798                     //                        || c2 < 0
1799                     //                        || (c3 =
1800                     //                          Character.getNumericValue(source[currentPosition++]))
1801                     //                          > 15
1802                     //                        || c3 < 0
1803                     //                        || (c4 =
1804                     //                          Character.getNumericValue(source[currentPosition++]))
1805                     //                          > 15
1806                     //                        || c4 < 0) {
1807                     //                        //error don't care of the value
1808                     //                        currentCharacter = 'A';
1809                     //                      } //something different from \n and \r
1810                     //                      else {
1811                     //                        currentCharacter =
1812                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1813                     //                      }
1814                     //                    }
1815                   }
1816                   if (recordLineSeparator
1817                       && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1818                     pushLineSeparator();
1819                 } catch (IndexOutOfBoundsException e) {
1820                 } //an eof will them be generated
1821                 break;
1822               }
1823               if (test > 0) {
1824                 //traditional and annotation comment
1825                 boolean star = false;
1826                 //                try { // consume next character
1827                 unicodeAsBackSlash = false;
1828                 currentCharacter = source[currentPosition++];
1829                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1830                 //                    && (source[currentPosition] == 'u')) {
1831                 //                    getNextUnicodeChar();
1832                 //                  } else {
1833                 if (withoutUnicodePtr != 0) {
1834                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1835                 }
1836                 //                  };
1837                 //                } catch (InvalidInputException ex) {
1838                 //                };
1839                 if (currentCharacter == '*') {
1840                   star = true;
1841                 }
1842                 if (recordLineSeparator
1843                     && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1844                   pushLineSeparator();
1845                 try { //get the next char
1846                   currentCharacter = source[currentPosition++];
1847                   //                  if (((currentCharacter = source[currentPosition++]) ==
1848                   // '\\')
1849                   //                    && (source[currentPosition] == 'u')) {
1850                   //                    //-------------unicode traitement ------------
1851                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1852                   //                    currentPosition++;
1853                   //                    while (source[currentPosition] == 'u') {
1854                   //                      currentPosition++;
1855                   //                    }
1856                   //                    if ((c1 =
1857                   //                      Character.getNumericValue(source[currentPosition++]))
1858                   //                      > 15
1859                   //                      || c1 < 0
1860                   //                      || (c2 =
1861                   //                        Character.getNumericValue(source[currentPosition++]))
1862                   //                        > 15
1863                   //                      || c2 < 0
1864                   //                      || (c3 =
1865                   //                        Character.getNumericValue(source[currentPosition++]))
1866                   //                        > 15
1867                   //                      || c3 < 0
1868                   //                      || (c4 =
1869                   //                        Character.getNumericValue(source[currentPosition++]))
1870                   //                        > 15
1871                   //                      || c4 < 0) {
1872                   //                      //error don't care of the value
1873                   //                      currentCharacter = 'A';
1874                   //                    } //something different from * and /
1875                   //                    else {
1876                   //                      currentCharacter =
1877                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1878                   //                    }
1879                   //                  }
1880                   //loop until end of comment */
1881                   while ((currentCharacter != '/') || (!star)) {
1882                     if (recordLineSeparator
1883                         && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1884                       pushLineSeparator();
1885                     star = currentCharacter == '*';
1886                     //get next char
1887                     currentCharacter = source[currentPosition++];
1888                     //                    if (((currentCharacter = source[currentPosition++])
1889                     //                      == '\\')
1890                     //                      && (source[currentPosition] == 'u')) {
1891                     //                      //-------------unicode traitement ------------
1892                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1893                     //                      currentPosition++;
1894                     //                      while (source[currentPosition] == 'u') {
1895                     //                        currentPosition++;
1896                     //                      }
1897                     //                      if ((c1 =
1898                     //                        Character.getNumericValue(source[currentPosition++]))
1899                     //                        > 15
1900                     //                        || c1 < 0
1901                     //                        || (c2 =
1902                     //                          Character.getNumericValue(source[currentPosition++]))
1903                     //                          > 15
1904                     //                        || c2 < 0
1905                     //                        || (c3 =
1906                     //                          Character.getNumericValue(source[currentPosition++]))
1907                     //                          > 15
1908                     //                        || c3 < 0
1909                     //                        || (c4 =
1910                     //                          Character.getNumericValue(source[currentPosition++]))
1911                     //                          > 15
1912                     //                        || c4 < 0) {
1913                     //                        //error don't care of the value
1914                     //                        currentCharacter = 'A';
1915                     //                      } //something different from * and /
1916                     //                      else {
1917                     //                        currentCharacter =
1918                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1919                     //                      }
1920                     //                    }
1921                   }
1922                 } catch (IndexOutOfBoundsException e) {
1923                   return;
1924                 }
1925                 break;
1926               }
1927               break;
1928             }
1929           default :
1930             if (isPHPIdentifierStart(currentCharacter)
1931                 || currentCharacter == '$') {
1932               try {
1933                 scanIdentifierOrKeyword((currentCharacter == '$'));
1934               } catch (InvalidInputException ex) {
1935               };
1936               break;
1937             }
1938             if (Character.isDigit(currentCharacter)) {
1939               try {
1940                 scanNumber(false);
1941               } catch (InvalidInputException ex) {
1942               };
1943               break;
1944             }
1945         }
1946       }
1947       //-----------------end switch while try--------------------
1948     } catch (IndexOutOfBoundsException e) {
1949     } catch (InvalidInputException e) {
1950     }
1951     return;
1952   }
1953   //  public final boolean jumpOverUnicodeWhiteSpace()
1954   //    throws InvalidInputException {
1955   //    //BOOLEAN
1956   //    //handle the case of unicode. Jump over the next whiteSpace
1957   //    //making startPosition pointing on the next available char
1958   //    //On false, the currentCharacter is filled up with a potential
1959   //    //correct char
1960   //
1961   //    try {
1962   //      this.wasAcr = false;
1963   //      int c1, c2, c3, c4;
1964   //      int unicodeSize = 6;
1965   //      currentPosition++;
1966   //      while (source[currentPosition] == 'u') {
1967   //        currentPosition++;
1968   //        unicodeSize++;
1969   //      }
1970   //
1971   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1972   //        || c1 < 0)
1973   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1974   //          || c2 < 0)
1975   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1976   //          || c3 < 0)
1977   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1978   //          || c4 < 0)) {
1979   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1980   //      }
1981   //
1982   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1983   //      if (recordLineSeparator
1984   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1985   //        pushLineSeparator();
1986   //      if (Character.isWhitespace(currentCharacter))
1987   //        return true;
1988   //
1989   //      //buffer the new char which is not a white space
1990   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1991   //      //withoutUnicodePtr == 1 is true here
1992   //      return false;
1993   //    } catch (IndexOutOfBoundsException e) {
1994   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1995   //    }
1996   //  }
1997   public final int[] getLineEnds() {
1998     //return a bounded copy of this.lineEnds
1999     int[] copy;
2000     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2001     return copy;
2002   }
2003   public char[] getSource() {
2004     return this.source;
2005   }
2006   final char[] optimizedCurrentTokenSource1() {
2007     //return always the same char[] build only once
2008     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2009     char charOne = source[startPosition];
2010     switch (charOne) {
2011       case 'a' :
2012         return charArray_a;
2013       case 'b' :
2014         return charArray_b;
2015       case 'c' :
2016         return charArray_c;
2017       case 'd' :
2018         return charArray_d;
2019       case 'e' :
2020         return charArray_e;
2021       case 'f' :
2022         return charArray_f;
2023       case 'g' :
2024         return charArray_g;
2025       case 'h' :
2026         return charArray_h;
2027       case 'i' :
2028         return charArray_i;
2029       case 'j' :
2030         return charArray_j;
2031       case 'k' :
2032         return charArray_k;
2033       case 'l' :
2034         return charArray_l;
2035       case 'm' :
2036         return charArray_m;
2037       case 'n' :
2038         return charArray_n;
2039       case 'o' :
2040         return charArray_o;
2041       case 'p' :
2042         return charArray_p;
2043       case 'q' :
2044         return charArray_q;
2045       case 'r' :
2046         return charArray_r;
2047       case 's' :
2048         return charArray_s;
2049       case 't' :
2050         return charArray_t;
2051       case 'u' :
2052         return charArray_u;
2053       case 'v' :
2054         return charArray_v;
2055       case 'w' :
2056         return charArray_w;
2057       case 'x' :
2058         return charArray_x;
2059       case 'y' :
2060         return charArray_y;
2061       case 'z' :
2062         return charArray_z;
2063       default :
2064         return new char[]{charOne};
2065     }
2066   }
2067   final char[] optimizedCurrentTokenSource2() {
2068     //try to return the same char[] build only once
2069     char c0, c1;
2070     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2071         % TableSize;
2072     char[][] table = charArray_length[0][hash];
2073     int i = newEntry2;
2074     while (++i < InternalTableSize) {
2075       char[] charArray = table[i];
2076       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2077         return charArray;
2078     }
2079     //---------other side---------
2080     i = -1;
2081     int max = newEntry2;
2082     while (++i <= max) {
2083       char[] charArray = table[i];
2084       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2085         return charArray;
2086     }
2087     //--------add the entry-------
2088     if (++max >= InternalTableSize)
2089       max = 0;
2090     char[] r;
2091     table[max] = (r = new char[]{c0, c1});
2092     newEntry2 = max;
2093     return r;
2094   }
2095   final char[] optimizedCurrentTokenSource3() {
2096     //try to return the same char[] build only once
2097     char c0, c1, c2;
2098     int hash = (((c0 = source[startPosition]) << 12)
2099         + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2100         % TableSize;
2101     char[][] table = charArray_length[1][hash];
2102     int i = newEntry3;
2103     while (++i < InternalTableSize) {
2104       char[] charArray = table[i];
2105       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2106         return charArray;
2107     }
2108     //---------other side---------
2109     i = -1;
2110     int max = newEntry3;
2111     while (++i <= max) {
2112       char[] charArray = table[i];
2113       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2114         return charArray;
2115     }
2116     //--------add the entry-------
2117     if (++max >= InternalTableSize)
2118       max = 0;
2119     char[] r;
2120     table[max] = (r = new char[]{c0, c1, c2});
2121     newEntry3 = max;
2122     return r;
2123   }
2124   final char[] optimizedCurrentTokenSource4() {
2125     //try to return the same char[] build only once
2126     char c0, c1, c2, c3;
2127     long hash = ((((long) (c0 = source[startPosition])) << 18)
2128         + ((c1 = source[startPosition + 1]) << 12)
2129         + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2130         % TableSize;
2131     char[][] table = charArray_length[2][(int) hash];
2132     int i = newEntry4;
2133     while (++i < InternalTableSize) {
2134       char[] charArray = table[i];
2135       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2136           && (c3 == charArray[3]))
2137         return charArray;
2138     }
2139     //---------other side---------
2140     i = -1;
2141     int max = newEntry4;
2142     while (++i <= max) {
2143       char[] charArray = table[i];
2144       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2145           && (c3 == charArray[3]))
2146         return charArray;
2147     }
2148     //--------add the entry-------
2149     if (++max >= InternalTableSize)
2150       max = 0;
2151     char[] r;
2152     table[max] = (r = new char[]{c0, c1, c2, c3});
2153     newEntry4 = max;
2154     return r;
2155   }
2156   final char[] optimizedCurrentTokenSource5() {
2157     //try to return the same char[] build only once
2158     char c0, c1, c2, c3, c4;
2159     long hash = ((((long) (c0 = source[startPosition])) << 24)
2160         + (((long) (c1 = source[startPosition + 1])) << 18)
2161         + ((c2 = source[startPosition + 2]) << 12)
2162         + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2163         % TableSize;
2164     char[][] table = charArray_length[3][(int) hash];
2165     int i = newEntry5;
2166     while (++i < InternalTableSize) {
2167       char[] charArray = table[i];
2168       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2169           && (c3 == charArray[3]) && (c4 == charArray[4]))
2170         return charArray;
2171     }
2172     //---------other side---------
2173     i = -1;
2174     int max = newEntry5;
2175     while (++i <= max) {
2176       char[] charArray = table[i];
2177       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2178           && (c3 == charArray[3]) && (c4 == charArray[4]))
2179         return charArray;
2180     }
2181     //--------add the entry-------
2182     if (++max >= InternalTableSize)
2183       max = 0;
2184     char[] r;
2185     table[max] = (r = new char[]{c0, c1, c2, c3, c4});
2186     newEntry5 = max;
2187     return r;
2188   }
2189   final char[] optimizedCurrentTokenSource6() {
2190     //try to return the same char[] build only once
2191     char c0, c1, c2, c3, c4, c5;
2192     long hash = ((((long) (c0 = source[startPosition])) << 32)
2193         + (((long) (c1 = source[startPosition + 1])) << 24)
2194         + (((long) (c2 = source[startPosition + 2])) << 18)
2195         + ((c3 = source[startPosition + 3]) << 12)
2196         + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2197         % TableSize;
2198     char[][] table = charArray_length[4][(int) hash];
2199     int i = newEntry6;
2200     while (++i < InternalTableSize) {
2201       char[] charArray = table[i];
2202       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2203           && (c3 == charArray[3]) && (c4 == charArray[4])
2204           && (c5 == charArray[5]))
2205         return charArray;
2206     }
2207     //---------other side---------
2208     i = -1;
2209     int max = newEntry6;
2210     while (++i <= max) {
2211       char[] charArray = table[i];
2212       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2213           && (c3 == charArray[3]) && (c4 == charArray[4])
2214           && (c5 == charArray[5]))
2215         return charArray;
2216     }
2217     //--------add the entry-------
2218     if (++max >= InternalTableSize)
2219       max = 0;
2220     char[] r;
2221     table[max] = (r = new char[]{c0, c1, c2, c3, c4, c5});
2222     newEntry6 = max;
2223     return r;
2224   }
2225   public final void pushLineSeparator() throws InvalidInputException {
2226     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2227     final int INCREMENT = 250;
2228     if (this.checkNonExternalizedStringLiterals) {
2229       // reinitialize the current line for non externalize strings purpose
2230       currentLine = null;
2231     }
2232     //currentCharacter is at position currentPosition-1
2233     // cr 000D
2234     if (currentCharacter == '\r') {
2235       int separatorPos = currentPosition - 1;
2236       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2237         return;
2238       //System.out.println("CR-" + separatorPos);
2239       try {
2240         lineEnds[++linePtr] = separatorPos;
2241       } catch (IndexOutOfBoundsException e) {
2242         //linePtr value is correct
2243         int oldLength = lineEnds.length;
2244         int[] old = lineEnds;
2245         lineEnds = new int[oldLength + INCREMENT];
2246         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2247         lineEnds[linePtr] = separatorPos;
2248       }
2249       // look-ahead for merged cr+lf
2250       try {
2251         if (source[currentPosition] == '\n') {
2252           //System.out.println("look-ahead LF-" + currentPosition);
2253           lineEnds[linePtr] = currentPosition;
2254           currentPosition++;
2255           wasAcr = false;
2256         } else {
2257           wasAcr = true;
2258         }
2259       } catch (IndexOutOfBoundsException e) {
2260         wasAcr = true;
2261       }
2262     } else {
2263       // lf 000A
2264       if (currentCharacter == '\n') {
2265         //must merge eventual cr followed by lf
2266         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2267           //System.out.println("merge LF-" + (currentPosition - 1));
2268           lineEnds[linePtr] = currentPosition - 1;
2269         } else {
2270           int separatorPos = currentPosition - 1;
2271           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2272             return;
2273           // System.out.println("LF-" + separatorPos);
2274           try {
2275             lineEnds[++linePtr] = separatorPos;
2276           } catch (IndexOutOfBoundsException e) {
2277             //linePtr value is correct
2278             int oldLength = lineEnds.length;
2279             int[] old = lineEnds;
2280             lineEnds = new int[oldLength + INCREMENT];
2281             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2282             lineEnds[linePtr] = separatorPos;
2283           }
2284         }
2285         wasAcr = false;
2286       }
2287     }
2288   }
2289   public final void pushUnicodeLineSeparator() {
2290     // isUnicode means that the \r or \n has been read as a unicode character
2291     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2292     final int INCREMENT = 250;
2293     //currentCharacter is at position currentPosition-1
2294     if (this.checkNonExternalizedStringLiterals) {
2295       // reinitialize the current line for non externalize strings purpose
2296       currentLine = null;
2297     }
2298     // cr 000D
2299     if (currentCharacter == '\r') {
2300       int separatorPos = currentPosition - 6;
2301       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2302         return;
2303       //System.out.println("CR-" + separatorPos);
2304       try {
2305         lineEnds[++linePtr] = separatorPos;
2306       } catch (IndexOutOfBoundsException e) {
2307         //linePtr value is correct
2308         int oldLength = lineEnds.length;
2309         int[] old = lineEnds;
2310         lineEnds = new int[oldLength + INCREMENT];
2311         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2312         lineEnds[linePtr] = separatorPos;
2313       }
2314       // look-ahead for merged cr+lf
2315       if (source[currentPosition] == '\n') {
2316         //System.out.println("look-ahead LF-" + currentPosition);
2317         lineEnds[linePtr] = currentPosition;
2318         currentPosition++;
2319         wasAcr = false;
2320       } else {
2321         wasAcr = true;
2322       }
2323     } else {
2324       // lf 000A
2325       if (currentCharacter == '\n') {
2326         //must merge eventual cr followed by lf
2327         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2328           //System.out.println("merge LF-" + (currentPosition - 1));
2329           lineEnds[linePtr] = currentPosition - 6;
2330         } else {
2331           int separatorPos = currentPosition - 6;
2332           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2333             return;
2334           // System.out.println("LF-" + separatorPos);
2335           try {
2336             lineEnds[++linePtr] = separatorPos;
2337           } catch (IndexOutOfBoundsException e) {
2338             //linePtr value is correct
2339             int oldLength = lineEnds.length;
2340             int[] old = lineEnds;
2341             lineEnds = new int[oldLength + INCREMENT];
2342             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2343             lineEnds[linePtr] = separatorPos;
2344           }
2345         }
2346         wasAcr = false;
2347       }
2348     }
2349   }
2350   public final void recordComment(boolean isJavadoc) {
2351     // a new annotation comment is recorded
2352     try {
2353       commentStops[++commentPtr] = isJavadoc
2354           ? currentPosition
2355           : -currentPosition;
2356     } catch (IndexOutOfBoundsException e) {
2357       int oldStackLength = commentStops.length;
2358       int[] oldStack = commentStops;
2359       commentStops = new int[oldStackLength + 30];
2360       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2361       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2362       //grows the positions buffers too
2363       int[] old = commentStarts;
2364       commentStarts = new int[oldStackLength + 30];
2365       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2366     }
2367     //the buffer is of a correct size here
2368     commentStarts[commentPtr] = startPosition;
2369   }
2370   public void resetTo(int begin, int end) {
2371     //reset the scanner to a given position where it may rescan again
2372     diet = false;
2373     initialPosition = startPosition = currentPosition = begin;
2374     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2375     commentPtr = -1; // reset comment stack
2376   }
2377   public final void scanSingleQuotedEscapeCharacter()
2378       throws InvalidInputException {
2379     // the string with "\\u" is a legal string of two chars \ and u
2380     //thus we use a direct access to the source (for regular cases).
2381     //    if (unicodeAsBackSlash) {
2382     //      // consume next character
2383     //      unicodeAsBackSlash = false;
2384     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2385     //        && (source[currentPosition] == 'u')) {
2386     //        getNextUnicodeChar();
2387     //      } else {
2388     //        if (withoutUnicodePtr != 0) {
2389     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2390     //        }
2391     //      }
2392     //    } else
2393     currentCharacter = source[currentPosition++];
2394     switch (currentCharacter) {
2395       case '\'' :
2396         currentCharacter = '\'';
2397         break;
2398       case '\\' :
2399         currentCharacter = '\\';
2400         break;
2401       default :
2402         currentCharacter = '\\';
2403         currentPosition--;
2404     }
2405   }
2406   public final void scanDoubleQuotedEscapeCharacter()
2407       throws InvalidInputException {
2408     // the string with "\\u" is a legal string of two chars \ and u
2409     //thus we use a direct access to the source (for regular cases).
2410     //    if (unicodeAsBackSlash) {
2411     //      // consume next character
2412     //      unicodeAsBackSlash = false;
2413     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2414     //        && (source[currentPosition] == 'u')) {
2415     //        getNextUnicodeChar();
2416     //      } else {
2417     //        if (withoutUnicodePtr != 0) {
2418     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2419     //        }
2420     //      }
2421     //    } else
2422     currentCharacter = source[currentPosition++];
2423     switch (currentCharacter) {
2424       //      case 'b' :
2425       //        currentCharacter = '\b';
2426       //        break;
2427       case 't' :
2428         currentCharacter = '\t';
2429         break;
2430       case 'n' :
2431         currentCharacter = '\n';
2432         break;
2433       //      case 'f' :
2434       //        currentCharacter = '\f';
2435       //        break;
2436       case 'r' :
2437         currentCharacter = '\r';
2438         break;
2439       case '\"' :
2440         currentCharacter = '\"';
2441         break;
2442       case '\'' :
2443         currentCharacter = '\'';
2444         break;
2445       case '\\' :
2446         currentCharacter = '\\';
2447         break;
2448       case '$' :
2449         currentCharacter = '$';
2450         break;
2451       default :
2452         // -----------octal escape--------------
2453         // OctalDigit
2454         // OctalDigit OctalDigit
2455         // ZeroToThree OctalDigit OctalDigit
2456         int number = Character.getNumericValue(currentCharacter);
2457         if (number >= 0 && number <= 7) {
2458           boolean zeroToThreeNot = number > 3;
2459           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2460             int digit = Character.getNumericValue(currentCharacter);
2461             if (digit >= 0 && digit <= 7) {
2462               number = (number * 8) + digit;
2463               if (Character
2464                   .isDigit(currentCharacter = source[currentPosition++])) {
2465                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2466                   // Digit --> ignore last character
2467                   currentPosition--;
2468                 } else {
2469                   digit = Character.getNumericValue(currentCharacter);
2470                   if (digit >= 0 && digit <= 7) {
2471                     // has read \ZeroToThree OctalDigit OctalDigit
2472                     number = (number * 8) + digit;
2473                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2474                     // --> ignore last character
2475                     currentPosition--;
2476                   }
2477                 }
2478               } else { // has read \OctalDigit NonDigit--> ignore last
2479                 // character
2480                 currentPosition--;
2481               }
2482             } else { // has read \OctalDigit NonOctalDigit--> ignore last
2483               // character
2484               currentPosition--;
2485             }
2486           } else { // has read \OctalDigit --> ignore last character
2487             currentPosition--;
2488           }
2489           if (number > 255)
2490             throw new InvalidInputException(INVALID_ESCAPE);
2491           currentCharacter = (char) number;
2492         }
2493     //else
2494     //     throw new InvalidInputException(INVALID_ESCAPE);
2495     }
2496   }
2497   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2498   //    return scanIdentifierOrKeyword( false );
2499   //  }
2500   public int scanIdentifierOrKeyword(boolean isVariable)
2501       throws InvalidInputException {
2502     //test keywords
2503     //first dispatch on the first char.
2504     //then the length. If there are several
2505     //keywors with the same length AND the same first char, then do another
2506     //disptach on the second char :-)...cool....but fast !
2507     useAssertAsAnIndentifier = false;
2508     while (getNextCharAsJavaIdentifierPart()) {
2509     };
2510     if (isVariable) {
2511       if (new String(getCurrentTokenSource()).equals("$this")) {
2512         return TokenNamethis;
2513       }
2514       return TokenNameVariable;
2515     }
2516     int index, length;
2517     char[] data;
2518     char firstLetter;
2519     //    if (withoutUnicodePtr == 0)
2520     //quick test on length == 1 but not on length > 12 while most identifier
2521     //have a length which is <= 12...but there are lots of identifier with
2522     //only one char....
2523     //      {
2524     if ((length = currentPosition - startPosition) == 1)
2525       return TokenNameIdentifier;
2526     //  data = source;
2527     data = new char[length];
2528     index = startPosition;
2529     for (int i = 0; i < length; i++) {
2530       data[i] = Character.toLowerCase(source[index + i]);
2531     }
2532     index = 0;
2533     //    } else {
2534     //      if ((length = withoutUnicodePtr) == 1)
2535     //        return TokenNameIdentifier;
2536     //      // data = withoutUnicodeBuffer;
2537     //      data = new char[withoutUnicodeBuffer.length];
2538     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2539     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2540     //      }
2541     //      index = 1;
2542     //    }
2543     firstLetter = data[index];
2544     switch (firstLetter) {
2545       case '_' :
2546         switch (length) {
2547           case 8 :
2548             //__FILE__
2549             if ((data[++index] == '_') && (data[++index] == 'f')
2550                 && (data[++index] == 'i') && (data[++index] == 'l')
2551                 && (data[++index] == 'e') && (data[++index] == '_')
2552                 && (data[++index] == '_'))
2553               return TokenNameFILE;
2554             index = 0; //__LINE__
2555             if ((data[++index] == '_') && (data[++index] == 'l')
2556                 && (data[++index] == 'i') && (data[++index] == 'n')
2557                 && (data[++index] == 'e') && (data[++index] == '_')
2558                 && (data[++index] == '_'))
2559               return TokenNameLINE;
2560             break;
2561           case 9 :
2562             //__CLASS__
2563             if ((data[++index] == '_') && (data[++index] == 'c')
2564                 && (data[++index] == 'l') && (data[++index] == 'a')
2565                 && (data[++index] == 's') && (data[++index] == 's')
2566                 && (data[++index] == '_') && (data[++index] == '_'))
2567               return TokenNameCLASS_C;
2568             break;
2569           case 11 :
2570             //__METHOD__
2571             if ((data[++index] == '_') && (data[++index] == 'm')
2572                 && (data[++index] == 'e') && (data[++index] == 't')
2573                 && (data[++index] == 'h') && (data[++index] == 'o')
2574                 && (data[++index] == 'd') && (data[++index] == '_')
2575                 && (data[++index] == '_'))
2576               return TokenNameMETHOD_C;
2577             break;
2578           case 12 :
2579             //__FUNCTION__
2580             if ((data[++index] == '_') && (data[++index] == 'f')
2581                 && (data[++index] == 'u') && (data[++index] == 'n')
2582                 && (data[++index] == 'c') && (data[++index] == 't')
2583                 && (data[++index] == 'i') && (data[++index] == 'o')
2584                 && (data[++index] == 'n') && (data[++index] == '_')
2585                 && (data[++index] == '_'))
2586               return TokenNameFUNC_C;
2587             break;
2588         }
2589         return TokenNameIdentifier;
2590       case 'a' :
2591         // as and array abstract
2592         switch (length) {
2593           case 2 :
2594             //as
2595             if ((data[++index] == 's')) {
2596               return TokenNameas;
2597             } else {
2598               return TokenNameIdentifier;
2599             }
2600           case 3 :
2601             //and
2602             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2603               return TokenNameAND;
2604             } else {
2605               return TokenNameIdentifier;
2606             }
2607           case 5 :
2608             // array
2609             if ((data[++index] == 'r') && (data[++index] == 'r')
2610                 && (data[++index] == 'a') && (data[++index] == 'y'))
2611               return TokenNamearray;
2612             else
2613               return TokenNameIdentifier;
2614           case 8 :
2615             if ((data[++index] == 'b') && (data[++index] == 's')
2616                 && (data[++index] == 't') && (data[++index] == 'r')
2617                 && (data[++index] == 'a') && (data[++index] == 'c')
2618                 && (data[++index] == 't'))
2619               return TokenNameabstract;
2620             else
2621               return TokenNameIdentifier;
2622           default :
2623             return TokenNameIdentifier;
2624         }
2625       case 'b' :
2626         //break
2627         switch (length) {
2628           case 5 :
2629             if ((data[++index] == 'r') && (data[++index] == 'e')
2630                 && (data[++index] == 'a') && (data[++index] == 'k'))
2631               return TokenNamebreak;
2632             else
2633               return TokenNameIdentifier;
2634           default :
2635             return TokenNameIdentifier;
2636         }
2637       case 'c' :
2638         //case catch class const continue
2639         switch (length) {
2640           case 4 :
2641             if ((data[++index] == 'a') && (data[++index] == 's')
2642                 && (data[++index] == 'e'))
2643               return TokenNamecase;
2644             else
2645               return TokenNameIdentifier;
2646           case 5 :
2647             if ((data[++index] == 'a') && (data[++index] == 't')
2648                 && (data[++index] == 'c') && (data[++index] == 'h'))
2649               return TokenNamecatch;
2650             if ((data[index] == 'l') && (data[++index] == 'a')
2651                 && (data[++index] == 's') && (data[++index] == 's'))
2652               return TokenNameclass;
2653             if ((data[index] == 'o') && (data[++index] == 'n')
2654                 && (data[++index] == 's') && (data[++index] == 't'))
2655               return TokenNameconst;
2656             else
2657               return TokenNameIdentifier;
2658           case 8 :
2659             if ((data[++index] == 'o') && (data[++index] == 'n')
2660                 && (data[++index] == 't') && (data[++index] == 'i')
2661                 && (data[++index] == 'n') && (data[++index] == 'u')
2662                 && (data[++index] == 'e'))
2663               return TokenNamecontinue;
2664             else
2665               return TokenNameIdentifier;
2666           default :
2667             return TokenNameIdentifier;
2668         }
2669       case 'd' :
2670         // declare default do die
2671         // TODO delete define ==> no keyword !
2672         switch (length) {
2673           case 2 :
2674             if ((data[++index] == 'o'))
2675               return TokenNamedo;
2676             else
2677               return TokenNameIdentifier;
2678           //          case 6 :
2679           //            if ((data[++index] == 'e')
2680           //              && (data[++index] == 'f')
2681           //              && (data[++index] == 'i')
2682           //              && (data[++index] == 'n')
2683           //              && (data[++index] == 'e'))
2684           //              return TokenNamedefine;
2685           //            else
2686           //              return TokenNameIdentifier;
2687           case 7 :
2688             if ((data[++index] == 'e') && (data[++index] == 'c')
2689                 && (data[++index] == 'l') && (data[++index] == 'a')
2690                 && (data[++index] == 'r') && (data[++index] == 'e'))
2691               return TokenNamedeclare;
2692             index = 0;
2693             if ((data[++index] == 'e') && (data[++index] == 'f')
2694                 && (data[++index] == 'a') && (data[++index] == 'u')
2695                 && (data[++index] == 'l') && (data[++index] == 't'))
2696               return TokenNamedefault;
2697             else
2698               return TokenNameIdentifier;
2699           default :
2700             return TokenNameIdentifier;
2701         }
2702       case 'e' :
2703         //echo else exit elseif extends eval
2704         switch (length) {
2705           case 4 :
2706             if ((data[++index] == 'c') && (data[++index] == 'h')
2707                 && (data[++index] == 'o'))
2708               return TokenNameecho;
2709             else if ((data[index] == 'l') && (data[++index] == 's')
2710                 && (data[++index] == 'e'))
2711               return TokenNameelse;
2712             else if ((data[index] == 'x') && (data[++index] == 'i')
2713                 && (data[++index] == 't'))
2714               return TokenNameexit;
2715             else if ((data[index] == 'v') && (data[++index] == 'a')
2716                 && (data[++index] == 'l'))
2717               return TokenNameeval;
2718             else
2719               return TokenNameIdentifier;
2720           case 5 :
2721             // endif empty
2722             if ((data[++index] == 'n') && (data[++index] == 'd')
2723                 && (data[++index] == 'i') && (data[++index] == 'f'))
2724               return TokenNameendif;
2725             if ((data[index] == 'm') && (data[++index] == 'p')
2726                 && (data[++index] == 't') && (data[++index] == 'y'))
2727               return TokenNameempty;
2728             else
2729               return TokenNameIdentifier;
2730           case 6 :
2731             // endfor
2732             if ((data[++index] == 'n') && (data[++index] == 'd')
2733                 && (data[++index] == 'f') && (data[++index] == 'o')
2734                 && (data[++index] == 'r'))
2735               return TokenNameendfor;
2736             else if ((data[index] == 'l') && (data[++index] == 's')
2737                 && (data[++index] == 'e') && (data[++index] == 'i')
2738                 && (data[++index] == 'f'))
2739               return TokenNameelseif;
2740             else
2741               return TokenNameIdentifier;
2742           case 7 :
2743             if ((data[++index] == 'x') && (data[++index] == 't')
2744                 && (data[++index] == 'e') && (data[++index] == 'n')
2745                 && (data[++index] == 'd') && (data[++index] == 's'))
2746               return TokenNameextends;
2747             else
2748               return TokenNameIdentifier;
2749           case 8 :
2750             // endwhile
2751             if ((data[++index] == 'n') && (data[++index] == 'd')
2752                 && (data[++index] == 'w') && (data[++index] == 'h')
2753                 && (data[++index] == 'i') && (data[++index] == 'l')
2754                 && (data[++index] == 'e'))
2755               return TokenNameendwhile;
2756             else
2757               return TokenNameIdentifier;
2758           case 9 :
2759             // endswitch
2760             if ((data[++index] == 'n') && (data[++index] == 'd')
2761                 && (data[++index] == 's') && (data[++index] == 'w')
2762                 && (data[++index] == 'i') && (data[++index] == 't')
2763                 && (data[++index] == 'c') && (data[++index] == 'h'))
2764               return TokenNameendswitch;
2765             else
2766               return TokenNameIdentifier;
2767           case 10 :
2768             // enddeclare
2769             if ((data[++index] == 'n') && (data[++index] == 'd')
2770                 && (data[++index] == 'd') && (data[++index] == 'e')
2771                 && (data[++index] == 'c') && (data[++index] == 'l')
2772                 && (data[++index] == 'a') && (data[++index] == 'r')
2773                 && (data[++index] == 'e'))
2774               return TokenNameendforeach;
2775             index = 0;
2776             if ((data[++index] == 'n') // endforeach
2777                 && (data[++index] == 'd') && (data[++index] == 'f')
2778                 && (data[++index] == 'o') && (data[++index] == 'r')
2779                 && (data[++index] == 'e') && (data[++index] == 'a')
2780                 && (data[++index] == 'c') && (data[++index] == 'h'))
2781               return TokenNameendforeach;
2782             else
2783               return TokenNameIdentifier;
2784           default :
2785             return TokenNameIdentifier;
2786         }
2787       case 'f' :
2788         //for false final function
2789         switch (length) {
2790           case 3 :
2791             if ((data[++index] == 'o') && (data[++index] == 'r'))
2792               return TokenNamefor;
2793             else
2794               return TokenNameIdentifier;
2795           case 5 :
2796 //            if ((data[++index] == 'a') && (data[++index] == 'l')
2797 //                && (data[++index] == 's') && (data[++index] == 'e'))
2798 //              return TokenNamefalse;
2799             if ((data[++index] == 'i') && (data[++index] == 'n')
2800                 && (data[++index] == 'a') && (data[++index] == 'l'))
2801               return TokenNamefinal;
2802             else
2803               return TokenNameIdentifier;
2804           case 7 :
2805             // foreach
2806             if ((data[++index] == 'o') && (data[++index] == 'r')
2807                 && (data[++index] == 'e') && (data[++index] == 'a')
2808                 && (data[++index] == 'c') && (data[++index] == 'h'))
2809               return TokenNameforeach;
2810             else
2811               return TokenNameIdentifier;
2812           case 8 :
2813             // function
2814             if ((data[++index] == 'u') && (data[++index] == 'n')
2815                 && (data[++index] == 'c') && (data[++index] == 't')
2816                 && (data[++index] == 'i') && (data[++index] == 'o')
2817                 && (data[++index] == 'n'))
2818               return TokenNamefunction;
2819             else
2820               return TokenNameIdentifier;
2821           default :
2822             return TokenNameIdentifier;
2823         }
2824       case 'g' :
2825         //global
2826         if (length == 6) {
2827           if ((data[++index] == 'l') && (data[++index] == 'o')
2828               && (data[++index] == 'b') && (data[++index] == 'a')
2829               && (data[++index] == 'l')) {
2830             return TokenNameglobal;
2831           }
2832         }
2833         return TokenNameIdentifier;
2834       case 'i' :
2835         //if int isset include include_once instanceof interface implements
2836         switch (length) {
2837           case 2 :
2838             if (data[++index] == 'f')
2839               return TokenNameif;
2840             else
2841               return TokenNameIdentifier;
2842           //          case 3 :
2843           //            if ((data[++index] == 'n') && (data[++index] == 't'))
2844           //              return TokenNameint;
2845           //            else
2846           //              return TokenNameIdentifier;
2847           case 5 :
2848             if ((data[++index] == 's') && (data[++index] == 's')
2849                 && (data[++index] == 'e') && (data[++index] == 't'))
2850               return TokenNameisset;
2851             else
2852               return TokenNameIdentifier;
2853           case 7 :
2854             if ((data[++index] == 'n') && (data[++index] == 'c')
2855                 && (data[++index] == 'l') && (data[++index] == 'u')
2856                 && (data[++index] == 'd') && (data[++index] == 'e'))
2857               return TokenNameinclude;
2858             else
2859               return TokenNameIdentifier;
2860           case 9 :
2861             // interface
2862             if ((data[++index] == 'n') && (data[++index] == 't')
2863                 && (data[++index] == 'e') && (data[++index] == 'r')
2864                 && (data[++index] == 'f') && (data[++index] == 'a')
2865                 && (data[++index] == 'c') && (data[++index] == 'e'))
2866               return TokenNameinterface;
2867             else
2868               return TokenNameIdentifier;
2869           case 10 :
2870             // instanceof
2871             if ((data[++index] == 'n') && (data[++index] == 's')
2872                 && (data[++index] == 't') && (data[++index] == 'a')
2873                 && (data[++index] == 'n') && (data[++index] == 'c')
2874                 && (data[++index] == 'e') && (data[++index] == 'o')
2875                 && (data[++index] == 'f'))
2876               return TokenNameinstanceof;
2877             if ((data[index] == 'm') && (data[++index] == 'p')
2878                 && (data[++index] == 'l') && (data[++index] == 'e')
2879                 && (data[++index] == 'm') && (data[++index] == 'e')
2880                 && (data[++index] == 'n') && (data[++index] == 't')
2881                 && (data[++index] == 's'))
2882               return TokenNameimplements;
2883             else
2884               return TokenNameIdentifier;
2885           case 12 :
2886             if ((data[++index] == 'n') && (data[++index] == 'c')
2887                 && (data[++index] == 'l') && (data[++index] == 'u')
2888                 && (data[++index] == 'd') && (data[++index] == 'e')
2889                 && (data[++index] == '_') && (data[++index] == 'o')
2890                 && (data[++index] == 'n') && (data[++index] == 'c')
2891                 && (data[++index] == 'e'))
2892               return TokenNameinclude_once;
2893             else
2894               return TokenNameIdentifier;
2895           default :
2896             return TokenNameIdentifier;
2897         }
2898       case 'l' :
2899         //list
2900         if (length == 4) {
2901           if ((data[++index] == 'i') && (data[++index] == 's')
2902               && (data[++index] == 't')) {
2903             return TokenNamelist;
2904           }
2905         }
2906         return TokenNameIdentifier;
2907       case 'n' :
2908         // new null
2909         switch (length) {
2910           case 3 :
2911             if ((data[++index] == 'e') && (data[++index] == 'w'))
2912               return TokenNamenew;
2913             else
2914               return TokenNameIdentifier;
2915 //          case 4 :
2916 //            if ((data[++index] == 'u') && (data[++index] == 'l')
2917 //                && (data[++index] == 'l'))
2918 //              return TokenNamenull;
2919 //            else
2920 //              return TokenNameIdentifier;
2921           default :
2922             return TokenNameIdentifier;
2923         }
2924       case 'o' :
2925         // or old_function
2926         if (length == 2) {
2927           if (data[++index] == 'r') {
2928             return TokenNameOR;
2929           }
2930         }
2931         //        if (length == 12) {
2932         //          if ((data[++index] == 'l')
2933         //            && (data[++index] == 'd')
2934         //            && (data[++index] == '_')
2935         //            && (data[++index] == 'f')
2936         //            && (data[++index] == 'u')
2937         //            && (data[++index] == 'n')
2938         //            && (data[++index] == 'c')
2939         //            && (data[++index] == 't')
2940         //            && (data[++index] == 'i')
2941         //            && (data[++index] == 'o')
2942         //            && (data[++index] == 'n')) {
2943         //            return TokenNameold_function;
2944         //          }
2945         //        }
2946         return TokenNameIdentifier;
2947       case 'p' :
2948         // print public private protected
2949         switch (length) {
2950           case 5 :
2951             if ((data[++index] == 'r') && (data[++index] == 'i')
2952                 && (data[++index] == 'n') && (data[++index] == 't')) {
2953               return TokenNameprint;
2954             } else
2955               return TokenNameIdentifier;
2956           case 6 :
2957             if ((data[++index] == 'u') && (data[++index] == 'b')
2958                 && (data[++index] == 'l') && (data[++index] == 'i')
2959                 && (data[++index] == 'c')) {
2960               return TokenNamepublic;
2961             } else
2962               return TokenNameIdentifier;
2963           case 7 :
2964             if ((data[++index] == 'r') && (data[++index] == 'i')
2965                 && (data[++index] == 'v') && (data[++index] == 'a')
2966                 && (data[++index] == 't') && (data[++index] == 'e')) {
2967               return TokenNameprivate;
2968             } else
2969               return TokenNameIdentifier;
2970           case 9 :
2971             if ((data[++index] == 'r') && (data[++index] == 'o')
2972                 && (data[++index] == 't') && (data[++index] == 'e')
2973                 && (data[++index] == 'c') && (data[++index] == 't')
2974                 && (data[++index] == 'e') && (data[++index] == 'd')) {
2975               return TokenNameprotected;
2976             } else
2977               return TokenNameIdentifier;
2978         }
2979         return TokenNameIdentifier;
2980       case 'r' :
2981         //return require require_once
2982         if (length == 6) {
2983           if ((data[++index] == 'e') && (data[++index] == 't')
2984               && (data[++index] == 'u') && (data[++index] == 'r')
2985               && (data[++index] == 'n')) {
2986             return TokenNamereturn;
2987           }
2988         } else if (length == 7) {
2989           if ((data[++index] == 'e') && (data[++index] == 'q')
2990               && (data[++index] == 'u') && (data[++index] == 'i')
2991               && (data[++index] == 'r') && (data[++index] == 'e')) {
2992             return TokenNamerequire;
2993           }
2994         } else if (length == 12) {
2995           if ((data[++index] == 'e') && (data[++index] == 'q')
2996               && (data[++index] == 'u') && (data[++index] == 'i')
2997               && (data[++index] == 'r') && (data[++index] == 'e')
2998               && (data[++index] == '_') && (data[++index] == 'o')
2999               && (data[++index] == 'n') && (data[++index] == 'c')
3000               && (data[++index] == 'e')) {
3001             return TokenNamerequire_once;
3002           }
3003         } else
3004           return TokenNameIdentifier;
3005       case 's' :
3006         //static switch
3007         switch (length) {
3008           case 6 :
3009             if (data[++index] == 't')
3010               if ((data[++index] == 'a') && (data[++index] == 't')
3011                   && (data[++index] == 'i') && (data[++index] == 'c')) {
3012                 return TokenNamestatic;
3013               } else
3014                 return TokenNameIdentifier;
3015             else if ((data[index] == 'w') && (data[++index] == 'i')
3016                 && (data[++index] == 't') && (data[++index] == 'c')
3017                 && (data[++index] == 'h'))
3018               return TokenNameswitch;
3019             else
3020               return TokenNameIdentifier;
3021           default :
3022             return TokenNameIdentifier;
3023         }
3024       case 't' :
3025         // try true throw
3026         switch (length) {
3027           case 3 :
3028             if ((data[++index] == 'r') && (data[++index] == 'y'))
3029               return TokenNametry;
3030             else
3031               return TokenNameIdentifier;
3032 //          case 4 :
3033 //            if ((data[++index] == 'r') && (data[++index] == 'u')
3034 //                && (data[++index] == 'e'))
3035 //              return TokenNametrue;
3036 //            else
3037 //              return TokenNameIdentifier;
3038           case 5 :
3039             if ((data[++index] == 'h') && (data[++index] == 'r')
3040                 && (data[++index] == 'o') && (data[++index] == 'w'))
3041               return TokenNamethrow;
3042             else
3043               return TokenNameIdentifier;
3044           default :
3045             return TokenNameIdentifier;
3046         }
3047       case 'u' :
3048         //use unset
3049         switch (length) {
3050           case 3 :
3051             if ((data[++index] == 's') && (data[++index] == 'e'))
3052               return TokenNameuse;
3053             else
3054               return TokenNameIdentifier;
3055           case 5 :
3056             if ((data[++index] == 'n') && (data[++index] == 's')
3057                 && (data[++index] == 'e') && (data[++index] == 't'))
3058               return TokenNameunset;
3059             else
3060               return TokenNameIdentifier;
3061           default :
3062             return TokenNameIdentifier;
3063         }
3064       case 'v' :
3065         //var
3066         switch (length) {
3067           case 3 :
3068             if ((data[++index] == 'a') && (data[++index] == 'r'))
3069               return TokenNamevar;
3070             else
3071               return TokenNameIdentifier;
3072           default :
3073             return TokenNameIdentifier;
3074         }
3075       case 'w' :
3076         //while
3077         switch (length) {
3078           case 5 :
3079             if ((data[++index] == 'h') && (data[++index] == 'i')
3080                 && (data[++index] == 'l') && (data[++index] == 'e'))
3081               return TokenNamewhile;
3082             else
3083               return TokenNameIdentifier;
3084           //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3085           // (data[++index]=='e') && (data[++index]=='f')&&
3086           // (data[++index]=='p'))
3087           //return TokenNamewidefp ;
3088           //else
3089           //return TokenNameIdentifier;
3090           default :
3091             return TokenNameIdentifier;
3092         }
3093       case 'x' :
3094         //xor
3095         switch (length) {
3096           case 3 :
3097             if ((data[++index] == 'o') && (data[++index] == 'r'))
3098               return TokenNameXOR;
3099             else
3100               return TokenNameIdentifier;
3101           default :
3102             return TokenNameIdentifier;
3103         }
3104       default :
3105         return TokenNameIdentifier;
3106     }
3107   }
3108   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3109     //when entering this method the currentCharacter is the firt
3110     //digit of the number , i.e. it may be preceeded by a . when
3111     //dotPrefix is true
3112     boolean floating = dotPrefix;
3113     if ((!dotPrefix) && (currentCharacter == '0')) {
3114       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3115         //force the first char of the hexa number do exist...
3116         // consume next character
3117         unicodeAsBackSlash = false;
3118         currentCharacter = source[currentPosition++];
3119         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3120         //          && (source[currentPosition] == 'u')) {
3121         //          getNextUnicodeChar();
3122         //        } else {
3123         //          if (withoutUnicodePtr != 0) {
3124         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3125         //          }
3126         //        }
3127         if (Character.digit(currentCharacter, 16) == -1)
3128           throw new InvalidInputException(INVALID_HEXA);
3129         //---end forcing--
3130         while (getNextCharAsDigit(16)) {
3131         };
3132         //        if (getNextChar('l', 'L') >= 0)
3133         //          return TokenNameLongLiteral;
3134         //        else
3135         return TokenNameIntegerLiteral;
3136       }
3137       //there is x or X in the number
3138       //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3139       // 00078.0 is true !!!!! crazy language
3140       if (getNextCharAsDigit()) {
3141         //-------------potential octal-----------------
3142         while (getNextCharAsDigit()) {
3143         };
3144         //        if (getNextChar('l', 'L') >= 0) {
3145         //          return TokenNameLongLiteral;
3146         //        }
3147         //
3148         //        if (getNextChar('f', 'F') >= 0) {
3149         //          return TokenNameFloatingPointLiteral;
3150         //        }
3151         if (getNextChar('d', 'D') >= 0) {
3152           return TokenNameDoubleLiteral;
3153         } else { //make the distinction between octal and float ....
3154           if (getNextChar('.')) { //bingo ! ....
3155             while (getNextCharAsDigit()) {
3156             };
3157             if (getNextChar('e', 'E') >= 0) {
3158               // consume next character
3159               unicodeAsBackSlash = false;
3160               currentCharacter = source[currentPosition++];
3161               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3162               //                && (source[currentPosition] == 'u')) {
3163               //                getNextUnicodeChar();
3164               //              } else {
3165               //                if (withoutUnicodePtr != 0) {
3166               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3167               //                }
3168               //              }
3169               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3170                 // consume next character
3171                 unicodeAsBackSlash = false;
3172                 currentCharacter = source[currentPosition++];
3173                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3174                 //                  && (source[currentPosition] == 'u')) {
3175                 //                  getNextUnicodeChar();
3176                 //                } else {
3177                 //                  if (withoutUnicodePtr != 0) {
3178                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3179                 //                      currentCharacter;
3180                 //                  }
3181                 //                }
3182               }
3183               if (!Character.isDigit(currentCharacter))
3184                 throw new InvalidInputException(INVALID_FLOAT);
3185               while (getNextCharAsDigit()) {
3186               };
3187             }
3188             //            if (getNextChar('f', 'F') >= 0)
3189             //              return TokenNameFloatingPointLiteral;
3190             getNextChar('d', 'D'); //jump over potential d or D
3191             return TokenNameDoubleLiteral;
3192           } else {
3193             return TokenNameIntegerLiteral;
3194           }
3195         }
3196       } else {
3197         /* carry on */
3198       }
3199     }
3200     while (getNextCharAsDigit()) {
3201     };
3202     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3203     //      return TokenNameLongLiteral;
3204     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3205       while (getNextCharAsDigit()) {
3206       };
3207       floating = true;
3208     }
3209     //if floating is true both exponant and suffix may be optional
3210     if (getNextChar('e', 'E') >= 0) {
3211       floating = true;
3212       // consume next character
3213       unicodeAsBackSlash = false;
3214       currentCharacter = source[currentPosition++];
3215       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3216       //        && (source[currentPosition] == 'u')) {
3217       //        getNextUnicodeChar();
3218       //      } else {
3219       //        if (withoutUnicodePtr != 0) {
3220       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3221       //        }
3222       //      }
3223       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3224         // next
3225         // character
3226         unicodeAsBackSlash = false;
3227         currentCharacter = source[currentPosition++];
3228         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3229         //          && (source[currentPosition] == 'u')) {
3230         //          getNextUnicodeChar();
3231         //        } else {
3232         //          if (withoutUnicodePtr != 0) {
3233         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3234         //          }
3235         //        }
3236       }
3237       if (!Character.isDigit(currentCharacter))
3238         throw new InvalidInputException(INVALID_FLOAT);
3239       while (getNextCharAsDigit()) {
3240       };
3241     }
3242     if (getNextChar('d', 'D') >= 0)
3243       return TokenNameDoubleLiteral;
3244     //    if (getNextChar('f', 'F') >= 0)
3245     //      return TokenNameFloatingPointLiteral;
3246     //the long flag has been tested before
3247     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3248   }
3249   /**
3250    * Search the line number corresponding to a specific position
3251    *  
3252    */
3253   public final int getLineNumber(int position) {
3254     if (lineEnds == null)
3255       return 1;
3256     int length = linePtr + 1;
3257     if (length == 0)
3258       return 1;
3259     int g = 0, d = length - 1;
3260     int m = 0;
3261     while (g <= d) {
3262       m = (g + d) / 2;
3263       if (position < lineEnds[m]) {
3264         d = m - 1;
3265       } else if (position > lineEnds[m]) {
3266         g = m + 1;
3267       } else {
3268         return m + 1;
3269       }
3270     }
3271     if (position < lineEnds[m]) {
3272       return m + 1;
3273     }
3274     return m + 2;
3275   }
3276   public void setPHPMode(boolean mode) {
3277     phpMode = mode;
3278   }
3279   public final void setSource(char[] source) {
3280     //the source-buffer is set to sourceString
3281     if (source == null) {
3282       this.source = new char[0];
3283     } else {
3284       this.source = source;
3285     }
3286     startPosition = -1;
3287     initialPosition = currentPosition = 0;
3288     containsAssertKeyword = false;
3289     withoutUnicodeBuffer = new char[this.source.length];
3290   }
3291   public String toString() {
3292     if (startPosition == source.length)
3293       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3294     if (currentPosition > source.length)
3295       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3296     char front[] = new char[startPosition];
3297     System.arraycopy(source, 0, front, 0, startPosition);
3298     int middleLength = (currentPosition - 1) - startPosition + 1;
3299     char middle[];
3300     if (middleLength > -1) {
3301       middle = new char[middleLength];
3302       System.arraycopy(source, startPosition, middle, 0, middleLength);
3303     } else {
3304       middle = new char[0];
3305     }
3306     char end[] = new char[source.length - (currentPosition - 1)];
3307     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length
3308         - (currentPosition - 1) - 1);
3309     return new String(front)
3310         + "\n===============================\nStarts here -->" //$NON-NLS-1$
3311         + new String(middle)
3312         + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3313         + new String(end);
3314   }
3315   public final String toStringAction(int act) {
3316     switch (act) {
3317       case TokenNameERROR :
3318         return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3319       // //$NON-NLS-1$
3320       case TokenNameStopPHP :
3321         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3322       case TokenNameIdentifier :
3323         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3324       case TokenNameVariable :
3325         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3326       case TokenNameabstract :
3327         return "abstract"; //$NON-NLS-1$
3328       case TokenNamearray :
3329         return "array"; //$NON-NLS-1$
3330       case TokenNameas :
3331         return "as"; //$NON-NLS-1$
3332       case TokenNamebreak :
3333         return "break"; //$NON-NLS-1$
3334       case TokenNamecase :
3335         return "case"; //$NON-NLS-1$
3336       case TokenNameclass :
3337         return "class"; //$NON-NLS-1$
3338       case TokenNamecontinue :
3339         return "continue"; //$NON-NLS-1$
3340       case TokenNamedefault :
3341         return "default"; //$NON-NLS-1$
3342       //      case TokenNamedefine :
3343       //        return "define"; //$NON-NLS-1$
3344       case TokenNamedo :
3345         return "do"; //$NON-NLS-1$
3346       case TokenNameecho :
3347         return "echo"; //$NON-NLS-1$
3348       case TokenNameelse :
3349         return "else"; //$NON-NLS-1$
3350       case TokenNameelseif :
3351         return "elseif"; //$NON-NLS-1$
3352       case TokenNameendfor :
3353         return "endfor"; //$NON-NLS-1$
3354       case TokenNameendforeach :
3355         return "endforeach"; //$NON-NLS-1$
3356       case TokenNameendif :
3357         return "endif"; //$NON-NLS-1$
3358       case TokenNameendswitch :
3359         return "endswitch"; //$NON-NLS-1$
3360       case TokenNameendwhile :
3361         return "endwhile"; //$NON-NLS-1$
3362       case TokenNameextends :
3363         return "extends"; //$NON-NLS-1$
3364 //      case TokenNamefalse :
3365 //        return "false"; //$NON-NLS-1$
3366       case TokenNamefinal :
3367         return "final"; //$NON-NLS-1$
3368       case TokenNamefor :
3369         return "for"; //$NON-NLS-1$
3370       case TokenNameforeach :
3371         return "foreach"; //$NON-NLS-1$
3372       case TokenNamefunction :
3373         return "function"; //$NON-NLS-1$
3374       case TokenNameglobal :
3375         return "global"; //$NON-NLS-1$
3376       case TokenNameif :
3377         return "if"; //$NON-NLS-1$
3378       case TokenNameimplements :
3379         return "implements"; //$NON-NLS-1$
3380       case TokenNameinclude :
3381         return "include"; //$NON-NLS-1$
3382       case TokenNameinclude_once :
3383         return "include_once"; //$NON-NLS-1$
3384       case TokenNameinterface :
3385         return "interface"; //$NON-NLS-1$
3386       case TokenNamelist :
3387         return "list"; //$NON-NLS-1$
3388       case TokenNamenew :
3389         return "new"; //$NON-NLS-1$
3390 //      case TokenNamenull :
3391 //        return "null"; //$NON-NLS-1$
3392       case TokenNameprint :
3393         return "print"; //$NON-NLS-1$
3394       case TokenNameprivate :
3395         return "private"; //$NON-NLS-1$
3396       case TokenNameprotected :
3397         return "protected"; //$NON-NLS-1$
3398       case TokenNamepublic :
3399         return "public"; //$NON-NLS-1$
3400       case TokenNamerequire :
3401         return "require"; //$NON-NLS-1$
3402       case TokenNamerequire_once :
3403         return "require_once"; //$NON-NLS-1$
3404       case TokenNamereturn :
3405         return "return"; //$NON-NLS-1$
3406       case TokenNamestatic :
3407         return "static"; //$NON-NLS-1$
3408       case TokenNameswitch :
3409         return "switch"; //$NON-NLS-1$
3410 //      case TokenNametrue :
3411 //        return "true"; //$NON-NLS-1$
3412       case TokenNameunset :
3413         return "unset"; //$NON-NLS-1$
3414       case TokenNamevar :
3415         return "var"; //$NON-NLS-1$
3416       case TokenNamewhile :
3417         return "while"; //$NON-NLS-1$
3418       case TokenNamethis :
3419         return "$this"; //$NON-NLS-1$
3420       case TokenNameIntegerLiteral :
3421         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3422       case TokenNameDoubleLiteral :
3423         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3424       case TokenNameStringLiteral :
3425         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3426       case TokenNameStringConstant :
3427         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3428       case TokenNameStringInterpolated :
3429         return "StringInterpolated(" + new String(getCurrentTokenSource())
3430             + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3431       case TokenNameHEREDOC :
3432         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3433       case TokenNamePLUS_PLUS :
3434         return "++"; //$NON-NLS-1$
3435       case TokenNameMINUS_MINUS :
3436         return "--"; //$NON-NLS-1$
3437       case TokenNameEQUAL_EQUAL :
3438         return "=="; //$NON-NLS-1$
3439       case TokenNameEQUAL_EQUAL_EQUAL :
3440         return "==="; //$NON-NLS-1$
3441       case TokenNameEQUAL_GREATER :
3442         return "=>"; //$NON-NLS-1$
3443       case TokenNameLESS_EQUAL :
3444         return "<="; //$NON-NLS-1$
3445       case TokenNameGREATER_EQUAL :
3446         return ">="; //$NON-NLS-1$
3447       case TokenNameNOT_EQUAL :
3448         return "!="; //$NON-NLS-1$
3449       case TokenNameNOT_EQUAL_EQUAL :
3450         return "!=="; //$NON-NLS-1$
3451       case TokenNameLEFT_SHIFT :
3452         return "<<"; //$NON-NLS-1$
3453       case TokenNameRIGHT_SHIFT :
3454         return ">>"; //$NON-NLS-1$
3455       case TokenNamePLUS_EQUAL :
3456         return "+="; //$NON-NLS-1$
3457       case TokenNameMINUS_EQUAL :
3458         return "-="; //$NON-NLS-1$
3459       case TokenNameMULTIPLY_EQUAL :
3460         return "*="; //$NON-NLS-1$
3461       case TokenNameDIVIDE_EQUAL :
3462         return "/="; //$NON-NLS-1$
3463       case TokenNameAND_EQUAL :
3464         return "&="; //$NON-NLS-1$
3465       case TokenNameOR_EQUAL :
3466         return "|="; //$NON-NLS-1$
3467       case TokenNameXOR_EQUAL :
3468         return "^="; //$NON-NLS-1$
3469       case TokenNameREMAINDER_EQUAL :
3470         return "%="; //$NON-NLS-1$
3471       case TokenNameLEFT_SHIFT_EQUAL :
3472         return "<<="; //$NON-NLS-1$
3473       case TokenNameRIGHT_SHIFT_EQUAL :
3474         return ">>="; //$NON-NLS-1$
3475       case TokenNameOR_OR :
3476         return "||"; //$NON-NLS-1$
3477       case TokenNameAND_AND :
3478         return "&&"; //$NON-NLS-1$
3479       case TokenNamePLUS :
3480         return "+"; //$NON-NLS-1$
3481       case TokenNameMINUS :
3482         return "-"; //$NON-NLS-1$
3483       case TokenNameMINUS_GREATER :
3484         return "->";
3485       case TokenNameNOT :
3486         return "!"; //$NON-NLS-1$
3487       case TokenNameREMAINDER :
3488         return "%"; //$NON-NLS-1$
3489       case TokenNameXOR :
3490         return "^"; //$NON-NLS-1$
3491       case TokenNameAND :
3492         return "&"; //$NON-NLS-1$
3493       case TokenNameMULTIPLY :
3494         return "*"; //$NON-NLS-1$
3495       case TokenNameOR :
3496         return "|"; //$NON-NLS-1$
3497       case TokenNameTWIDDLE :
3498         return "~"; //$NON-NLS-1$
3499       case TokenNameTWIDDLE_EQUAL :
3500         return "~="; //$NON-NLS-1$
3501       case TokenNameDIVIDE :
3502         return "/"; //$NON-NLS-1$
3503       case TokenNameGREATER :
3504         return ">"; //$NON-NLS-1$
3505       case TokenNameLESS :
3506         return "<"; //$NON-NLS-1$
3507       case TokenNameLPAREN :
3508         return "("; //$NON-NLS-1$
3509       case TokenNameRPAREN :
3510         return ")"; //$NON-NLS-1$
3511       case TokenNameLBRACE :
3512         return "{"; //$NON-NLS-1$
3513       case TokenNameRBRACE :
3514         return "}"; //$NON-NLS-1$
3515       case TokenNameLBRACKET :
3516         return "["; //$NON-NLS-1$
3517       case TokenNameRBRACKET :
3518         return "]"; //$NON-NLS-1$
3519       case TokenNameSEMICOLON :
3520         return ";"; //$NON-NLS-1$
3521       case TokenNameQUESTION :
3522         return "?"; //$NON-NLS-1$
3523       case TokenNameCOLON :
3524         return ":"; //$NON-NLS-1$
3525       case TokenNameCOMMA :
3526         return ","; //$NON-NLS-1$
3527       case TokenNameDOT :
3528         return "."; //$NON-NLS-1$
3529       case TokenNameEQUAL :
3530         return "="; //$NON-NLS-1$
3531       case TokenNameAT :
3532         return "@";
3533       case TokenNameDOLLAR_LBRACE :
3534         return "${";
3535       case TokenNameEOF :
3536         return "EOF"; //$NON-NLS-1$
3537       case TokenNameWHITESPACE :
3538         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3539       case TokenNameCOMMENT_LINE :
3540         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3541       case TokenNameCOMMENT_BLOCK :
3542         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3543       case TokenNameCOMMENT_PHPDOC :
3544         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3545       case TokenNameHTML :
3546         return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3547       case TokenNameFILE :
3548         return "__FILE__"; //$NON-NLS-1$
3549       case TokenNameLINE :
3550         return "__LINE__"; //$NON-NLS-1$
3551       case TokenNameCLASS_C :
3552         return "__CLASS__"; //$NON-NLS-1$
3553       case TokenNameMETHOD_C :
3554         return "__METHOD__"; //$NON-NLS-1$
3555       case TokenNameFUNC_C :
3556         return "__FUNCTION__"; //$NON-NLS-1$
3557       default :
3558         return "not-a-token(" + (new Integer(act)) + ") "
3559             + new String(getCurrentTokenSource()); //$NON-NLS-1$
3560     }
3561   }
3562   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3563       boolean checkNonExternalizedStringLiterals) {
3564     this(tokenizeComments, tokenizeWhiteSpace,
3565         checkNonExternalizedStringLiterals, false);
3566   }
3567   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3568       boolean checkNonExternalizedStringLiterals, boolean assertMode) {
3569     this.eofPosition = Integer.MAX_VALUE;
3570     this.tokenizeComments = tokenizeComments;
3571     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3572     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3573     this.assertMode = assertMode;
3574   }
3575   private void checkNonExternalizeString() throws InvalidInputException {
3576     if (currentLine == null)
3577       return;
3578     parseTags(currentLine);
3579   }
3580   private void parseTags(NLSLine line) throws InvalidInputException {
3581     String s = new String(getCurrentTokenSource());
3582     int pos = s.indexOf(TAG_PREFIX);
3583     int lineLength = line.size();
3584     while (pos != -1) {
3585       int start = pos + TAG_PREFIX_LENGTH;
3586       int end = s.indexOf(TAG_POSTFIX, start);
3587       String index = s.substring(start, end);
3588       int i = 0;
3589       try {
3590         i = Integer.parseInt(index) - 1;
3591         // Tags are one based not zero based.
3592       } catch (NumberFormatException e) {
3593         i = -1; // we don't want to consider this as a valid NLS tag
3594       }
3595       if (line.exists(i)) {
3596         line.set(i, null);
3597       }
3598       pos = s.indexOf(TAG_PREFIX, start);
3599     }
3600     this.nonNLSStrings = new StringLiteral[lineLength];
3601     int nonNLSCounter = 0;
3602     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3603       StringLiteral literal = (StringLiteral) iterator.next();
3604       if (literal != null) {
3605         this.nonNLSStrings[nonNLSCounter++] = literal;
3606       }
3607     }
3608     if (nonNLSCounter == 0) {
3609       this.nonNLSStrings = null;
3610       currentLine = null;
3611       return;
3612     }
3613     this.wasNonExternalizedStringLiteral = true;
3614     if (nonNLSCounter != lineLength) {
3615       System.arraycopy(this.nonNLSStrings, 0,
3616           (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
3617           nonNLSCounter);
3618     }
3619     currentLine = null;
3620   }
3621   public final void scanEscapeCharacter() throws InvalidInputException {
3622     // the string with "\\u" is a legal string of two chars \ and u
3623     //thus we use a direct access to the source (for regular cases).
3624     if (unicodeAsBackSlash) {
3625       // consume next character
3626       unicodeAsBackSlash = false;
3627       //                        if (((currentCharacter = source[currentPosition++]) == '\\') &&
3628       // (source[currentPosition] == 'u')) {
3629       //                                getNextUnicodeChar();
3630       //                        } else {
3631       if (withoutUnicodePtr != 0) {
3632         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3633         //                              }
3634       }
3635     } else
3636       currentCharacter = source[currentPosition++];
3637     switch (currentCharacter) {
3638       case 'b' :
3639         currentCharacter = '\b';
3640         break;
3641       case 't' :
3642         currentCharacter = '\t';
3643         break;
3644       case 'n' :
3645         currentCharacter = '\n';
3646         break;
3647       case 'f' :
3648         currentCharacter = '\f';
3649         break;
3650       case 'r' :
3651         currentCharacter = '\r';
3652         break;
3653       case '\"' :
3654         currentCharacter = '\"';
3655         break;
3656       case '\'' :
3657         currentCharacter = '\'';
3658         break;
3659       case '\\' :
3660         currentCharacter = '\\';
3661         break;
3662       default :
3663         // -----------octal escape--------------
3664         // OctalDigit
3665         // OctalDigit OctalDigit
3666         // ZeroToThree OctalDigit OctalDigit
3667         int number = Character.getNumericValue(currentCharacter);
3668         if (number >= 0 && number <= 7) {
3669           boolean zeroToThreeNot = number > 3;
3670           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3671             int digit = Character.getNumericValue(currentCharacter);
3672             if (digit >= 0 && digit <= 7) {
3673               number = (number * 8) + digit;
3674               if (Character
3675                   .isDigit(currentCharacter = source[currentPosition++])) {
3676                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
3677                   // Digit --> ignore last character
3678                   currentPosition--;
3679                 } else {
3680                   digit = Character.getNumericValue(currentCharacter);
3681                   if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
3682                     // OctalDigit OctalDigit
3683                     number = (number * 8) + digit;
3684                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit
3685                     // --> ignore last character
3686                     currentPosition--;
3687                   }
3688                 }
3689               } else { // has read \OctalDigit NonDigit--> ignore last
3690                 // character
3691                 currentPosition--;
3692               }
3693             } else { // has read \OctalDigit NonOctalDigit--> ignore last
3694               // character
3695               currentPosition--;
3696             }
3697           } else { // has read \OctalDigit --> ignore last character
3698             currentPosition--;
3699           }
3700           if (number > 255)
3701             throw new InvalidInputException(INVALID_ESCAPE);
3702           currentCharacter = (char) number;
3703         } else
3704           throw new InvalidInputException(INVALID_ESCAPE);
3705     }
3706   }
3707   // chech presence of task: tags
3708   public void checkTaskTag(int commentStart, int commentEnd) {
3709     // only look for newer task: tags
3710     if (this.foundTaskCount > 0
3711         && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
3712       return;
3713     }
3714     int foundTaskIndex = this.foundTaskCount;
3715     nextChar : for (int i = commentStart; i < commentEnd
3716         && i < this.eofPosition; i++) {
3717       char[] tag = null;
3718       char[] priority = null;
3719       // check for tag occurrence
3720       nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
3721         tag = this.taskTags[itag];
3722         priority = this.taskPriorities != null
3723             && itag < this.taskPriorities.length
3724             ? this.taskPriorities[itag]
3725             : null;
3726         int tagLength = tag.length;
3727         for (int t = 0; t < tagLength; t++) {
3728           if (this.source[i + t] != tag[t])
3729             continue nextTag;
3730         }
3731         if (this.foundTaskTags == null) {
3732           this.foundTaskTags = new char[5][];
3733           this.foundTaskMessages = new char[5][];
3734           this.foundTaskPriorities = new char[5][];
3735           this.foundTaskPositions = new int[5][];
3736         } else if (this.foundTaskCount == this.foundTaskTags.length) {
3737           System.arraycopy(this.foundTaskTags, 0,
3738               this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
3739               this.foundTaskCount);
3740           System.arraycopy(this.foundTaskMessages, 0,
3741               this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
3742               this.foundTaskCount);
3743           System.arraycopy(this.foundTaskPriorities, 0,
3744               this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
3745               0, this.foundTaskCount);
3746           System.arraycopy(this.foundTaskPositions, 0,
3747               this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
3748               this.foundTaskCount);
3749         }
3750         this.foundTaskTags[this.foundTaskCount] = tag;
3751         this.foundTaskPriorities[this.foundTaskCount] = priority;
3752         this.foundTaskPositions[this.foundTaskCount] = new int[]{i,
3753             i + tagLength - 1};
3754         this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
3755         this.foundTaskCount++;
3756         i += tagLength - 1; // will be incremented when looping
3757       }
3758     }
3759     for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
3760       // retrieve message start and end positions
3761       int msgStart = this.foundTaskPositions[i][0]
3762           + this.foundTaskTags[i].length;
3763       int max_value = i + 1 < this.foundTaskCount
3764           ? this.foundTaskPositions[i + 1][0] - 1
3765           : commentEnd - 1;
3766       // at most beginning of next task
3767       if (max_value < msgStart)
3768         max_value = msgStart; // would only occur if tag is before EOF.
3769       int end = -1;
3770       char c;
3771       for (int j = msgStart; j < max_value; j++) {
3772         if ((c = this.source[j]) == '\n' || c == '\r') {
3773           end = j - 1;
3774           break;
3775         }
3776       }
3777       if (end == -1) {
3778         for (int j = max_value; j > msgStart; j--) {
3779           if ((c = this.source[j]) == '*') {
3780             end = j - 1;
3781             break;
3782           }
3783         }
3784         if (end == -1)
3785           end = max_value;
3786       }
3787       if (msgStart == end)
3788         continue; // empty
3789       // trim the message
3790       while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
3791         end--;
3792       while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
3793         msgStart++;
3794       // update the end position of the task
3795       this.foundTaskPositions[i][1] = end;
3796       // get the message source
3797       final int messageLength = end - msgStart + 1;
3798       char[] message = new char[messageLength];
3799       System.arraycopy(source, msgStart, message, 0, messageLength);
3800       this.foundTaskMessages[i] = message;
3801     }
3802   }
3803 }