3.x compatibility
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v0.5 
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v05.html
7  * 
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12 import java.util.ArrayList;
13 import java.util.Iterator;
14 import java.util.List;
15 import java.util.Stack;
16
17 import net.sourceforge.phpdt.core.compiler.CharOperation;
18 import net.sourceforge.phpdt.core.compiler.IScanner;
19 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
20 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
21 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
22 public class Scanner implements IScanner, ITerminalSymbols {
23   /*
24    * APIs ares - getNextToken() which return the current type of the token
25    * (this value is not memorized by the scanner) - getCurrentTokenSource()
26    * which provides with the token "REAL" source (aka all unicode have been
27    * transformed into a correct char) - sourceStart gives the position into the
28    * stream - currentPosition-1 gives the sourceEnd position into the stream
29    */
30   // 1.4 feature
31   private boolean assertMode;
32   public boolean useAssertAsAnIndentifier = false;
33   //flag indicating if processed source contains occurrences of keyword assert
34   public boolean containsAssertKeyword = false;
35   public boolean recordLineSeparator;
36   public boolean phpMode = false;
37   public Stack encapsedStringStack = null;
38   public char currentCharacter;
39   public int startPosition;
40   public int currentPosition;
41   public int initialPosition, eofPosition;
42   // after this position eof are generated instead of real token from the
43   // source
44   public boolean tokenizeComments;
45   public boolean tokenizeWhiteSpace;
46   public boolean tokenizeStrings;
47   //source should be viewed as a window (aka a part)
48   //of a entire very large stream
49   public char source[];
50   //unicode support
51   public char[] withoutUnicodeBuffer;
52   public int withoutUnicodePtr;
53   //when == 0 ==> no unicode in the current token
54   public boolean unicodeAsBackSlash = false;
55   public boolean scanningFloatLiteral = false;
56 //support for /** comments
57         public int[] commentStops = new int[10];
58         public int[] commentStarts = new int[10];
59         public int commentPtr = -1; // no comment test with commentPtr value -1
60         protected int lastCommentLinePosition = -1;
61   //diet parsing support - jump over some method body when requested
62   public boolean diet = false;
63   //support for the poor-line-debuggers ....
64   //remember the position of the cr/lf
65   public int[] lineEnds = new int[250];
66   public int linePtr = -1;
67   public boolean wasAcr = false;
68   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
69   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
70   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
71   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
72   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
73   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
74   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
75   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
76   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
77   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
78   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
79   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
80   //----------------optimized identifier managment------------------
81   static final char[] charArray_a = new char[]{'a'},
82       charArray_b = new char[]{'b'}, charArray_c = new char[]{'c'},
83       charArray_d = new char[]{'d'}, charArray_e = new char[]{'e'},
84       charArray_f = new char[]{'f'}, charArray_g = new char[]{'g'},
85       charArray_h = new char[]{'h'}, charArray_i = new char[]{'i'},
86       charArray_j = new char[]{'j'}, charArray_k = new char[]{'k'},
87       charArray_l = new char[]{'l'}, charArray_m = new char[]{'m'},
88       charArray_n = new char[]{'n'}, charArray_o = new char[]{'o'},
89       charArray_p = new char[]{'p'}, charArray_q = new char[]{'q'},
90       charArray_r = new char[]{'r'}, charArray_s = new char[]{'s'},
91       charArray_t = new char[]{'t'}, charArray_u = new char[]{'u'},
92       charArray_v = new char[]{'v'}, charArray_w = new char[]{'w'},
93       charArray_x = new char[]{'x'}, charArray_y = new char[]{'y'},
94       charArray_z = new char[]{'z'};
95   static final char[] initCharArray = new char[]{'\u0000', '\u0000', '\u0000',
96       '\u0000', '\u0000', '\u0000'};
97   static final int TableSize = 30, InternalTableSize = 6;
98   //30*6 = 180 entries
99   public static final int OptimizedLength = 6;
100   public/* static */
101   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
102   // support for detecting non-externalized string literals
103   int currentLineNr = -1;
104   int previousLineNr = -1;
105   NLSLine currentLine = null;
106   List lines = new ArrayList();
107   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
108   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
109   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
110   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
111   public StringLiteral[] nonNLSStrings = null;
112   public boolean checkNonExternalizedStringLiterals = true;
113   public boolean wasNonExternalizedStringLiteral = false;
/*
 * Instance initializer (the "static" keyword is intentionally commented out,
 * matching the per-instance charArray_length table): points every slot of
 * the identifier cache at the shared initCharArray placeholder.
 */
/* static */{
  for (int lengthSlot = 0; lengthSlot < OptimizedLength; lengthSlot++) {
    char[][][] buckets = charArray_length[lengthSlot];
    for (int bucket = 0; bucket < TableSize; bucket++) {
      char[][] entries = buckets[bucket];
      for (int entry = 0; entry < InternalTableSize; entry++) {
        entries[entry] = initCharArray;
      }
    }
  }
}
123   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0,
124       newEntry6 = 0;
125   public static final int RoundBracket = 0;
126   public static final int SquareBracket = 1;
127   public static final int CurlyBracket = 2;
128   public static final int BracketKinds = 3;
129   // task tag support
130   public char[][] foundTaskTags = null;
131   public char[][] foundTaskMessages;
132   public char[][] foundTaskPriorities = null;
133   public int[][] foundTaskPositions;
134   public int foundTaskCount = 0;
135   public char[][] taskTags = null;
136   public char[][] taskPriorities = null;
137   public static final boolean DEBUG = false;
138   public static final boolean TRACE = false;
139
140   /**
141    * Determines if the specified character is permissible as the first
142    * character in a PHP identifier
143    */
144   public static boolean isPHPIdentifierStart(char ch) {
145     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
146   }
147   /**
148    * Determines if the specified character may be part of a PHP identifier as
149    * other than the first character
150    */
151   public static boolean isPHPIdentifierPart(char ch) {
152     return Character.isLetterOrDigit(ch) || (ch == '_')
153         || (0x7F <= ch && ch <= 0xFF);
154   }
155   public final boolean atEnd() {
156     // This code is not relevant if source is
157     // Only a part of the real stream input
158     return source.length == currentPosition;
159   }
160   public char[] getCurrentIdentifierSource() {
161     //return the token REAL source (aka unicodes are precomputed)
162     char[] result;
163     //    if (withoutUnicodePtr != 0)
164     //      //0 is used as a fast test flag so the real first char is in position 1
165     //      System.arraycopy(
166     //        withoutUnicodeBuffer,
167     //        1,
168     //        result = new char[withoutUnicodePtr],
169     //        0,
170     //        withoutUnicodePtr);
171     //    else {
172     int length = currentPosition - startPosition;
173     switch (length) { // see OptimizedLength
174       case 1 :
175         return optimizedCurrentTokenSource1();
176       case 2 :
177         return optimizedCurrentTokenSource2();
178       case 3 :
179         return optimizedCurrentTokenSource3();
180       case 4 :
181         return optimizedCurrentTokenSource4();
182       case 5 :
183         return optimizedCurrentTokenSource5();
184       case 6 :
185         return optimizedCurrentTokenSource6();
186     }
187     //no optimization
188     System.arraycopy(source, startPosition, result = new char[length], 0,
189         length);
190     //   }
191     return result;
192   }
193   public int getCurrentTokenEndPosition() {
194     return this.currentPosition - 1;
195   }
196   public final char[] getCurrentTokenSource() {
197     // Return the token REAL source (aka unicodes are precomputed)
198     char[] result;
199     //    if (withoutUnicodePtr != 0)
200     //      // 0 is used as a fast test flag so the real first char is in position 1
201     //      System.arraycopy(
202     //        withoutUnicodeBuffer,
203     //        1,
204     //        result = new char[withoutUnicodePtr],
205     //        0,
206     //        withoutUnicodePtr);
207     //    else {
208     int length;
209     System.arraycopy(source, startPosition,
210         result = new char[length = currentPosition - startPosition], 0, length);
211     //    }
212     return result;
213   }
214   public final char[] getCurrentTokenSource(int startPos) {
215     // Return the token REAL source (aka unicodes are precomputed)
216     char[] result;
217     //    if (withoutUnicodePtr != 0)
218     //      // 0 is used as a fast test flag so the real first char is in position 1
219     //      System.arraycopy(
220     //        withoutUnicodeBuffer,
221     //        1,
222     //        result = new char[withoutUnicodePtr],
223     //        0,
224     //        withoutUnicodePtr);
225     //    else {
226     int length;
227     System.arraycopy(source, startPos,
228         result = new char[length = currentPosition - startPos], 0, length);
229     //  }
230     return result;
231   }
232   public final char[] getCurrentTokenSourceString() {
233     //return the token REAL source (aka unicodes are precomputed).
234     //REMOVE the two " that are at the beginning and the end.
235     char[] result;
236     if (withoutUnicodePtr != 0)
237       //0 is used as a fast test flag so the real first char is in position 1
238       System.arraycopy(withoutUnicodeBuffer, 2, 
239       //2 is 1 (real start) + 1 (to jump over the ")
240           result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
241     else {
242       int length;
243       System.arraycopy(source, startPosition + 1,
244           result = new char[length = currentPosition - startPosition - 2], 0,
245           length);
246     }
247     return result;
248   }
249   public int getCurrentTokenStartPosition() {
250     return this.startPosition;
251   }
252   public final char[] getCurrentStringLiteralSource() {
253     // Return the token REAL source (aka unicodes are precomputed)
254     char[] result;
255     int length;
256     System.arraycopy(source, startPosition + 1,
257         result = new char[length = currentPosition - startPosition - 2], 0,
258         length);
259     //    }
260     return result;
261   }
262   /*
263    * Search the source position corresponding to the end of a given line number
264    * 
265    * Line numbers are 1-based, and relative to the scanner initialPosition.
266    * Character positions are 0-based.
267    * 
268    * In case the given line number is inconsistent, answers -1.
269    */
270   public final int getLineEnd(int lineNumber) {
271     if (lineEnds == null)
272       return -1;
273     if (lineNumber >= lineEnds.length)
274       return -1;
275     if (lineNumber <= 0)
276       return -1;
277     if (lineNumber == lineEnds.length - 1)
278       return eofPosition;
279     return lineEnds[lineNumber - 1];
280     // next line start one character behind the lineEnd of the previous line
281   }
282   /**
283    * Search the source position corresponding to the beginning of a given line
284    * number
285    * 
286    * Line numbers are 1-based, and relative to the scanner initialPosition.
287    * Character positions are 0-based.
288    * 
289    * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
290    * 
291    * In case the given line number is inconsistent, answers -1.
292    */
293   public final int getLineStart(int lineNumber) {
294     if (lineEnds == null)
295       return -1;
296     if (lineNumber >= lineEnds.length)
297       return -1;
298     if (lineNumber <= 0)
299       return -1;
300     if (lineNumber == 1)
301       return initialPosition;
302     return lineEnds[lineNumber - 2] + 1;
303     // next line start one character behind the lineEnd of the previous line
304   }
305   public final boolean getNextChar(char testedChar) {
306     //BOOLEAN
307     //handle the case of unicode.
308     //when a unicode appears then we must use a buffer that holds char
309     // internal values
310     //At the end of this method currentCharacter holds the new visited char
311     //and currentPosition points right next after it
312     //Both previous lines are true if the currentCharacter is == to the
313     // testedChar
314     //On false, no side effect has occured.
315     //ALL getNextChar.... ARE OPTIMIZED COPIES
316     int temp = currentPosition;
317     try {
318       currentCharacter = source[currentPosition++];
319       //      if (((currentCharacter = source[currentPosition++]) == '\\')
320       //        && (source[currentPosition] == 'u')) {
321       //        //-------------unicode traitement ------------
322       //        int c1, c2, c3, c4;
323       //        int unicodeSize = 6;
324       //        currentPosition++;
325       //        while (source[currentPosition] == 'u') {
326       //          currentPosition++;
327       //          unicodeSize++;
328       //        }
329       //
330       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
331       //          || c1 < 0)
332       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
333       //            || c2 < 0)
334       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
335       //            || c3 < 0)
336       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
337       //            || c4 < 0)) {
338       //          currentPosition = temp;
339       //          return false;
340       //        }
341       //
342       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
343       //        if (currentCharacter != testedChar) {
344       //          currentPosition = temp;
345       //          return false;
346       //        }
347       //        unicodeAsBackSlash = currentCharacter == '\\';
348       //
349       //        //need the unicode buffer
350       //        if (withoutUnicodePtr == 0) {
351       //          //buffer all the entries that have been left aside....
352       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
353       //          System.arraycopy(
354       //            source,
355       //            startPosition,
356       //            withoutUnicodeBuffer,
357       //            1,
358       //            withoutUnicodePtr);
359       //        }
360       //        //fill the buffer with the char
361       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
362       //        return true;
363       //
364       //      } //-------------end unicode traitement--------------
365       //      else {
366       if (currentCharacter != testedChar) {
367         currentPosition = temp;
368         return false;
369       }
370       unicodeAsBackSlash = false;
371       //        if (withoutUnicodePtr != 0)
372       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
373       return true;
374       //      }
375     } catch (IndexOutOfBoundsException e) {
376       unicodeAsBackSlash = false;
377       currentPosition = temp;
378       return false;
379     }
380   }
381   public final int getNextChar(char testedChar1, char testedChar2) {
382     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
383     //test can be done with (x==0) for the first and (x>0) for the second
384     //handle the case of unicode.
385     //when a unicode appears then we must use a buffer that holds char
386     // internal values
387     //At the end of this method currentCharacter holds the new visited char
388     //and currentPosition points right next after it
389     //Both previous lines are true if the currentCharacter is == to the
390     // testedChar1/2
391     //On false, no side effect has occured.
392     //ALL getNextChar.... ARE OPTIMIZED COPIES
393     int temp = currentPosition;
394     try {
395       int result;
396       currentCharacter = source[currentPosition++];
397       //      if (((currentCharacter = source[currentPosition++]) == '\\')
398       //        && (source[currentPosition] == 'u')) {
399       //        //-------------unicode traitement ------------
400       //        int c1, c2, c3, c4;
401       //        int unicodeSize = 6;
402       //        currentPosition++;
403       //        while (source[currentPosition] == 'u') {
404       //          currentPosition++;
405       //          unicodeSize++;
406       //        }
407       //
408       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
409       //          || c1 < 0)
410       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
411       //            || c2 < 0)
412       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
413       //            || c3 < 0)
414       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
415       //            || c4 < 0)) {
416       //          currentPosition = temp;
417       //          return 2;
418       //        }
419       //
420       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
421       //        if (currentCharacter == testedChar1)
422       //          result = 0;
423       //        else if (currentCharacter == testedChar2)
424       //          result = 1;
425       //        else {
426       //          currentPosition = temp;
427       //          return -1;
428       //        }
429       //
430       //        //need the unicode buffer
431       //        if (withoutUnicodePtr == 0) {
432       //          //buffer all the entries that have been left aside....
433       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
434       //          System.arraycopy(
435       //            source,
436       //            startPosition,
437       //            withoutUnicodeBuffer,
438       //            1,
439       //            withoutUnicodePtr);
440       //        }
441       //        //fill the buffer with the char
442       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
443       //        return result;
444       //      } //-------------end unicode traitement--------------
445       //      else {
446       if (currentCharacter == testedChar1)
447         result = 0;
448       else if (currentCharacter == testedChar2)
449         result = 1;
450       else {
451         currentPosition = temp;
452         return -1;
453       }
454       //        if (withoutUnicodePtr != 0)
455       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
456       return result;
457       //     }
458     } catch (IndexOutOfBoundsException e) {
459       currentPosition = temp;
460       return -1;
461     }
462   }
463   public final boolean getNextCharAsDigit() {
464     //BOOLEAN
465     //handle the case of unicode.
466     //when a unicode appears then we must use a buffer that holds char
467     // internal values
468     //At the end of this method currentCharacter holds the new visited char
469     //and currentPosition points right next after it
470     //Both previous lines are true if the currentCharacter is a digit
471     //On false, no side effect has occured.
472     //ALL getNextChar.... ARE OPTIMIZED COPIES
473     int temp = currentPosition;
474     try {
475       currentCharacter = source[currentPosition++];
476       //      if (((currentCharacter = source[currentPosition++]) == '\\')
477       //        && (source[currentPosition] == 'u')) {
478       //        //-------------unicode traitement ------------
479       //        int c1, c2, c3, c4;
480       //        int unicodeSize = 6;
481       //        currentPosition++;
482       //        while (source[currentPosition] == 'u') {
483       //          currentPosition++;
484       //          unicodeSize++;
485       //        }
486       //
487       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
488       //          || c1 < 0)
489       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
490       //            || c2 < 0)
491       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
492       //            || c3 < 0)
493       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
494       //            || c4 < 0)) {
495       //          currentPosition = temp;
496       //          return false;
497       //        }
498       //
499       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
500       //        if (!Character.isDigit(currentCharacter)) {
501       //          currentPosition = temp;
502       //          return false;
503       //        }
504       //
505       //        //need the unicode buffer
506       //        if (withoutUnicodePtr == 0) {
507       //          //buffer all the entries that have been left aside....
508       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
509       //          System.arraycopy(
510       //            source,
511       //            startPosition,
512       //            withoutUnicodeBuffer,
513       //            1,
514       //            withoutUnicodePtr);
515       //        }
516       //        //fill the buffer with the char
517       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
518       //        return true;
519       //      } //-------------end unicode traitement--------------
520       //      else {
521       if (!Character.isDigit(currentCharacter)) {
522         currentPosition = temp;
523         return false;
524       }
525       //        if (withoutUnicodePtr != 0)
526       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
527       return true;
528       //      }
529     } catch (IndexOutOfBoundsException e) {
530       currentPosition = temp;
531       return false;
532     }
533   }
534   public final boolean getNextCharAsDigit(int radix) {
535     //BOOLEAN
536     //handle the case of unicode.
537     //when a unicode appears then we must use a buffer that holds char
538     // internal values
539     //At the end of this method currentCharacter holds the new visited char
540     //and currentPosition points right next after it
541     //Both previous lines are true if the currentCharacter is a digit base on
542     // radix
543     //On false, no side effect has occured.
544     //ALL getNextChar.... ARE OPTIMIZED COPIES
545     int temp = currentPosition;
546     try {
547       currentCharacter = source[currentPosition++];
548       //      if (((currentCharacter = source[currentPosition++]) == '\\')
549       //        && (source[currentPosition] == 'u')) {
550       //        //-------------unicode traitement ------------
551       //        int c1, c2, c3, c4;
552       //        int unicodeSize = 6;
553       //        currentPosition++;
554       //        while (source[currentPosition] == 'u') {
555       //          currentPosition++;
556       //          unicodeSize++;
557       //        }
558       //
559       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
560       //          || c1 < 0)
561       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
562       //            || c2 < 0)
563       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
564       //            || c3 < 0)
565       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
566       //            || c4 < 0)) {
567       //          currentPosition = temp;
568       //          return false;
569       //        }
570       //
571       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
572       //        if (Character.digit(currentCharacter, radix) == -1) {
573       //          currentPosition = temp;
574       //          return false;
575       //        }
576       //
577       //        //need the unicode buffer
578       //        if (withoutUnicodePtr == 0) {
579       //          //buffer all the entries that have been left aside....
580       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
581       //          System.arraycopy(
582       //            source,
583       //            startPosition,
584       //            withoutUnicodeBuffer,
585       //            1,
586       //            withoutUnicodePtr);
587       //        }
588       //        //fill the buffer with the char
589       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
590       //        return true;
591       //      } //-------------end unicode traitement--------------
592       //      else {
593       if (Character.digit(currentCharacter, radix) == -1) {
594         currentPosition = temp;
595         return false;
596       }
597       //        if (withoutUnicodePtr != 0)
598       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
599       return true;
600       //      }
601     } catch (IndexOutOfBoundsException e) {
602       currentPosition = temp;
603       return false;
604     }
605   }
606   public boolean getNextCharAsJavaIdentifierPart() {
607     //BOOLEAN
608     //handle the case of unicode.
609     //when a unicode appears then we must use a buffer that holds char
610     // internal values
611     //At the end of this method currentCharacter holds the new visited char
612     //and currentPosition points right next after it
613     //Both previous lines are true if the currentCharacter is a
614     // JavaIdentifierPart
615     //On false, no side effect has occured.
616     //ALL getNextChar.... ARE OPTIMIZED COPIES
617     int temp = currentPosition;
618     try {
619       currentCharacter = source[currentPosition++];
620       //      if (((currentCharacter = source[currentPosition++]) == '\\')
621       //        && (source[currentPosition] == 'u')) {
622       //        //-------------unicode traitement ------------
623       //        int c1, c2, c3, c4;
624       //        int unicodeSize = 6;
625       //        currentPosition++;
626       //        while (source[currentPosition] == 'u') {
627       //          currentPosition++;
628       //          unicodeSize++;
629       //        }
630       //
631       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
632       //          || c1 < 0)
633       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
634       //            || c2 < 0)
635       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
636       //            || c3 < 0)
637       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
638       //            || c4 < 0)) {
639       //          currentPosition = temp;
640       //          return false;
641       //        }
642       //
643       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
644       //        if (!isPHPIdentifierPart(currentCharacter)) {
645       //          currentPosition = temp;
646       //          return false;
647       //        }
648       //
649       //        //need the unicode buffer
650       //        if (withoutUnicodePtr == 0) {
651       //          //buffer all the entries that have been left aside....
652       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
653       //          System.arraycopy(
654       //            source,
655       //            startPosition,
656       //            withoutUnicodeBuffer,
657       //            1,
658       //            withoutUnicodePtr);
659       //        }
660       //        //fill the buffer with the char
661       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
662       //        return true;
663       //      } //-------------end unicode traitement--------------
664       //      else {
665       if (!isPHPIdentifierPart(currentCharacter)) {
666         currentPosition = temp;
667         return false;
668       }
669       //        if (withoutUnicodePtr != 0)
670       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
671       return true;
672       //      }
673     } catch (IndexOutOfBoundsException e) {
674       currentPosition = temp;
675       return false;
676     }
677   }
678   public int getCastOrParen() {
679     int tempPosition = currentPosition;
680     char tempCharacter = currentCharacter;
681     int tempToken = TokenNameLPAREN;
682     boolean found = false;
683     StringBuffer buf = new StringBuffer();
684     try {
685       do {
686         currentCharacter = source[currentPosition++];
687       } while (currentCharacter == ' ' || currentCharacter == '\t');
688       while ((currentCharacter >= 'a' && currentCharacter <= 'z')
689           || (currentCharacter >= 'A' && currentCharacter <= 'Z')) {
690         buf.append(currentCharacter);
691         currentCharacter = source[currentPosition++];
692       }
693       if (buf.length() >= 3 && buf.length() <= 7) {
694         char[] data = buf.toString().toCharArray();
695         int index = 0;
696         switch (data.length) {
697           case 3 :
698             // int
699             if ((data[index] == 'i') && (data[++index] == 'n')
700                 && (data[++index] == 't')) {
701               found = true;
702               tempToken = TokenNameintCAST;
703             }
704             break;
705           case 4 :
706             // bool real
707             if ((data[index] == 'b') && (data[++index] == 'o')
708                 && (data[++index] == 'o') && (data[++index] == 'l')) {
709               found = true;
710               tempToken = TokenNameboolCAST;
711             } else {
712               index = 0;
713               if ((data[index] == 'r') && (data[++index] == 'e')
714                   && (data[++index] == 'a') && (data[++index] == 'l')) {
715                 found = true;
716                 tempToken = TokenNamedoubleCAST;
717               }
718             }
719             break;
720           case 5 :
721             // array unset float
722             if ((data[index] == 'a') && (data[++index] == 'r')
723                 && (data[++index] == 'r') && (data[++index] == 'a')
724                 && (data[++index] == 'y')) {
725               found = true;
726               tempToken = TokenNamearrayCAST;
727             } else {
728               index = 0;
729               if ((data[index] == 'u') && (data[++index] == 'n')
730                   && (data[++index] == 's') && (data[++index] == 'e')
731                   && (data[++index] == 't')) {
732                 found = true;
733                 tempToken = TokenNameunsetCAST;
734               } else {
735                 index = 0;
736                 if ((data[index] == 'f') && (data[++index] == 'l')
737                     && (data[++index] == 'o') && (data[++index] == 'a')
738                     && (data[++index] == 't')) {
739                   found = true;
740                   tempToken = TokenNamedoubleCAST;
741                 }
742               }
743             }
744             break;
745           case 6 :
746             // object string double
747             if ((data[index] == 'o') && (data[++index] == 'b')
748                 && (data[++index] == 'j') && (data[++index] == 'e')
749                 && (data[++index] == 'c') && (data[++index] == 't')) {
750               found = true;
751               tempToken = TokenNameobjectCAST;
752             } else {
753               index = 0;
754               if ((data[index] == 's') && (data[++index] == 't')
755                   && (data[++index] == 'r') && (data[++index] == 'i')
756                   && (data[++index] == 'n') && (data[++index] == 'g')) {
757                 found = true;
758                 tempToken = TokenNamestringCAST;
759               } else {
760                 index = 0;
761                 if ((data[index] == 'd') && (data[++index] == 'o')
762                     && (data[++index] == 'u') && (data[++index] == 'b')
763                     && (data[++index] == 'l') && (data[++index] == 'e')) {
764                   found = true;
765                   tempToken = TokenNamedoubleCAST;
766                 }
767               }
768             }
769             break;
770           case 7 :
771             // boolean integer
772             if ((data[index] == 'b') && (data[++index] == 'o')
773                 && (data[++index] == 'o') && (data[++index] == 'l')
774                 && (data[++index] == 'e') && (data[++index] == 'a')
775                 && (data[++index] == 'n')) {
776               found = true;
777               tempToken = TokenNameboolCAST;
778             } else {
779               index = 0;
780               if ((data[index] == 'i') && (data[++index] == 'n')
781                   && (data[++index] == 't') && (data[++index] == 'e')
782                   && (data[++index] == 'g') && (data[++index] == 'e')
783                   && (data[++index] == 'r')) {
784                 found = true;
785                 tempToken = TokenNameintCAST;
786               }
787             }
788             break;
789         }
790         if (found) {
791           while (currentCharacter == ' ' || currentCharacter == '\t') {
792             currentCharacter = source[currentPosition++];
793           }
794           if (currentCharacter == ')') {
795             return tempToken;
796           }
797         }
798       }
799     } catch (IndexOutOfBoundsException e) {
800     }
801     currentCharacter = tempCharacter;
802     currentPosition = tempPosition;
803     return TokenNameLPAREN;
804   }
805   public void consumeStringInterpolated() throws InvalidInputException {
806     try {
807       // consume next character
808       unicodeAsBackSlash = false;
809       currentCharacter = source[currentPosition++];
810       //                if (((currentCharacter = source[currentPosition++]) == '\\')
811       //                  && (source[currentPosition] == 'u')) {
812       //                  getNextUnicodeChar();
813       //                } else {
814       //                  if (withoutUnicodePtr != 0) {
815       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
816       //                      currentCharacter;
817       //                  }
818       //                }
819       while (currentCharacter != '`') {
820         /** ** in PHP \r and \n are valid in string literals *** */
821         //                if ((currentCharacter == '\n')
822         //                  || (currentCharacter == '\r')) {
823         //                  // relocate if finding another quote fairly close: thus unicode
824         // '/u000D' will be fully consumed
825         //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
826         //                    if (currentPosition + lookAhead == source.length)
827         //                      break;
828         //                    if (source[currentPosition + lookAhead] == '\n')
829         //                      break;
830         //                    if (source[currentPosition + lookAhead] == '\"') {
831         //                      currentPosition += lookAhead + 1;
832         //                      break;
833         //                    }
834         //                  }
835         //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
836         //                }
837         if (currentCharacter == '\\') {
838           int escapeSize = currentPosition;
839           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
840           //scanEscapeCharacter make a side effect on this value and we need
841           // the previous value few lines down this one
842           scanDoubleQuotedEscapeCharacter();
843           escapeSize = currentPosition - escapeSize;
844           if (withoutUnicodePtr == 0) {
845             //buffer all the entries that have been left aside....
846             withoutUnicodePtr = currentPosition - escapeSize - 1
847                 - startPosition;
848             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
849                 withoutUnicodePtr);
850             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
851           } else { //overwrite the / in the buffer
852             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
853             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
854                                               // where only one is correct
855               withoutUnicodePtr--;
856             }
857           }
858         }
859         // consume next character
860         unicodeAsBackSlash = false;
861         currentCharacter = source[currentPosition++];
862         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
863         //                    && (source[currentPosition] == 'u')) {
864         //                    getNextUnicodeChar();
865         //                  } else {
866         if (withoutUnicodePtr != 0) {
867           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
868         }
869         //                  }
870       }
871     } catch (IndexOutOfBoundsException e) {
872       throw new InvalidInputException(UNTERMINATED_STRING);
873     } catch (InvalidInputException e) {
874       if (e.getMessage().equals(INVALID_ESCAPE)) {
875         // relocate if finding another quote fairly close: thus unicode
876         // '/u000D' will be fully consumed
877         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
878           if (currentPosition + lookAhead == source.length)
879             break;
880           if (source[currentPosition + lookAhead] == '\n')
881             break;
882           if (source[currentPosition + lookAhead] == '`') {
883             currentPosition += lookAhead + 1;
884             break;
885           }
886         }
887       }
888       throw e; // rethrow
889     }
890     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
891                                               // //$NON-NLS-?$ where ? is an
892                                               // int.
893       if (currentLine == null) {
894         currentLine = new NLSLine();
895         lines.add(currentLine);
896       }
897       currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
898           startPosition, currentPosition - 1));
899     }
900   }
901   public void consumeStringConstant() throws InvalidInputException {
902     try {
903       // consume next character
904       unicodeAsBackSlash = false;
905       currentCharacter = source[currentPosition++];
906       //                if (((currentCharacter = source[currentPosition++]) == '\\')
907       //                  && (source[currentPosition] == 'u')) {
908       //                  getNextUnicodeChar();
909       //                } else {
910       //                  if (withoutUnicodePtr != 0) {
911       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
912       //                      currentCharacter;
913       //                  }
914       //                }
915       while (currentCharacter != '\'') {
916         /** ** in PHP \r and \n are valid in string literals *** */
917         //                  if ((currentCharacter == '\n')
918         //                    || (currentCharacter == '\r')) {
919         //                    // relocate if finding another quote fairly close: thus unicode
920         // '/u000D' will be fully consumed
921         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
922         //                      if (currentPosition + lookAhead == source.length)
923         //                        break;
924         //                      if (source[currentPosition + lookAhead] == '\n')
925         //                        break;
926         //                      if (source[currentPosition + lookAhead] == '\"') {
927         //                        currentPosition += lookAhead + 1;
928         //                        break;
929         //                      }
930         //                    }
931         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
932         //                  }
933         if (currentCharacter == '\\') {
934           int escapeSize = currentPosition;
935           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
936           //scanEscapeCharacter make a side effect on this value and we need
937           // the previous value few lines down this one
938           scanSingleQuotedEscapeCharacter();
939           escapeSize = currentPosition - escapeSize;
940           if (withoutUnicodePtr == 0) {
941             //buffer all the entries that have been left aside....
942             withoutUnicodePtr = currentPosition - escapeSize - 1
943                 - startPosition;
944             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
945                 withoutUnicodePtr);
946             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
947           } else { //overwrite the / in the buffer
948             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
949             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
950                                               // where only one is correct
951               withoutUnicodePtr--;
952             }
953           }
954         }
955         // consume next character
956         unicodeAsBackSlash = false;
957         currentCharacter = source[currentPosition++];
958         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
959         //                    && (source[currentPosition] == 'u')) {
960         //                    getNextUnicodeChar();
961         //                  } else {
962         if (withoutUnicodePtr != 0) {
963           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
964         }
965         //                  }
966       }
967     } catch (IndexOutOfBoundsException e) {
968       throw new InvalidInputException(UNTERMINATED_STRING);
969     } catch (InvalidInputException e) {
970       if (e.getMessage().equals(INVALID_ESCAPE)) {
971         // relocate if finding another quote fairly close: thus unicode
972         // '/u000D' will be fully consumed
973         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
974           if (currentPosition + lookAhead == source.length)
975             break;
976           if (source[currentPosition + lookAhead] == '\n')
977             break;
978           if (source[currentPosition + lookAhead] == '\'') {
979             currentPosition += lookAhead + 1;
980             break;
981           }
982         }
983       }
984       throw e; // rethrow
985     }
986     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
987                                               // //$NON-NLS-?$ where ? is an
988                                               // int.
989       if (currentLine == null) {
990         currentLine = new NLSLine();
991         lines.add(currentLine);
992       }
993       currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
994           startPosition, currentPosition - 1));
995     }
996   }
997   public void consumeStringLiteral() throws InvalidInputException {
998     try {
999       // consume next character
1000       unicodeAsBackSlash = false;
1001       currentCharacter = source[currentPosition++];
1002       //                if (((currentCharacter = source[currentPosition++]) == '\\')
1003       //                  && (source[currentPosition] == 'u')) {
1004       //                  getNextUnicodeChar();
1005       //                } else {
1006       //                  if (withoutUnicodePtr != 0) {
1007       //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1008       //                      currentCharacter;
1009       //                  }
1010       //                }
1011       while (currentCharacter != '"') {
1012         /** ** in PHP \r and \n are valid in string literals *** */
1013         //                  if ((currentCharacter == '\n')
1014         //                    || (currentCharacter == '\r')) {
1015         //                    // relocate if finding another quote fairly close: thus unicode
1016         // '/u000D' will be fully consumed
1017         //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1018         //                      if (currentPosition + lookAhead == source.length)
1019         //                        break;
1020         //                      if (source[currentPosition + lookAhead] == '\n')
1021         //                        break;
1022         //                      if (source[currentPosition + lookAhead] == '\"') {
1023         //                        currentPosition += lookAhead + 1;
1024         //                        break;
1025         //                      }
1026         //                    }
1027         //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1028         //                  }
1029         if (currentCharacter == '\\') {
1030           int escapeSize = currentPosition;
1031           boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1032           //scanEscapeCharacter make a side effect on this value and we need
1033           // the previous value few lines down this one
1034           scanDoubleQuotedEscapeCharacter();
1035           escapeSize = currentPosition - escapeSize;
1036           if (withoutUnicodePtr == 0) {
1037             //buffer all the entries that have been left aside....
1038             withoutUnicodePtr = currentPosition - escapeSize - 1
1039                 - startPosition;
1040             System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1,
1041                 withoutUnicodePtr);
1042             withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1043           } else { //overwrite the / in the buffer
1044             withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1045             if (backSlashAsUnicodeInString) { //there are TWO \ in the stream
1046                                               // where only one is correct
1047               withoutUnicodePtr--;
1048             }
1049           }
1050         }
1051         // consume next character
1052         unicodeAsBackSlash = false;
1053         currentCharacter = source[currentPosition++];
1054         //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1055         //                    && (source[currentPosition] == 'u')) {
1056         //                    getNextUnicodeChar();
1057         //                  } else {
1058         if (withoutUnicodePtr != 0) {
1059           withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1060         }
1061         //                  }
1062       }
1063     } catch (IndexOutOfBoundsException e) {
1064       throw new InvalidInputException(UNTERMINATED_STRING);
1065     } catch (InvalidInputException e) {
1066       if (e.getMessage().equals(INVALID_ESCAPE)) {
1067         // relocate if finding another quote fairly close: thus unicode
1068         // '/u000D' will be fully consumed
1069         for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1070           if (currentPosition + lookAhead == source.length)
1071             break;
1072           if (source[currentPosition + lookAhead] == '\n')
1073             break;
1074           if (source[currentPosition + lookAhead] == '\"') {
1075             currentPosition += lookAhead + 1;
1076             break;
1077           }
1078         }
1079       }
1080       throw e; // rethrow
1081     }
1082     if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags
1083                                               // //$NON-NLS-?$ where ? is an
1084                                               // int.
1085       if (currentLine == null) {
1086         currentLine = new NLSLine();
1087         lines.add(currentLine);
1088       }
1089       currentLine.add(new StringLiteral(getCurrentTokenSourceString(),
1090           startPosition, currentPosition - 1));
1091     }
1092   }
1093   public int getNextToken() throws InvalidInputException {
1094     if (!phpMode) {
1095       return getInlinedHTML(currentPosition);
1096     }
1097     if (phpMode) {
1098       this.wasAcr = false;
1099       if (diet) {
1100         jumpOverMethodBody();
1101         diet = false;
1102         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
1103       }
1104       try {
1105         while (true) {
1106           withoutUnicodePtr = 0;
1107           //start with a new token
1108           char encapsedChar = ' ';
1109           if (!encapsedStringStack.isEmpty()) {
1110             encapsedChar = ((Character) encapsedStringStack.peek()).charValue();
1111           }
1112           if (encapsedChar != '$' && encapsedChar != ' ') {
1113             currentCharacter = source[currentPosition++];
1114             if (currentCharacter == encapsedChar) {
1115               switch (currentCharacter) {
1116                 case '`' :
1117                   return TokenNameEncapsedString0;
1118                 case '\'' :
1119                   return TokenNameEncapsedString1;
1120                 case '"' :
1121                   return TokenNameEncapsedString2;
1122               }
1123             }
1124             while (currentCharacter != encapsedChar) {
1125               /** ** in PHP \r and \n are valid in string literals *** */
1126               switch (currentCharacter) {
1127                 case '\\' :
1128                   int escapeSize = currentPosition;
1129                   boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1130                   //scanEscapeCharacter make a side effect on this value and
1131                   // we need the previous value few lines down this one
1132                   scanDoubleQuotedEscapeCharacter();
1133                   escapeSize = currentPosition - escapeSize;
1134                   if (withoutUnicodePtr == 0) {
1135                     //buffer all the entries that have been left aside....
1136                     withoutUnicodePtr = currentPosition - escapeSize - 1
1137                         - startPosition;
1138                     System.arraycopy(source, startPosition,
1139                         withoutUnicodeBuffer, 1, withoutUnicodePtr);
1140                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1141                   } else { //overwrite the / in the buffer
1142                     withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1143                     if (backSlashAsUnicodeInString) { //there are TWO \ in
1144                       withoutUnicodePtr--;
1145                     }
1146                   }
1147                   break;
1148                 case '$' :
1149                   if (isPHPIdentifierStart(source[currentPosition])
1150                       || source[currentPosition] == '{') {
1151                     currentPosition--;
1152                     encapsedStringStack.push(new Character('$'));
1153                     return TokenNameSTRING;
1154                   }
1155                   break;
1156                 case '{' :
1157                   if (source[currentPosition] == '$') { // CURLY_OPEN
1158                     currentPosition--;
1159                     encapsedStringStack.push(new Character('$'));
1160                     return TokenNameSTRING;
1161                   }
1162               }
1163               // consume next character
1164               unicodeAsBackSlash = false;
1165               currentCharacter = source[currentPosition++];
1166               if (withoutUnicodePtr != 0) {
1167                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1168               }
1169               //                  }
1170             } // end while
1171             currentPosition--;
1172             return TokenNameSTRING;
1173           }
1174           // ---------Consume white space and handles startPosition---------
1175           int whiteStart = currentPosition;
1176           startPosition = currentPosition;
1177           currentCharacter = source[currentPosition++];
1178           if (encapsedChar == '$') {
1179             switch (currentCharacter) {
1180               case '\\' :
1181                 currentCharacter = source[currentPosition++];
1182                 return TokenNameSTRING;
1183               case '{' :
1184                 if (encapsedChar == '$') {
1185                   if (getNextChar('$'))
1186                     return TokenNameCURLY_OPEN;
1187                 }
1188                 return TokenNameLBRACE;
1189               case '}' :
1190                 return TokenNameRBRACE;
1191               case '[' :
1192                 return TokenNameLBRACKET;
1193               case ']' :
1194                 return TokenNameRBRACKET;
1195               case '\'' :
1196                 if (tokenizeStrings) {
1197                   consumeStringConstant();
1198                   return TokenNameStringConstant;
1199                 }
1200                 return TokenNameEncapsedString1;
1201               case '"' :
1202                 return TokenNameEncapsedString2;
1203               case '`' :
1204                 if (tokenizeStrings) {
1205                   consumeStringInterpolated();
1206                   return TokenNameStringInterpolated;
1207                 }
1208                 return TokenNameEncapsedString0;
1209               case '-' :
1210                 if (getNextChar('>'))
1211                   return TokenNameMINUS_GREATER;
1212                 return TokenNameSTRING;
1213               default :
1214                 if (currentCharacter == '$') {
1215                   int oldPosition = currentPosition;
1216                   try {
1217                     currentCharacter = source[currentPosition++];
1218                     if (currentCharacter == '{') {
1219                       return TokenNameDOLLAR_LBRACE;
1220                     }
1221                     if (isPHPIdentifierStart(currentCharacter)) {
1222                       return scanIdentifierOrKeyword(true);
1223                     } else {
1224                       currentPosition = oldPosition;
1225                       return TokenNameSTRING;
1226                     }
1227                   } catch (IndexOutOfBoundsException e) {
1228                     currentPosition = oldPosition;
1229                     return TokenNameSTRING;
1230                   }
1231                 }
1232                 if (isPHPIdentifierStart(currentCharacter))
1233                   return scanIdentifierOrKeyword(false);
1234                 if (Character.isDigit(currentCharacter))
1235                   return scanNumber(false);
1236                 return TokenNameERROR;
1237             }
1238           }
1239           //          boolean isWhiteSpace;
1240           
1241           while ((currentCharacter == ' ')
1242               || Character.isWhitespace(currentCharacter)) {
1243             startPosition = currentPosition;
1244             currentCharacter = source[currentPosition++];
1245             //            if (((currentCharacter = source[currentPosition++]) == '\\')
1246             //              && (source[currentPosition] == 'u')) {
1247             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
1248             //            } else {
1249             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1250               checkNonExternalizeString();
1251               if (recordLineSeparator) {
1252                 pushLineSeparator();
1253               } else {
1254                 currentLine = null;
1255               }
1256             }
1257             //            isWhiteSpace = (currentCharacter == ' ')
1258             //                || Character.isWhitespace(currentCharacter);
1259             //            }
1260           }
1261           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
1262             // reposition scanner in case we are interested by spaces as tokens
1263             currentPosition--;
1264             startPosition = whiteStart;
1265             return TokenNameWHITESPACE;
1266           }
1267           //little trick to get out in the middle of a source compuation
1268           if (currentPosition > eofPosition)
1269             return TokenNameEOF;
1270           // ---------Identify the next token-------------
1271           switch (currentCharacter) {
1272             case '(' :
1273               return getCastOrParen();
1274             case ')' :
1275               return TokenNameRPAREN;
1276             case '{' :
1277               return TokenNameLBRACE;
1278             case '}' :
1279               return TokenNameRBRACE;
1280             case '[' :
1281               return TokenNameLBRACKET;
1282             case ']' :
1283               return TokenNameRBRACKET;
1284             case ';' :
1285               return TokenNameSEMICOLON;
1286             case ',' :
1287               return TokenNameCOMMA;
1288             case '.' :
1289               if (getNextChar('='))
1290                 return TokenNameDOT_EQUAL;
1291               if (getNextCharAsDigit())
1292                 return scanNumber(true);
1293               return TokenNameDOT;
1294             case '+' :
1295               {
1296                 int test;
1297                 if ((test = getNextChar('+', '=')) == 0)
1298                   return TokenNamePLUS_PLUS;
1299                 if (test > 0)
1300                   return TokenNamePLUS_EQUAL;
1301                 return TokenNamePLUS;
1302               }
1303             case '-' :
1304               {
1305                 int test;
1306                 if ((test = getNextChar('-', '=')) == 0)
1307                   return TokenNameMINUS_MINUS;
1308                 if (test > 0)
1309                   return TokenNameMINUS_EQUAL;
1310                 if (getNextChar('>'))
1311                   return TokenNameMINUS_GREATER;
1312                 return TokenNameMINUS;
1313               }
1314             case '~' :
1315               if (getNextChar('='))
1316                 return TokenNameTWIDDLE_EQUAL;
1317               return TokenNameTWIDDLE;
1318             case '!' :
1319               if (getNextChar('=')) {
1320                 if (getNextChar('=')) {
1321                   return TokenNameNOT_EQUAL_EQUAL;
1322                 }
1323                 return TokenNameNOT_EQUAL;
1324               }
1325               return TokenNameNOT;
1326             case '*' :
1327               if (getNextChar('='))
1328                 return TokenNameMULTIPLY_EQUAL;
1329               return TokenNameMULTIPLY;
1330             case '%' :
1331               if (getNextChar('='))
1332                 return TokenNameREMAINDER_EQUAL;
1333               return TokenNameREMAINDER;
1334             case '<' :
1335               {
1336                 int oldPosition = currentPosition;
1337                 try {
1338                   currentCharacter = source[currentPosition++];
1339                 } catch (IndexOutOfBoundsException e) {
1340                   currentPosition = oldPosition;
1341                   return TokenNameLESS;
1342                 }
1343                 switch (currentCharacter) {
1344                   case '=' :
1345                     return TokenNameLESS_EQUAL;
1346                   case '>' :
1347                     return TokenNameNOT_EQUAL;
1348                   case '<' :
1349                     if (getNextChar('='))
1350                       return TokenNameLEFT_SHIFT_EQUAL;
1351                     if (getNextChar('<')) {
1352                       currentCharacter = source[currentPosition++];
1353                       while (Character.isWhitespace(currentCharacter)) {
1354                         currentCharacter = source[currentPosition++];
1355                       }
1356                       int heredocStart = currentPosition - 1;
1357                       int heredocLength = 0;
1358                       if (isPHPIdentifierStart(currentCharacter)) {
1359                         currentCharacter = source[currentPosition++];
1360                       } else {
1361                         return TokenNameERROR;
1362                       }
1363                       while (isPHPIdentifierPart(currentCharacter)) {
1364                         currentCharacter = source[currentPosition++];
1365                       }
1366                       heredocLength = currentPosition - heredocStart - 1;
1367                       // heredoc end-tag determination
1368                       boolean endTag = true;
1369                       char ch;
1370                       do {
1371                         ch = source[currentPosition++];
1372                         if (ch == '\r' || ch == '\n') {
1373                           if (recordLineSeparator) {
1374                             pushLineSeparator();
1375                           } else {
1376                             currentLine = null;
1377                           }
1378                           for (int i = 0; i < heredocLength; i++) {
1379                             if (source[currentPosition + i] != source[heredocStart
1380                                 + i]) {
1381                               endTag = false;
1382                               break;
1383                             }
1384                           }
1385                           if (endTag) {
1386                             currentPosition += heredocLength - 1;
1387                             currentCharacter = source[currentPosition++];
1388                             break; // do...while loop
1389                           } else {
1390                             endTag = true;
1391                           }
1392                         }
1393                       } while (true);
1394                       return TokenNameHEREDOC;
1395                     }
1396                     return TokenNameLEFT_SHIFT;
1397                 }
1398                 currentPosition = oldPosition;
1399                 return TokenNameLESS;
1400               }
1401             case '>' :
1402               {
1403                 int test;
1404                 if ((test = getNextChar('=', '>')) == 0)
1405                   return TokenNameGREATER_EQUAL;
1406                 if (test > 0) {
1407                   if ((test = getNextChar('=', '>')) == 0)
1408                     return TokenNameRIGHT_SHIFT_EQUAL;
1409                   return TokenNameRIGHT_SHIFT;
1410                 }
1411                 return TokenNameGREATER;
1412               }
1413             case '=' :
1414               if (getNextChar('=')) {
1415                 if (getNextChar('=')) {
1416                   return TokenNameEQUAL_EQUAL_EQUAL;
1417                 }
1418                 return TokenNameEQUAL_EQUAL;
1419               }
1420               if (getNextChar('>'))
1421                 return TokenNameEQUAL_GREATER;
1422               return TokenNameEQUAL;
1423             case '&' :
1424               {
1425                 int test;
1426                 if ((test = getNextChar('&', '=')) == 0)
1427                   return TokenNameAND_AND;
1428                 if (test > 0)
1429                   return TokenNameAND_EQUAL;
1430                 return TokenNameAND;
1431               }
1432             case '|' :
1433               {
1434                 int test;
1435                 if ((test = getNextChar('|', '=')) == 0)
1436                   return TokenNameOR_OR;
1437                 if (test > 0)
1438                   return TokenNameOR_EQUAL;
1439                 return TokenNameOR;
1440               }
1441             case '^' :
1442               if (getNextChar('='))
1443                 return TokenNameXOR_EQUAL;
1444               return TokenNameXOR;
1445             case '?' :
1446               if (getNextChar('>')) {
1447                 phpMode = false;
1448                 if (currentPosition == source.length) {
1449                   phpMode = true;
1450                   return TokenNameINLINE_HTML;
1451                 }
1452                 return getInlinedHTML(currentPosition - 2);
1453               }
1454               return TokenNameQUESTION;
1455             case ':' :
1456               if (getNextChar(':'))
1457                 return TokenNamePAAMAYIM_NEKUDOTAYIM;
1458               return TokenNameCOLON;
1459             case '@' :
1460               return TokenNameAT;
1461             case '\'' :
1462               consumeStringConstant();
1463               return TokenNameStringConstant;
1464             case '"' :
1465               if (tokenizeStrings) {
1466                 consumeStringLiteral();
1467                 return TokenNameStringLiteral;
1468               }
1469               return TokenNameEncapsedString2;
1470             case '`' :
1471               if (tokenizeStrings) {
1472                 consumeStringInterpolated();
1473                 return TokenNameStringInterpolated;
1474               }
1475               return TokenNameEncapsedString0;
1476             case '#' :
1477             case '/' :
1478               {
1479                 char startChar = currentCharacter;
1480                 if (getNextChar('=')) {
1481                   return TokenNameDIVIDE_EQUAL;
1482                 }
1483                 int test;
1484                 if ((startChar == '#') || (test = getNextChar('/', '*')) == 0) {
1485                   //line comment
1486                         this.lastCommentLinePosition = this.currentPosition;
1487                   int endPositionForLineComment = 0;
1488                   try { //get the next char
1489                     currentCharacter = source[currentPosition++];
1490                     //                    if (((currentCharacter = source[currentPosition++])
1491                     //                      == '\\')
1492                     //                      && (source[currentPosition] == 'u')) {
1493                     //                      //-------------unicode traitement ------------
1494                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1495                     //                      currentPosition++;
1496                     //                      while (source[currentPosition] == 'u') {
1497                     //                        currentPosition++;
1498                     //                      }
1499                     //                      if ((c1 =
1500                     //                        Character.getNumericValue(source[currentPosition++]))
1501                     //                        > 15
1502                     //                        || c1 < 0
1503                     //                        || (c2 =
1504                     //                          Character.getNumericValue(source[currentPosition++]))
1505                     //                          > 15
1506                     //                        || c2 < 0
1507                     //                        || (c3 =
1508                     //                          Character.getNumericValue(source[currentPosition++]))
1509                     //                          > 15
1510                     //                        || c3 < 0
1511                     //                        || (c4 =
1512                     //                          Character.getNumericValue(source[currentPosition++]))
1513                     //                          > 15
1514                     //                        || c4 < 0) {
1515                     //                        throw new
1516                     // InvalidInputException(INVALID_UNICODE_ESCAPE);
1517                     //                      } else {
1518                     //                        currentCharacter =
1519                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1520                     //                      }
1521                     //                    }
1522                     //handle the \\u case manually into comment
1523                     //                    if (currentCharacter == '\\') {
1524                     //                      if (source[currentPosition] == '\\')
1525                     //                        currentPosition++;
1526                     //                    } //jump over the \\
1527                     boolean isUnicode = false;
1528                     while (currentCharacter != '\r' && currentCharacter != '\n') {
1529                         this.lastCommentLinePosition = this.currentPosition;
1530                       if (currentCharacter == '?') {
1531                         if (getNextChar('>')) {
1532                           startPosition = currentPosition - 2;
1533                           phpMode = false;
1534                           return TokenNameINLINE_HTML;
1535                         }
1536                       }
1537                       //get the next char
1538                       isUnicode = false;
1539                       currentCharacter = source[currentPosition++];
1540                       //                      if (((currentCharacter = source[currentPosition++])
1541                       //                        == '\\')
1542                       //                        && (source[currentPosition] == 'u')) {
1543                       //                        isUnicode = true;
1544                       //                        //-------------unicode traitement ------------
1545                       //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1546                       //                        currentPosition++;
1547                       //                        while (source[currentPosition] == 'u') {
1548                       //                          currentPosition++;
1549                       //                        }
1550                       //                        if ((c1 =
1551                       //                          Character.getNumericValue(source[currentPosition++]))
1552                       //                          > 15
1553                       //                          || c1 < 0
1554                       //                          || (c2 =
1555                       //                            Character.getNumericValue(
1556                       //                              source[currentPosition++]))
1557                       //                            > 15
1558                       //                          || c2 < 0
1559                       //                          || (c3 =
1560                       //                            Character.getNumericValue(
1561                       //                              source[currentPosition++]))
1562                       //                            > 15
1563                       //                          || c3 < 0
1564                       //                          || (c4 =
1565                       //                            Character.getNumericValue(
1566                       //                              source[currentPosition++]))
1567                       //                            > 15
1568                       //                          || c4 < 0) {
1569                       //                          throw new
1570                       // InvalidInputException(INVALID_UNICODE_ESCAPE);
1571                       //                        } else {
1572                       //                          currentCharacter =
1573                       //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1574                       //                        }
1575                       //                      }
1576                       //handle the \\u case manually into comment
1577                       //                      if (currentCharacter == '\\') {
1578                       //                        if (source[currentPosition] == '\\')
1579                       //                          currentPosition++;
1580                       //                      } //jump over the \\
1581                     }
1582                     if (isUnicode) {
1583                       endPositionForLineComment = currentPosition - 6;
1584                     } else {
1585                       endPositionForLineComment = currentPosition - 1;
1586                     }
1587 //                    recordComment(false);
1588                     recordComment(TokenNameCOMMENT_LINE);
1589                     if (this.taskTags != null) checkTaskTag(this.startPosition, this.currentPosition);
1590                                         if ((currentCharacter == '\r')
1591                         || (currentCharacter == '\n')) {
1592                       checkNonExternalizeString();
1593                       if (recordLineSeparator) {
1594                         if (isUnicode) {
1595                           pushUnicodeLineSeparator();
1596                         } else {
1597                           pushLineSeparator();
1598                         }
1599                       } else {
1600                         currentLine = null;
1601                       }
1602                     }
1603                     if (tokenizeComments) {
1604                       if (!isUnicode) {
1605                         currentPosition = endPositionForLineComment;
1606                         // reset one character behind
1607                       }
1608                       return TokenNameCOMMENT_LINE;
1609                     }
1610                   } catch (IndexOutOfBoundsException e) { //an eof will them
1611                     // be generated
1612                     if (tokenizeComments) {
1613                       currentPosition--;
1614                       // reset one character behind
1615                       return TokenNameCOMMENT_LINE;
1616                     }
1617                   }
1618                   break;
1619                 }
1620                 if (test > 0) {
1621                   //traditional and annotation comment
1622                   boolean isJavadoc = false, star = false;
1623                   // consume next character
1624                   unicodeAsBackSlash = false;
1625                   currentCharacter = source[currentPosition++];
1626                   //                  if (((currentCharacter = source[currentPosition++]) ==
1627                   // '\\')
1628                   //                    && (source[currentPosition] == 'u')) {
1629                   //                    getNextUnicodeChar();
1630                   //                  } else {
1631                   //                    if (withoutUnicodePtr != 0) {
1632                   //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1633                   //                        currentCharacter;
1634                   //                    }
1635                   //                  }
1636                   if (currentCharacter == '*') {
1637                     isJavadoc = true;
1638                     star = true;
1639                   }
1640                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1641                     checkNonExternalizeString();
1642                     if (recordLineSeparator) {
1643                       pushLineSeparator();
1644                     } else {
1645                       currentLine = null;
1646                     }
1647                   }
1648                   try { //get the next char
1649                     currentCharacter = source[currentPosition++];
1650                     //                    if (((currentCharacter = source[currentPosition++])
1651                     //                      == '\\')
1652                     //                      && (source[currentPosition] == 'u')) {
1653                     //                      //-------------unicode traitement ------------
1654                     //                      getNextUnicodeChar();
1655                     //                    }
1656                     //handle the \\u case manually into comment
1657                     //                    if (currentCharacter == '\\') {
1658                     //                      if (source[currentPosition] == '\\')
1659                     //                        currentPosition++;
1660                     //                      //jump over the \\
1661                     //                    }
1662                     // empty comment is not a javadoc /**/
1663                     if (currentCharacter == '/') {
1664                       isJavadoc = false;
1665                     }
1666                     //loop until end of comment */
1667                     while ((currentCharacter != '/') || (!star)) {
1668                       if ((currentCharacter == '\r')
1669                           || (currentCharacter == '\n')) {
1670                         checkNonExternalizeString();
1671                         if (recordLineSeparator) {
1672                           pushLineSeparator();
1673                         } else {
1674                           currentLine = null;
1675                         }
1676                       }
1677                       star = currentCharacter == '*';
1678                       //get next char
1679                       currentCharacter = source[currentPosition++];
1680                       //                      if (((currentCharacter = source[currentPosition++])
1681                       //                        == '\\')
1682                       //                        && (source[currentPosition] == 'u')) {
1683                       //                        //-------------unicode traitement ------------
1684                       //                        getNextUnicodeChar();
1685                       //                      }
1686                       //handle the \\u case manually into comment
1687                       //                      if (currentCharacter == '\\') {
1688                       //                        if (source[currentPosition] == '\\')
1689                       //                          currentPosition++;
1690                       //                      } //jump over the \\
1691                     }
1692                     //recordComment(isJavadoc);
1693                     if (isJavadoc) {
1694                         recordComment(TokenNameCOMMENT_PHPDOC);
1695                     } else {
1696                         recordComment(TokenNameCOMMENT_BLOCK);
1697                     }
1698                     
1699                     if (tokenizeComments) {
1700                       if (isJavadoc)
1701                         return TokenNameCOMMENT_PHPDOC;
1702                       return TokenNameCOMMENT_BLOCK;
1703                     }
1704                   } catch (IndexOutOfBoundsException e) {
1705                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1706                   }
1707                   break;
1708                 }
1709                 return TokenNameDIVIDE;
1710               }
1711             case '\u001a' :
1712               if (atEnd())
1713                 return TokenNameEOF;
1714               //the atEnd may not be <currentPosition == source.length> if
1715               // source is only some part of a real (external) stream
1716               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1717             default :
1718               if (currentCharacter == '$') {
1719                 int oldPosition = currentPosition;
1720                 try {
1721                   currentCharacter = source[currentPosition++];
1722                   if (isPHPIdentifierStart(currentCharacter)) {
1723                     return scanIdentifierOrKeyword(true);
1724                   } else {
1725                     currentPosition = oldPosition;
1726                     return TokenNameDOLLAR;
1727                   }
1728                 } catch (IndexOutOfBoundsException e) {
1729                   currentPosition = oldPosition;
1730                   return TokenNameDOLLAR;
1731                 }
1732               }
1733               if (isPHPIdentifierStart(currentCharacter))
1734                 return scanIdentifierOrKeyword(false);
1735               if (Character.isDigit(currentCharacter))
1736                 return scanNumber(false);
1737               return TokenNameERROR;
1738           }
1739         }
1740       } //-----------------end switch while try--------------------
1741       catch (IndexOutOfBoundsException e) {
1742       }
1743     }
1744     return TokenNameEOF;
1745   }
1746   
1747   private int getInlinedHTML(int start) throws InvalidInputException {
1748         int token = getInlinedHTMLToken(start);
1749         if (token == TokenNameINLINE_HTML) {
1750 //              Stack stack = new Stack();
1751 //              // scan html for errors
1752 //              Source inlinedHTMLSource = new Source(new String(source, startPosition, currentPosition - startPosition));
1753 //              int lastPHPEndPos=0;
1754 //              for (Iterator i=inlinedHTMLSource.getNextTagIterator(0); i.hasNext();) {
1755 //                  Tag tag=(Tag)i.next();
1756 //                  
1757 //                  if (tag instanceof StartTag) {
1758 //                      StartTag startTag=(StartTag)tag;
1759 //                    //  System.out.println("startTag: "+tag);
1760 //                      if (startTag.isServerTag()) {
1761 //                        // TODO : what to do with a server tag ? 
1762 //                      } else {
1763 //                          // do whatever with HTML start tag
1764 //                          // use startTag.getElement() to find the element corresponding
1765 //                          // to this start tag which may be useful if you implement code
1766 //                          // folding etc
1767 //                              stack.push(startTag);
1768 //                      }
1769 //                  } else {
1770 //                      EndTag endTag=(EndTag)tag;
1771 //                      StartTag stag = (StartTag) stack.peek();
1772 ////                  System.out.println("endTag: "+tag);
1773 //                      // do whatever with HTML end tag.
1774 //                  }
1775 //              }
1776         }
1777         return token;
1778   }
1779   /**
1780    * @return @throws
1781    *         InvalidInputException
1782    */
1783   private int getInlinedHTMLToken(int start) throws InvalidInputException {
1784     //    int htmlPosition = start;
1785     if (currentPosition > source.length) {
1786       currentPosition = source.length;
1787       return TokenNameEOF;
1788     }
1789     startPosition = start;
1790     try {
1791       while (!phpMode) {
1792         currentCharacter = source[currentPosition++];
1793         if (currentCharacter == '<') {
1794           if (getNextChar('?')) {
1795             currentCharacter = source[currentPosition++];
1796             if ((currentCharacter == ' ')
1797                 || Character.isWhitespace(currentCharacter)) {
1798               // <?
1799               phpMode = true;
1800               return TokenNameINLINE_HTML;
1801             } else {
1802               boolean phpStart = (currentCharacter == 'P')
1803                   || (currentCharacter == 'p');
1804               if (phpStart) {
1805                 int test = getNextChar('H', 'h');
1806                 if (test >= 0) {
1807                   test = getNextChar('P', 'p');
1808                   if (test >= 0) {
1809                     // <?PHP <?php
1810                     phpMode = true;
1811                     return TokenNameINLINE_HTML;
1812                   }
1813                 }
1814               }
1815             }
1816           }
1817         }
1818         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1819           if (recordLineSeparator) {
1820             pushLineSeparator();
1821           } else {
1822             currentLine = null;
1823           }
1824         }
1825       } //-----------------while--------------------
1826       phpMode = true;
1827       return TokenNameINLINE_HTML;
1828     } //-----------------try--------------------
1829     catch (IndexOutOfBoundsException e) {
1830       startPosition = start;
1831       currentPosition--;
1832     }
1833     phpMode = true;
1834     return TokenNameINLINE_HTML;
1835   }
1836   //  public final void getNextUnicodeChar()
1837   //    throws IndexOutOfBoundsException, InvalidInputException {
1838   //    //VOID
1839   //    //handle the case of unicode.
1840   //    //when a unicode appears then we must use a buffer that holds char
1841   // internal values
1842   //    //At the end of this method currentCharacter holds the new visited char
1843   //    //and currentPosition points right next after it
1844   //
1845   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
1846   //
1847   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1848   //    currentPosition++;
1849   //    while (source[currentPosition] == 'u') {
1850   //      currentPosition++;
1851   //      unicodeSize++;
1852   //    }
1853   //
1854   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1855   //      || c1 < 0
1856   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1857   //      || c2 < 0
1858   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1859   //      || c3 < 0
1860   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1861   //      || c4 < 0) {
1862   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1863   //    } else {
1864   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1865   //      //need the unicode buffer
1866   //      if (withoutUnicodePtr == 0) {
1867   //        //buffer all the entries that have been left aside....
1868   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1869   //        System.arraycopy(
1870   //          source,
1871   //          startPosition,
1872   //          withoutUnicodeBuffer,
1873   //          1,
1874   //          withoutUnicodePtr);
1875   //      }
1876   //      //fill the buffer with the char
1877   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1878   //    }
1879   //    unicodeAsBackSlash = currentCharacter == '\\';
1880   //  }
1881   /*
1882    * Tokenize a method body, assuming that curly brackets are properly
1883    * balanced.
1884    */
1885   public final void jumpOverMethodBody() {
1886     this.wasAcr = false;
1887     int found = 1;
1888     try {
1889       while (true) { //loop for jumping over comments
1890         // ---------Consume white space and handles startPosition---------
1891         boolean isWhiteSpace;
1892         do {
1893           startPosition = currentPosition;
1894           currentCharacter = source[currentPosition++];
1895           //          if (((currentCharacter = source[currentPosition++]) == '\\')
1896           //            && (source[currentPosition] == 'u')) {
1897           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1898           //          } else {
1899           if (recordLineSeparator
1900               && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1901             pushLineSeparator();
1902           isWhiteSpace = Character.isWhitespace(currentCharacter);
1903           //          }
1904         } while (isWhiteSpace);
1905         // -------consume token until } is found---------
1906         switch (currentCharacter) {
1907           case '{' :
1908             found++;
1909             break;
1910           case '}' :
1911             found--;
1912             if (found == 0)
1913               return;
1914             break;
1915           case '\'' :
1916             {
1917               boolean test;
1918               test = getNextChar('\\');
1919               if (test) {
1920                 try {
1921                   scanDoubleQuotedEscapeCharacter();
1922                 } catch (InvalidInputException ex) {
1923                 };
1924               } else {
1925                 //                try { // consume next character
1926                 unicodeAsBackSlash = false;
1927                 currentCharacter = source[currentPosition++];
1928                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1929                 //                    && (source[currentPosition] == 'u')) {
1930                 //                    getNextUnicodeChar();
1931                 //                  } else {
1932                 if (withoutUnicodePtr != 0) {
1933                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1934                 }
1935                 //                  }
1936                 //                } catch (InvalidInputException ex) {
1937                 //                };
1938               }
1939               getNextChar('\'');
1940               break;
1941             }
1942           case '"' :
1943             try {
1944               //              try { // consume next character
1945               unicodeAsBackSlash = false;
1946               currentCharacter = source[currentPosition++];
1947               //                if (((currentCharacter = source[currentPosition++]) == '\\')
1948               //                  && (source[currentPosition] == 'u')) {
1949               //                  getNextUnicodeChar();
1950               //                } else {
1951               if (withoutUnicodePtr != 0) {
1952                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1953               }
1954               //                }
1955               //              } catch (InvalidInputException ex) {
1956               //              };
1957               while (currentCharacter != '"') {
1958                 if (currentCharacter == '\r') {
1959                   if (source[currentPosition] == '\n')
1960                     currentPosition++;
1961                   break;
1962                   // the string cannot go further that the line
1963                 }
1964                 if (currentCharacter == '\n') {
1965                   break;
1966                   // the string cannot go further that the line
1967                 }
1968                 if (currentCharacter == '\\') {
1969                   try {
1970                     scanDoubleQuotedEscapeCharacter();
1971                   } catch (InvalidInputException ex) {
1972                   };
1973                 }
1974                 //                try { // consume next character
1975                 unicodeAsBackSlash = false;
1976                 currentCharacter = source[currentPosition++];
1977                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1978                 //                    && (source[currentPosition] == 'u')) {
1979                 //                    getNextUnicodeChar();
1980                 //                  } else {
1981                 if (withoutUnicodePtr != 0) {
1982                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1983                 }
1984                 //                  }
1985                 //                } catch (InvalidInputException ex) {
1986                 //                };
1987               }
1988             } catch (IndexOutOfBoundsException e) {
1989               return;
1990             }
1991             break;
1992           case '/' :
1993             {
1994               int test;
1995               if ((test = getNextChar('/', '*')) == 0) {
1996                 //line comment
1997                 try {
1998                   //get the next char
1999                   currentCharacter = source[currentPosition++];
2000                   //                  if (((currentCharacter = source[currentPosition++]) ==
2001                   // '\\')
2002                   //                    && (source[currentPosition] == 'u')) {
2003                   //                    //-------------unicode traitement ------------
2004                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2005                   //                    currentPosition++;
2006                   //                    while (source[currentPosition] == 'u') {
2007                   //                      currentPosition++;
2008                   //                    }
2009                   //                    if ((c1 =
2010                   //                      Character.getNumericValue(source[currentPosition++]))
2011                   //                      > 15
2012                   //                      || c1 < 0
2013                   //                      || (c2 =
2014                   //                        Character.getNumericValue(source[currentPosition++]))
2015                   //                        > 15
2016                   //                      || c2 < 0
2017                   //                      || (c3 =
2018                   //                        Character.getNumericValue(source[currentPosition++]))
2019                   //                        > 15
2020                   //                      || c3 < 0
2021                   //                      || (c4 =
2022                   //                        Character.getNumericValue(source[currentPosition++]))
2023                   //                        > 15
2024                   //                      || c4 < 0) {
2025                   //                      //error don't care of the value
2026                   //                      currentCharacter = 'A';
2027                   //                    } //something different from \n and \r
2028                   //                    else {
2029                   //                      currentCharacter =
2030                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2031                   //                    }
2032                   //                  }
2033                   while (currentCharacter != '\r' && currentCharacter != '\n') {
2034                     //get the next char
2035                     currentCharacter = source[currentPosition++];
2036                     //                    if (((currentCharacter = source[currentPosition++])
2037                     //                      == '\\')
2038                     //                      && (source[currentPosition] == 'u')) {
2039                     //                      //-------------unicode traitement ------------
2040                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2041                     //                      currentPosition++;
2042                     //                      while (source[currentPosition] == 'u') {
2043                     //                        currentPosition++;
2044                     //                      }
2045                     //                      if ((c1 =
2046                     //                        Character.getNumericValue(source[currentPosition++]))
2047                     //                        > 15
2048                     //                        || c1 < 0
2049                     //                        || (c2 =
2050                     //                          Character.getNumericValue(source[currentPosition++]))
2051                     //                          > 15
2052                     //                        || c2 < 0
2053                     //                        || (c3 =
2054                     //                          Character.getNumericValue(source[currentPosition++]))
2055                     //                          > 15
2056                     //                        || c3 < 0
2057                     //                        || (c4 =
2058                     //                          Character.getNumericValue(source[currentPosition++]))
2059                     //                          > 15
2060                     //                        || c4 < 0) {
2061                     //                        //error don't care of the value
2062                     //                        currentCharacter = 'A';
2063                     //                      } //something different from \n and \r
2064                     //                      else {
2065                     //                        currentCharacter =
2066                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2067                     //                      }
2068                     //                    }
2069                   }
2070                   if (recordLineSeparator
2071                       && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2072                     pushLineSeparator();
2073                 } catch (IndexOutOfBoundsException e) {
2074                 } //an eof will them be generated
2075                 break;
2076               }
2077               if (test > 0) {
2078                 //traditional and annotation comment
2079                 boolean star = false;
2080                 //                try { // consume next character
2081                 unicodeAsBackSlash = false;
2082                 currentCharacter = source[currentPosition++];
2083                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
2084                 //                    && (source[currentPosition] == 'u')) {
2085                 //                    getNextUnicodeChar();
2086                 //                  } else {
2087                 if (withoutUnicodePtr != 0) {
2088                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2089                 }
2090                 //                  };
2091                 //                } catch (InvalidInputException ex) {
2092                 //                };
2093                 if (currentCharacter == '*') {
2094                   star = true;
2095                 }
2096                 if (recordLineSeparator
2097                     && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2098                   pushLineSeparator();
2099                 try { //get the next char
2100                   currentCharacter = source[currentPosition++];
2101                   //                  if (((currentCharacter = source[currentPosition++]) ==
2102                   // '\\')
2103                   //                    && (source[currentPosition] == 'u')) {
2104                   //                    //-------------unicode traitement ------------
2105                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2106                   //                    currentPosition++;
2107                   //                    while (source[currentPosition] == 'u') {
2108                   //                      currentPosition++;
2109                   //                    }
2110                   //                    if ((c1 =
2111                   //                      Character.getNumericValue(source[currentPosition++]))
2112                   //                      > 15
2113                   //                      || c1 < 0
2114                   //                      || (c2 =
2115                   //                        Character.getNumericValue(source[currentPosition++]))
2116                   //                        > 15
2117                   //                      || c2 < 0
2118                   //                      || (c3 =
2119                   //                        Character.getNumericValue(source[currentPosition++]))
2120                   //                        > 15
2121                   //                      || c3 < 0
2122                   //                      || (c4 =
2123                   //                        Character.getNumericValue(source[currentPosition++]))
2124                   //                        > 15
2125                   //                      || c4 < 0) {
2126                   //                      //error don't care of the value
2127                   //                      currentCharacter = 'A';
2128                   //                    } //something different from * and /
2129                   //                    else {
2130                   //                      currentCharacter =
2131                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2132                   //                    }
2133                   //                  }
2134                   //loop until end of comment */
2135                   while ((currentCharacter != '/') || (!star)) {
2136                     if (recordLineSeparator
2137                         && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2138                       pushLineSeparator();
2139                     star = currentCharacter == '*';
2140                     //get next char
2141                     currentCharacter = source[currentPosition++];
2142                     //                    if (((currentCharacter = source[currentPosition++])
2143                     //                      == '\\')
2144                     //                      && (source[currentPosition] == 'u')) {
2145                     //                      //-------------unicode traitement ------------
2146                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
2147                     //                      currentPosition++;
2148                     //                      while (source[currentPosition] == 'u') {
2149                     //                        currentPosition++;
2150                     //                      }
2151                     //                      if ((c1 =
2152                     //                        Character.getNumericValue(source[currentPosition++]))
2153                     //                        > 15
2154                     //                        || c1 < 0
2155                     //                        || (c2 =
2156                     //                          Character.getNumericValue(source[currentPosition++]))
2157                     //                          > 15
2158                     //                        || c2 < 0
2159                     //                        || (c3 =
2160                     //                          Character.getNumericValue(source[currentPosition++]))
2161                     //                          > 15
2162                     //                        || c3 < 0
2163                     //                        || (c4 =
2164                     //                          Character.getNumericValue(source[currentPosition++]))
2165                     //                          > 15
2166                     //                        || c4 < 0) {
2167                     //                        //error don't care of the value
2168                     //                        currentCharacter = 'A';
2169                     //                      } //something different from * and /
2170                     //                      else {
2171                     //                        currentCharacter =
2172                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2173                     //                      }
2174                     //                    }
2175                   }
2176                 } catch (IndexOutOfBoundsException e) {
2177                   return;
2178                 }
2179                 break;
2180               }
2181               break;
2182             }
2183           default :
2184             if (isPHPIdentifierStart(currentCharacter)
2185                 || currentCharacter == '$') {
2186               try {
2187                 scanIdentifierOrKeyword((currentCharacter == '$'));
2188               } catch (InvalidInputException ex) {
2189               };
2190               break;
2191             }
2192             if (Character.isDigit(currentCharacter)) {
2193               try {
2194                 scanNumber(false);
2195               } catch (InvalidInputException ex) {
2196               };
2197               break;
2198             }
2199         }
2200       }
2201       //-----------------end switch while try--------------------
2202     } catch (IndexOutOfBoundsException e) {
2203     } catch (InvalidInputException e) {
2204     }
2205     return;
2206   }
2207   //  public final boolean jumpOverUnicodeWhiteSpace()
2208   //    throws InvalidInputException {
2209   //    //BOOLEAN
2210   //    //handle the case of unicode. Jump over the next whiteSpace
2211   //    //making startPosition pointing on the next available char
2212   //    //On false, the currentCharacter is filled up with a potential
2213   //    //correct char
2214   //
2215   //    try {
2216   //      this.wasAcr = false;
2217   //      int c1, c2, c3, c4;
2218   //      int unicodeSize = 6;
2219   //      currentPosition++;
2220   //      while (source[currentPosition] == 'u') {
2221   //        currentPosition++;
2222   //        unicodeSize++;
2223   //      }
2224   //
2225   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2226   //        || c1 < 0)
2227   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2228   //          || c2 < 0)
2229   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2230   //          || c3 < 0)
2231   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2232   //          || c4 < 0)) {
2233   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2234   //      }
2235   //
2236   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2237   //      if (recordLineSeparator
2238   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2239   //        pushLineSeparator();
2240   //      if (Character.isWhitespace(currentCharacter))
2241   //        return true;
2242   //
2243   //      //buffer the new char which is not a white space
2244   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2245   //      //withoutUnicodePtr == 1 is true here
2246   //      return false;
2247   //    } catch (IndexOutOfBoundsException e) {
2248   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2249   //    }
2250   //  }
2251   public final int[] getLineEnds() {
2252     //return a bounded copy of this.lineEnds
2253     int[] copy;
2254     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2255     return copy;
2256   }
2257   public char[] getSource() {
2258     return this.source;
2259   }
2260   final char[] optimizedCurrentTokenSource1() {
2261     //return always the same char[] build only once
2262     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2263     char charOne = source[startPosition];
2264     switch (charOne) {
2265       case 'a' :
2266         return charArray_a;
2267       case 'b' :
2268         return charArray_b;
2269       case 'c' :
2270         return charArray_c;
2271       case 'd' :
2272         return charArray_d;
2273       case 'e' :
2274         return charArray_e;
2275       case 'f' :
2276         return charArray_f;
2277       case 'g' :
2278         return charArray_g;
2279       case 'h' :
2280         return charArray_h;
2281       case 'i' :
2282         return charArray_i;
2283       case 'j' :
2284         return charArray_j;
2285       case 'k' :
2286         return charArray_k;
2287       case 'l' :
2288         return charArray_l;
2289       case 'm' :
2290         return charArray_m;
2291       case 'n' :
2292         return charArray_n;
2293       case 'o' :
2294         return charArray_o;
2295       case 'p' :
2296         return charArray_p;
2297       case 'q' :
2298         return charArray_q;
2299       case 'r' :
2300         return charArray_r;
2301       case 's' :
2302         return charArray_s;
2303       case 't' :
2304         return charArray_t;
2305       case 'u' :
2306         return charArray_u;
2307       case 'v' :
2308         return charArray_v;
2309       case 'w' :
2310         return charArray_w;
2311       case 'x' :
2312         return charArray_x;
2313       case 'y' :
2314         return charArray_y;
2315       case 'z' :
2316         return charArray_z;
2317       default :
2318         return new char[]{charOne};
2319     }
2320   }
2321   final char[] optimizedCurrentTokenSource2() {
2322     //try to return the same char[] build only once
2323     char c0, c1;
2324     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2325         % TableSize;
2326     char[][] table = charArray_length[0][hash];
2327     int i = newEntry2;
2328     while (++i < InternalTableSize) {
2329       char[] charArray = table[i];
2330       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2331         return charArray;
2332     }
2333     //---------other side---------
2334     i = -1;
2335     int max = newEntry2;
2336     while (++i <= max) {
2337       char[] charArray = table[i];
2338       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2339         return charArray;
2340     }
2341     //--------add the entry-------
2342     if (++max >= InternalTableSize)
2343       max = 0;
2344     char[] r;
2345     table[max] = (r = new char[]{c0, c1});
2346     newEntry2 = max;
2347     return r;
2348   }
2349   final char[] optimizedCurrentTokenSource3() {
2350     //try to return the same char[] build only once
2351     char c0, c1, c2;
2352     int hash = (((c0 = source[startPosition]) << 12)
2353         + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2354         % TableSize;
2355     char[][] table = charArray_length[1][hash];
2356     int i = newEntry3;
2357     while (++i < InternalTableSize) {
2358       char[] charArray = table[i];
2359       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2360         return charArray;
2361     }
2362     //---------other side---------
2363     i = -1;
2364     int max = newEntry3;
2365     while (++i <= max) {
2366       char[] charArray = table[i];
2367       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2368         return charArray;
2369     }
2370     //--------add the entry-------
2371     if (++max >= InternalTableSize)
2372       max = 0;
2373     char[] r;
2374     table[max] = (r = new char[]{c0, c1, c2});
2375     newEntry3 = max;
2376     return r;
2377   }
2378   final char[] optimizedCurrentTokenSource4() {
2379     //try to return the same char[] build only once
2380     char c0, c1, c2, c3;
2381     long hash = ((((long) (c0 = source[startPosition])) << 18)
2382         + ((c1 = source[startPosition + 1]) << 12)
2383         + ((c2 = source[startPosition + 2]) << 6) + (c3 = source[startPosition + 3]))
2384         % TableSize;
2385     char[][] table = charArray_length[2][(int) hash];
2386     int i = newEntry4;
2387     while (++i < InternalTableSize) {
2388       char[] charArray = table[i];
2389       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2390           && (c3 == charArray[3]))
2391         return charArray;
2392     }
2393     //---------other side---------
2394     i = -1;
2395     int max = newEntry4;
2396     while (++i <= max) {
2397       char[] charArray = table[i];
2398       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2399           && (c3 == charArray[3]))
2400         return charArray;
2401     }
2402     //--------add the entry-------
2403     if (++max >= InternalTableSize)
2404       max = 0;
2405     char[] r;
2406     table[max] = (r = new char[]{c0, c1, c2, c3});
2407     newEntry4 = max;
2408     return r;
2409   }
2410   final char[] optimizedCurrentTokenSource5() {
2411     //try to return the same char[] build only once
2412     char c0, c1, c2, c3, c4;
2413     long hash = ((((long) (c0 = source[startPosition])) << 24)
2414         + (((long) (c1 = source[startPosition + 1])) << 18)
2415         + ((c2 = source[startPosition + 2]) << 12)
2416         + ((c3 = source[startPosition + 3]) << 6) + (c4 = source[startPosition + 4]))
2417         % TableSize;
2418     char[][] table = charArray_length[3][(int) hash];
2419     int i = newEntry5;
2420     while (++i < InternalTableSize) {
2421       char[] charArray = table[i];
2422       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2423           && (c3 == charArray[3]) && (c4 == charArray[4]))
2424         return charArray;
2425     }
2426     //---------other side---------
2427     i = -1;
2428     int max = newEntry5;
2429     while (++i <= max) {
2430       char[] charArray = table[i];
2431       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2432           && (c3 == charArray[3]) && (c4 == charArray[4]))
2433         return charArray;
2434     }
2435     //--------add the entry-------
2436     if (++max >= InternalTableSize)
2437       max = 0;
2438     char[] r;
2439     table[max] = (r = new char[]{c0, c1, c2, c3, c4});
2440     newEntry5 = max;
2441     return r;
2442   }
2443   final char[] optimizedCurrentTokenSource6() {
2444     //try to return the same char[] build only once
2445     char c0, c1, c2, c3, c4, c5;
2446     long hash = ((((long) (c0 = source[startPosition])) << 32)
2447         + (((long) (c1 = source[startPosition + 1])) << 24)
2448         + (((long) (c2 = source[startPosition + 2])) << 18)
2449         + ((c3 = source[startPosition + 3]) << 12)
2450         + ((c4 = source[startPosition + 4]) << 6) + (c5 = source[startPosition + 5]))
2451         % TableSize;
2452     char[][] table = charArray_length[4][(int) hash];
2453     int i = newEntry6;
2454     while (++i < InternalTableSize) {
2455       char[] charArray = table[i];
2456       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2457           && (c3 == charArray[3]) && (c4 == charArray[4])
2458           && (c5 == charArray[5]))
2459         return charArray;
2460     }
2461     //---------other side---------
2462     i = -1;
2463     int max = newEntry6;
2464     while (++i <= max) {
2465       char[] charArray = table[i];
2466       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2])
2467           && (c3 == charArray[3]) && (c4 == charArray[4])
2468           && (c5 == charArray[5]))
2469         return charArray;
2470     }
2471     //--------add the entry-------
2472     if (++max >= InternalTableSize)
2473       max = 0;
2474     char[] r;
2475     table[max] = (r = new char[]{c0, c1, c2, c3, c4, c5});
2476     newEntry6 = max;
2477     return r;
2478   }
2479   public final void pushLineSeparator() throws InvalidInputException {
2480     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2481     final int INCREMENT = 250;
2482     if (this.checkNonExternalizedStringLiterals) {
2483       // reinitialize the current line for non externalize strings purpose
2484       currentLine = null;
2485     }
2486     //currentCharacter is at position currentPosition-1
2487     // cr 000D
2488     if (currentCharacter == '\r') {
2489       int separatorPos = currentPosition - 1;
2490       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2491         return;
2492       //System.out.println("CR-" + separatorPos);
2493       try {
2494         lineEnds[++linePtr] = separatorPos;
2495       } catch (IndexOutOfBoundsException e) {
2496         //linePtr value is correct
2497         int oldLength = lineEnds.length;
2498         int[] old = lineEnds;
2499         lineEnds = new int[oldLength + INCREMENT];
2500         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2501         lineEnds[linePtr] = separatorPos;
2502       }
2503       // look-ahead for merged cr+lf
2504       try {
2505         if (source[currentPosition] == '\n') {
2506           //System.out.println("look-ahead LF-" + currentPosition);
2507           lineEnds[linePtr] = currentPosition;
2508           currentPosition++;
2509           wasAcr = false;
2510         } else {
2511           wasAcr = true;
2512         }
2513       } catch (IndexOutOfBoundsException e) {
2514         wasAcr = true;
2515       }
2516     } else {
2517       // lf 000A
2518       if (currentCharacter == '\n') {
2519         //must merge eventual cr followed by lf
2520         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2521           //System.out.println("merge LF-" + (currentPosition - 1));
2522           lineEnds[linePtr] = currentPosition - 1;
2523         } else {
2524           int separatorPos = currentPosition - 1;
2525           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2526             return;
2527           // System.out.println("LF-" + separatorPos);
2528           try {
2529             lineEnds[++linePtr] = separatorPos;
2530           } catch (IndexOutOfBoundsException e) {
2531             //linePtr value is correct
2532             int oldLength = lineEnds.length;
2533             int[] old = lineEnds;
2534             lineEnds = new int[oldLength + INCREMENT];
2535             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2536             lineEnds[linePtr] = separatorPos;
2537           }
2538         }
2539         wasAcr = false;
2540       }
2541     }
2542   }
2543   public final void pushUnicodeLineSeparator() {
2544     // isUnicode means that the \r or \n has been read as a unicode character
2545     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2546     final int INCREMENT = 250;
2547     //currentCharacter is at position currentPosition-1
2548     if (this.checkNonExternalizedStringLiterals) {
2549       // reinitialize the current line for non externalize strings purpose
2550       currentLine = null;
2551     }
2552     // cr 000D
2553     if (currentCharacter == '\r') {
2554       int separatorPos = currentPosition - 6;
2555       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2556         return;
2557       //System.out.println("CR-" + separatorPos);
2558       try {
2559         lineEnds[++linePtr] = separatorPos;
2560       } catch (IndexOutOfBoundsException e) {
2561         //linePtr value is correct
2562         int oldLength = lineEnds.length;
2563         int[] old = lineEnds;
2564         lineEnds = new int[oldLength + INCREMENT];
2565         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2566         lineEnds[linePtr] = separatorPos;
2567       }
2568       // look-ahead for merged cr+lf
2569       if (source[currentPosition] == '\n') {
2570         //System.out.println("look-ahead LF-" + currentPosition);
2571         lineEnds[linePtr] = currentPosition;
2572         currentPosition++;
2573         wasAcr = false;
2574       } else {
2575         wasAcr = true;
2576       }
2577     } else {
2578       // lf 000A
2579       if (currentCharacter == '\n') {
2580         //must merge eventual cr followed by lf
2581         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2582           //System.out.println("merge LF-" + (currentPosition - 1));
2583           lineEnds[linePtr] = currentPosition - 6;
2584         } else {
2585           int separatorPos = currentPosition - 6;
2586           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2587             return;
2588           // System.out.println("LF-" + separatorPos);
2589           try {
2590             lineEnds[++linePtr] = separatorPos;
2591           } catch (IndexOutOfBoundsException e) {
2592             //linePtr value is correct
2593             int oldLength = lineEnds.length;
2594             int[] old = lineEnds;
2595             lineEnds = new int[oldLength + INCREMENT];
2596             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2597             lineEnds[linePtr] = separatorPos;
2598           }
2599         }
2600         wasAcr = false;
2601       }
2602     }
2603   }
2604   public void recordComment(int token) {
2605         // compute position
2606         int stopPosition = this.currentPosition;
2607         switch (token) {
2608                 case TokenNameCOMMENT_LINE:
2609                         stopPosition = -this.lastCommentLinePosition;
2610                         break;
2611                 case TokenNameCOMMENT_BLOCK:
2612                         stopPosition = -this.currentPosition;
2613                         break;
2614         }
2615
2616         // a new comment is recorded
2617         int length = this.commentStops.length;
2618         if (++this.commentPtr >=  length) {
2619                 System.arraycopy(this.commentStops, 0, this.commentStops = new int[length + 30], 0, length);
2620                 //grows the positions buffers too
2621                 System.arraycopy(this.commentStarts, 0, this.commentStarts = new int[length + 30], 0, length);
2622         }
2623         this.commentStops[this.commentPtr] = stopPosition;
2624         this.commentStarts[this.commentPtr] = this.startPosition;
2625 }
2626 //  public final void recordComment(boolean isJavadoc) {
2627 //    // a new annotation comment is recorded
2628 //    try {
2629 //      commentStops[++commentPtr] = isJavadoc
2630 //          ? currentPosition
2631 //          : -currentPosition;
2632 //    } catch (IndexOutOfBoundsException e) {
2633 //      int oldStackLength = commentStops.length;
2634 //      int[] oldStack = commentStops;
2635 //      commentStops = new int[oldStackLength + 30];
2636 //      System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2637 //      commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2638 //      //grows the positions buffers too
2639 //      int[] old = commentStarts;
2640 //      commentStarts = new int[oldStackLength + 30];
2641 //      System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2642 //    }
2643 //    //the buffer is of a correct size here
2644 //    commentStarts[commentPtr] = startPosition;
2645 //  }
2646   public void resetTo(int begin, int end) {
2647     //reset the scanner to a given position where it may rescan again
2648     diet = false;
2649     initialPosition = startPosition = currentPosition = begin;
2650     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2651     commentPtr = -1; // reset comment stack
2652   }
2653   public final void scanSingleQuotedEscapeCharacter()
2654       throws InvalidInputException {
2655     // the string with "\\u" is a legal string of two chars \ and u
2656     //thus we use a direct access to the source (for regular cases).
2657     //    if (unicodeAsBackSlash) {
2658     //      // consume next character
2659     //      unicodeAsBackSlash = false;
2660     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2661     //        && (source[currentPosition] == 'u')) {
2662     //        getNextUnicodeChar();
2663     //      } else {
2664     //        if (withoutUnicodePtr != 0) {
2665     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2666     //        }
2667     //      }
2668     //    } else
2669     currentCharacter = source[currentPosition++];
2670     switch (currentCharacter) {
2671       case '\'' :
2672         currentCharacter = '\'';
2673         break;
2674       case '\\' :
2675         currentCharacter = '\\';
2676         break;
2677       default :
2678         currentCharacter = '\\';
2679         currentPosition--;
2680     }
2681   }
2682   public final void scanDoubleQuotedEscapeCharacter()
2683       throws InvalidInputException {
2684     // the string with "\\u" is a legal string of two chars \ and u
2685     //thus we use a direct access to the source (for regular cases).
2686     //    if (unicodeAsBackSlash) {
2687     //      // consume next character
2688     //      unicodeAsBackSlash = false;
2689     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2690     //        && (source[currentPosition] == 'u')) {
2691     //        getNextUnicodeChar();
2692     //      } else {
2693     //        if (withoutUnicodePtr != 0) {
2694     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2695     //        }
2696     //      }
2697     //    } else
2698     currentCharacter = source[currentPosition++];
2699     switch (currentCharacter) {
2700       //      case 'b' :
2701       //        currentCharacter = '\b';
2702       //        break;
2703       case 't' :
2704         currentCharacter = '\t';
2705         break;
2706       case 'n' :
2707         currentCharacter = '\n';
2708         break;
2709       //      case 'f' :
2710       //        currentCharacter = '\f';
2711       //        break;
2712       case 'r' :
2713         currentCharacter = '\r';
2714         break;
2715       case '\"' :
2716         currentCharacter = '\"';
2717         break;
2718       case '\'' :
2719         currentCharacter = '\'';
2720         break;
2721       case '\\' :
2722         currentCharacter = '\\';
2723         break;
2724       case '$' :
2725         currentCharacter = '$';
2726         break;
2727       default :
2728         // -----------octal escape--------------
2729         // OctalDigit
2730         // OctalDigit OctalDigit
2731         // ZeroToThree OctalDigit OctalDigit
2732         int number = Character.getNumericValue(currentCharacter);
2733         if (number >= 0 && number <= 7) {
2734           boolean zeroToThreeNot = number > 3;
2735           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2736             int digit = Character.getNumericValue(currentCharacter);
2737             if (digit >= 0 && digit <= 7) {
2738               number = (number * 8) + digit;
2739               if (Character
2740                   .isDigit(currentCharacter = source[currentPosition++])) {
2741                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
2742                   // Digit --> ignore last character
2743                   currentPosition--;
2744                 } else {
2745                   digit = Character.getNumericValue(currentCharacter);
2746                   if (digit >= 0 && digit <= 7) {
2747                     // has read \ZeroToThree OctalDigit OctalDigit
2748                     number = (number * 8) + digit;
2749                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit
2750                     // --> ignore last character
2751                     currentPosition--;
2752                   }
2753                 }
2754               } else { // has read \OctalDigit NonDigit--> ignore last
2755                 // character
2756                 currentPosition--;
2757               }
2758             } else { // has read \OctalDigit NonOctalDigit--> ignore last
2759               // character
2760               currentPosition--;
2761             }
2762           } else { // has read \OctalDigit --> ignore last character
2763             currentPosition--;
2764           }
2765           if (number > 255)
2766             throw new InvalidInputException(INVALID_ESCAPE);
2767           currentCharacter = (char) number;
2768         }
2769     //else
2770     //     throw new InvalidInputException(INVALID_ESCAPE);
2771     }
2772   }
2773   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2774   //    return scanIdentifierOrKeyword( false );
2775   //  }
2776   public int scanIdentifierOrKeyword(boolean isVariable)
2777       throws InvalidInputException {
2778     //test keywords
2779     //first dispatch on the first char.
2780     //then the length. If there are several
2781     //keywors with the same length AND the same first char, then do another
2782     //disptach on the second char :-)...cool....but fast !
2783     useAssertAsAnIndentifier = false;
2784     while (getNextCharAsJavaIdentifierPart()) {
2785     };
2786     if (isVariable) {
2787       //      if (new String(getCurrentTokenSource()).equals("$this")) {
2788       //        return TokenNamethis;
2789       //      }
2790       return TokenNameVariable;
2791     }
2792     int index, length;
2793     char[] data;
2794     char firstLetter;
2795     //    if (withoutUnicodePtr == 0)
2796     //quick test on length == 1 but not on length > 12 while most identifier
2797     //have a length which is <= 12...but there are lots of identifier with
2798     //only one char....
2799     //      {
2800     if ((length = currentPosition - startPosition) == 1)
2801       return TokenNameIdentifier;
2802     //  data = source;
2803     data = new char[length];
2804     index = startPosition;
2805     for (int i = 0; i < length; i++) {
2806       data[i] = Character.toLowerCase(source[index + i]);
2807     }
2808     index = 0;
2809     //    } else {
2810     //      if ((length = withoutUnicodePtr) == 1)
2811     //        return TokenNameIdentifier;
2812     //      // data = withoutUnicodeBuffer;
2813     //      data = new char[withoutUnicodeBuffer.length];
2814     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2815     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2816     //      }
2817     //      index = 1;
2818     //    }
2819     firstLetter = data[index];
2820     switch (firstLetter) {
2821       case '_' :
2822         switch (length) {
2823           case 8 :
2824             //__FILE__
2825             if ((data[++index] == '_') && (data[++index] == 'f')
2826                 && (data[++index] == 'i') && (data[++index] == 'l')
2827                 && (data[++index] == 'e') && (data[++index] == '_')
2828                 && (data[++index] == '_'))
2829               return TokenNameFILE;
2830             index = 0; //__LINE__
2831             if ((data[++index] == '_') && (data[++index] == 'l')
2832                 && (data[++index] == 'i') && (data[++index] == 'n')
2833                 && (data[++index] == 'e') && (data[++index] == '_')
2834                 && (data[++index] == '_'))
2835               return TokenNameLINE;
2836             break;
2837           case 9 :
2838             //__CLASS__
2839             if ((data[++index] == '_') && (data[++index] == 'c')
2840                 && (data[++index] == 'l') && (data[++index] == 'a')
2841                 && (data[++index] == 's') && (data[++index] == 's')
2842                 && (data[++index] == '_') && (data[++index] == '_'))
2843               return TokenNameCLASS_C;
2844             break;
2845           case 11 :
2846             //__METHOD__
2847             if ((data[++index] == '_') && (data[++index] == 'm')
2848                 && (data[++index] == 'e') && (data[++index] == 't')
2849                 && (data[++index] == 'h') && (data[++index] == 'o')
2850                 && (data[++index] == 'd') && (data[++index] == '_')
2851                 && (data[++index] == '_'))
2852               return TokenNameMETHOD_C;
2853             break;
2854           case 12 :
2855             //__FUNCTION__
2856             if ((data[++index] == '_') && (data[++index] == 'f')
2857                 && (data[++index] == 'u') && (data[++index] == 'n')
2858                 && (data[++index] == 'c') && (data[++index] == 't')
2859                 && (data[++index] == 'i') && (data[++index] == 'o')
2860                 && (data[++index] == 'n') && (data[++index] == '_')
2861                 && (data[++index] == '_'))
2862               return TokenNameFUNC_C;
2863             break;
2864         }
2865         return TokenNameIdentifier;
2866       case 'a' :
2867         // as and array abstract
2868         switch (length) {
2869           case 2 :
2870             //as
2871             if ((data[++index] == 's')) {
2872               return TokenNameas;
2873             } else {
2874               return TokenNameIdentifier;
2875             }
2876           case 3 :
2877             //and
2878             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2879               return TokenNameand;
2880             } else {
2881               return TokenNameIdentifier;
2882             }
2883           case 5 :
2884             // array
2885             if ((data[++index] == 'r') && (data[++index] == 'r')
2886                 && (data[++index] == 'a') && (data[++index] == 'y'))
2887               return TokenNamearray;
2888             else
2889               return TokenNameIdentifier;
2890           case 8 :
2891             if ((data[++index] == 'b') && (data[++index] == 's')
2892                 && (data[++index] == 't') && (data[++index] == 'r')
2893                 && (data[++index] == 'a') && (data[++index] == 'c')
2894                 && (data[++index] == 't'))
2895               return TokenNameabstract;
2896             else
2897               return TokenNameIdentifier;
2898           default :
2899             return TokenNameIdentifier;
2900         }
2901       case 'b' :
2902         //break
2903         switch (length) {
2904           case 5 :
2905             if ((data[++index] == 'r') && (data[++index] == 'e')
2906                 && (data[++index] == 'a') && (data[++index] == 'k'))
2907               return TokenNamebreak;
2908             else
2909               return TokenNameIdentifier;
2910           default :
2911             return TokenNameIdentifier;
2912         }
2913       case 'c' :
2914         //case catch class clone const continue
2915         switch (length) {
2916           case 4 :
2917             if ((data[++index] == 'a') && (data[++index] == 's')
2918                 && (data[++index] == 'e'))
2919               return TokenNamecase;
2920             else
2921               return TokenNameIdentifier;
2922           case 5 :
2923             if ((data[++index] == 'a') && (data[++index] == 't')
2924                 && (data[++index] == 'c') && (data[++index] == 'h'))
2925               return TokenNamecatch;
2926             index = 0;
2927             if ((data[++index] == 'l') && (data[++index] == 'a')
2928                 && (data[++index] == 's') && (data[++index] == 's'))
2929               return TokenNameclass;
2930             index = 0;
2931             if ((data[++index] == 'l') && (data[++index] == 'o')
2932                 && (data[++index] == 'n') && (data[++index] == 'e'))
2933               return TokenNameclone;
2934             index = 0;
2935             if ((data[++index] == 'o') && (data[++index] == 'n')
2936                 && (data[++index] == 's') && (data[++index] == 't'))
2937               return TokenNameconst;
2938             else
2939               return TokenNameIdentifier;
2940           case 8 :
2941             if ((data[++index] == 'o') && (data[++index] == 'n')
2942                 && (data[++index] == 't') && (data[++index] == 'i')
2943                 && (data[++index] == 'n') && (data[++index] == 'u')
2944                 && (data[++index] == 'e'))
2945               return TokenNamecontinue;
2946             else
2947               return TokenNameIdentifier;
2948           default :
2949             return TokenNameIdentifier;
2950         }
2951       case 'd' :
2952         // declare default do die
2953         // TODO delete define ==> no keyword !
2954         switch (length) {
2955           case 2 :
2956             if ((data[++index] == 'o'))
2957               return TokenNamedo;
2958             else
2959               return TokenNameIdentifier;
2960           //          case 6 :
2961           //            if ((data[++index] == 'e')
2962           //              && (data[++index] == 'f')
2963           //              && (data[++index] == 'i')
2964           //              && (data[++index] == 'n')
2965           //              && (data[++index] == 'e'))
2966           //              return TokenNamedefine;
2967           //            else
2968           //              return TokenNameIdentifier;
2969           case 7 :
2970             if ((data[++index] == 'e') && (data[++index] == 'c')
2971                 && (data[++index] == 'l') && (data[++index] == 'a')
2972                 && (data[++index] == 'r') && (data[++index] == 'e'))
2973               return TokenNamedeclare;
2974             index = 0;
2975             if ((data[++index] == 'e') && (data[++index] == 'f')
2976                 && (data[++index] == 'a') && (data[++index] == 'u')
2977                 && (data[++index] == 'l') && (data[++index] == 't'))
2978               return TokenNamedefault;
2979             else
2980               return TokenNameIdentifier;
2981           default :
2982             return TokenNameIdentifier;
2983         }
2984       case 'e' :
2985         //echo else exit elseif extends eval
2986         switch (length) {
2987           case 4 :
2988             if ((data[++index] == 'c') && (data[++index] == 'h')
2989                 && (data[++index] == 'o'))
2990               return TokenNameecho;
2991             else if ((data[index] == 'l') && (data[++index] == 's')
2992                 && (data[++index] == 'e'))
2993               return TokenNameelse;
2994             else if ((data[index] == 'x') && (data[++index] == 'i')
2995                 && (data[++index] == 't'))
2996               return TokenNameexit;
2997             else if ((data[index] == 'v') && (data[++index] == 'a')
2998                 && (data[++index] == 'l'))
2999               return TokenNameeval;
3000             else
3001               return TokenNameIdentifier;
3002           case 5 :
3003             // endif empty
3004             if ((data[++index] == 'n') && (data[++index] == 'd')
3005                 && (data[++index] == 'i') && (data[++index] == 'f'))
3006               return TokenNameendif;
3007             if ((data[index] == 'm') && (data[++index] == 'p')
3008                 && (data[++index] == 't') && (data[++index] == 'y'))
3009               return TokenNameempty;
3010             else
3011               return TokenNameIdentifier;
3012           case 6 :
3013             // endfor
3014             if ((data[++index] == 'n') && (data[++index] == 'd')
3015                 && (data[++index] == 'f') && (data[++index] == 'o')
3016                 && (data[++index] == 'r'))
3017               return TokenNameendfor;
3018             else if ((data[index] == 'l') && (data[++index] == 's')
3019                 && (data[++index] == 'e') && (data[++index] == 'i')
3020                 && (data[++index] == 'f'))
3021               return TokenNameelseif;
3022             else
3023               return TokenNameIdentifier;
3024           case 7 :
3025             if ((data[++index] == 'x') && (data[++index] == 't')
3026                 && (data[++index] == 'e') && (data[++index] == 'n')
3027                 && (data[++index] == 'd') && (data[++index] == 's'))
3028               return TokenNameextends;
3029             else
3030               return TokenNameIdentifier;
3031           case 8 :
3032             // endwhile
3033             if ((data[++index] == 'n') && (data[++index] == 'd')
3034                 && (data[++index] == 'w') && (data[++index] == 'h')
3035                 && (data[++index] == 'i') && (data[++index] == 'l')
3036                 && (data[++index] == 'e'))
3037               return TokenNameendwhile;
3038             else
3039               return TokenNameIdentifier;
3040           case 9 :
3041             // endswitch
3042             if ((data[++index] == 'n') && (data[++index] == 'd')
3043                 && (data[++index] == 's') && (data[++index] == 'w')
3044                 && (data[++index] == 'i') && (data[++index] == 't')
3045                 && (data[++index] == 'c') && (data[++index] == 'h'))
3046               return TokenNameendswitch;
3047             else
3048               return TokenNameIdentifier;
3049           case 10 :
3050             // enddeclare
3051             if ((data[++index] == 'n') && (data[++index] == 'd')
3052                 && (data[++index] == 'd') && (data[++index] == 'e')
3053                 && (data[++index] == 'c') && (data[++index] == 'l')
3054                 && (data[++index] == 'a') && (data[++index] == 'r')
3055                 && (data[++index] == 'e'))
3056               return TokenNameendforeach;
3057             index = 0;
3058             if ((data[++index] == 'n') // endforeach
3059                 && (data[++index] == 'd') && (data[++index] == 'f')
3060                 && (data[++index] == 'o') && (data[++index] == 'r')
3061                 && (data[++index] == 'e') && (data[++index] == 'a')
3062                 && (data[++index] == 'c') && (data[++index] == 'h'))
3063               return TokenNameendforeach;
3064             else
3065               return TokenNameIdentifier;
3066           default :
3067             return TokenNameIdentifier;
3068         }
3069       case 'f' :
3070         //for false final function
3071         switch (length) {
3072           case 3 :
3073             if ((data[++index] == 'o') && (data[++index] == 'r'))
3074               return TokenNamefor;
3075             else
3076               return TokenNameIdentifier;
3077           case 5 :
3078             //            if ((data[++index] == 'a') && (data[++index] == 'l')
3079             //                && (data[++index] == 's') && (data[++index] == 'e'))
3080             //              return TokenNamefalse;
3081             if ((data[++index] == 'i') && (data[++index] == 'n')
3082                 && (data[++index] == 'a') && (data[++index] == 'l'))
3083               return TokenNamefinal;
3084             else
3085               return TokenNameIdentifier;
3086           case 7 :
3087             // foreach
3088             if ((data[++index] == 'o') && (data[++index] == 'r')
3089                 && (data[++index] == 'e') && (data[++index] == 'a')
3090                 && (data[++index] == 'c') && (data[++index] == 'h'))
3091               return TokenNameforeach;
3092             else
3093               return TokenNameIdentifier;
3094           case 8 :
3095             // function
3096             if ((data[++index] == 'u') && (data[++index] == 'n')
3097                 && (data[++index] == 'c') && (data[++index] == 't')
3098                 && (data[++index] == 'i') && (data[++index] == 'o')
3099                 && (data[++index] == 'n'))
3100               return TokenNamefunction;
3101             else
3102               return TokenNameIdentifier;
3103           default :
3104             return TokenNameIdentifier;
3105         }
3106       case 'g' :
3107         //global
3108         if (length == 6) {
3109           if ((data[++index] == 'l') && (data[++index] == 'o')
3110               && (data[++index] == 'b') && (data[++index] == 'a')
3111               && (data[++index] == 'l')) {
3112             return TokenNameglobal;
3113           }
3114         }
3115         return TokenNameIdentifier;
3116       case 'i' :
3117         //if int isset include include_once instanceof interface implements
3118         switch (length) {
3119           case 2 :
3120             if (data[++index] == 'f')
3121               return TokenNameif;
3122             else
3123               return TokenNameIdentifier;
3124           //          case 3 :
3125           //            if ((data[++index] == 'n') && (data[++index] == 't'))
3126           //              return TokenNameint;
3127           //            else
3128           //              return TokenNameIdentifier;
3129           case 5 :
3130             if ((data[++index] == 's') && (data[++index] == 's')
3131                 && (data[++index] == 'e') && (data[++index] == 't'))
3132               return TokenNameisset;
3133             else
3134               return TokenNameIdentifier;
3135           case 7 :
3136             if ((data[++index] == 'n') && (data[++index] == 'c')
3137                 && (data[++index] == 'l') && (data[++index] == 'u')
3138                 && (data[++index] == 'd') && (data[++index] == 'e'))
3139               return TokenNameinclude;
3140             else
3141               return TokenNameIdentifier;
3142           case 9 :
3143             // interface
3144             if ((data[++index] == 'n') && (data[++index] == 't')
3145                 && (data[++index] == 'e') && (data[++index] == 'r')
3146                 && (data[++index] == 'f') && (data[++index] == 'a')
3147                 && (data[++index] == 'c') && (data[++index] == 'e'))
3148               return TokenNameinterface;
3149             else
3150               return TokenNameIdentifier;
3151           case 10 :
3152             // instanceof
3153             if ((data[++index] == 'n') && (data[++index] == 's')
3154                 && (data[++index] == 't') && (data[++index] == 'a')
3155                 && (data[++index] == 'n') && (data[++index] == 'c')
3156                 && (data[++index] == 'e') && (data[++index] == 'o')
3157                 && (data[++index] == 'f'))
3158               return TokenNameinstanceof;
3159             if ((data[index] == 'm') && (data[++index] == 'p')
3160                 && (data[++index] == 'l') && (data[++index] == 'e')
3161                 && (data[++index] == 'm') && (data[++index] == 'e')
3162                 && (data[++index] == 'n') && (data[++index] == 't')
3163                 && (data[++index] == 's'))
3164               return TokenNameimplements;
3165             else
3166               return TokenNameIdentifier;
3167           case 12 :
3168             if ((data[++index] == 'n') && (data[++index] == 'c')
3169                 && (data[++index] == 'l') && (data[++index] == 'u')
3170                 && (data[++index] == 'd') && (data[++index] == 'e')
3171                 && (data[++index] == '_') && (data[++index] == 'o')
3172                 && (data[++index] == 'n') && (data[++index] == 'c')
3173                 && (data[++index] == 'e'))
3174               return TokenNameinclude_once;
3175             else
3176               return TokenNameIdentifier;
3177           default :
3178             return TokenNameIdentifier;
3179         }
3180       case 'l' :
3181         //list
3182         if (length == 4) {
3183           if ((data[++index] == 'i') && (data[++index] == 's')
3184               && (data[++index] == 't')) {
3185             return TokenNamelist;
3186           }
3187         }
3188         return TokenNameIdentifier;
3189       case 'n' :
3190         // new null
3191         switch (length) {
3192           case 3 :
3193             if ((data[++index] == 'e') && (data[++index] == 'w'))
3194               return TokenNamenew;
3195             else
3196               return TokenNameIdentifier;
3197           //          case 4 :
3198           //            if ((data[++index] == 'u') && (data[++index] == 'l')
3199           //                && (data[++index] == 'l'))
3200           //              return TokenNamenull;
3201           //            else
3202           //              return TokenNameIdentifier;
3203           default :
3204             return TokenNameIdentifier;
3205         }
3206       case 'o' :
3207         // or old_function
3208         if (length == 2) {
3209           if (data[++index] == 'r') {
3210             return TokenNameor;
3211           }
3212         }
3213         //        if (length == 12) {
3214         //          if ((data[++index] == 'l')
3215         //            && (data[++index] == 'd')
3216         //            && (data[++index] == '_')
3217         //            && (data[++index] == 'f')
3218         //            && (data[++index] == 'u')
3219         //            && (data[++index] == 'n')
3220         //            && (data[++index] == 'c')
3221         //            && (data[++index] == 't')
3222         //            && (data[++index] == 'i')
3223         //            && (data[++index] == 'o')
3224         //            && (data[++index] == 'n')) {
3225         //            return TokenNameold_function;
3226         //          }
3227         //        }
3228         return TokenNameIdentifier;
3229       case 'p' :
3230         // print public private protected
3231         switch (length) {
3232           case 5 :
3233             if ((data[++index] == 'r') && (data[++index] == 'i')
3234                 && (data[++index] == 'n') && (data[++index] == 't')) {
3235               return TokenNameprint;
3236             } else
3237               return TokenNameIdentifier;
3238           case 6 :
3239             if ((data[++index] == 'u') && (data[++index] == 'b')
3240                 && (data[++index] == 'l') && (data[++index] == 'i')
3241                 && (data[++index] == 'c')) {
3242               return TokenNamepublic;
3243             } else
3244               return TokenNameIdentifier;
3245           case 7 :
3246             if ((data[++index] == 'r') && (data[++index] == 'i')
3247                 && (data[++index] == 'v') && (data[++index] == 'a')
3248                 && (data[++index] == 't') && (data[++index] == 'e')) {
3249               return TokenNameprivate;
3250             } else
3251               return TokenNameIdentifier;
3252           case 9 :
3253             if ((data[++index] == 'r') && (data[++index] == 'o')
3254                 && (data[++index] == 't') && (data[++index] == 'e')
3255                 && (data[++index] == 'c') && (data[++index] == 't')
3256                 && (data[++index] == 'e') && (data[++index] == 'd')) {
3257               return TokenNameprotected;
3258             } else
3259               return TokenNameIdentifier;
3260         }
3261         return TokenNameIdentifier;
3262       case 'r' :
3263         //return require require_once
3264         if (length == 6) {
3265           if ((data[++index] == 'e') && (data[++index] == 't')
3266               && (data[++index] == 'u') && (data[++index] == 'r')
3267               && (data[++index] == 'n')) {
3268             return TokenNamereturn;
3269           }
3270         } else if (length == 7) {
3271           if ((data[++index] == 'e') && (data[++index] == 'q')
3272               && (data[++index] == 'u') && (data[++index] == 'i')
3273               && (data[++index] == 'r') && (data[++index] == 'e')) {
3274             return TokenNamerequire;
3275           }
3276         } else if (length == 12) {
3277           if ((data[++index] == 'e') && (data[++index] == 'q')
3278               && (data[++index] == 'u') && (data[++index] == 'i')
3279               && (data[++index] == 'r') && (data[++index] == 'e')
3280               && (data[++index] == '_') && (data[++index] == 'o')
3281               && (data[++index] == 'n') && (data[++index] == 'c')
3282               && (data[++index] == 'e')) {
3283             return TokenNamerequire_once;
3284           }
3285         } else
3286           return TokenNameIdentifier;
3287       case 's' :
3288         //static switch
3289         switch (length) {
3290           case 6 :
3291             if (data[++index] == 't')
3292               if ((data[++index] == 'a') && (data[++index] == 't')
3293                   && (data[++index] == 'i') && (data[++index] == 'c')) {
3294                 return TokenNamestatic;
3295               } else
3296                 return TokenNameIdentifier;
3297             else if ((data[index] == 'w') && (data[++index] == 'i')
3298                 && (data[++index] == 't') && (data[++index] == 'c')
3299                 && (data[++index] == 'h'))
3300               return TokenNameswitch;
3301             else
3302               return TokenNameIdentifier;
3303           default :
3304             return TokenNameIdentifier;
3305         }
3306       case 't' :
3307         // try true throw
3308         switch (length) {
3309           case 3 :
3310             if ((data[++index] == 'r') && (data[++index] == 'y'))
3311               return TokenNametry;
3312             else
3313               return TokenNameIdentifier;
3314           //          case 4 :
3315           //            if ((data[++index] == 'r') && (data[++index] == 'u')
3316           //                && (data[++index] == 'e'))
3317           //              return TokenNametrue;
3318           //            else
3319           //              return TokenNameIdentifier;
3320           case 5 :
3321             if ((data[++index] == 'h') && (data[++index] == 'r')
3322                 && (data[++index] == 'o') && (data[++index] == 'w'))
3323               return TokenNamethrow;
3324             else
3325               return TokenNameIdentifier;
3326           default :
3327             return TokenNameIdentifier;
3328         }
3329       case 'u' :
3330         //use unset
3331         switch (length) {
3332           case 3 :
3333             if ((data[++index] == 's') && (data[++index] == 'e'))
3334               return TokenNameuse;
3335             else
3336               return TokenNameIdentifier;
3337           case 5 :
3338             if ((data[++index] == 'n') && (data[++index] == 's')
3339                 && (data[++index] == 'e') && (data[++index] == 't'))
3340               return TokenNameunset;
3341             else
3342               return TokenNameIdentifier;
3343           default :
3344             return TokenNameIdentifier;
3345         }
3346       case 'v' :
3347         //var
3348         switch (length) {
3349           case 3 :
3350             if ((data[++index] == 'a') && (data[++index] == 'r'))
3351               return TokenNamevar;
3352             else
3353               return TokenNameIdentifier;
3354           default :
3355             return TokenNameIdentifier;
3356         }
3357       case 'w' :
3358         //while
3359         switch (length) {
3360           case 5 :
3361             if ((data[++index] == 'h') && (data[++index] == 'i')
3362                 && (data[++index] == 'l') && (data[++index] == 'e'))
3363               return TokenNamewhile;
3364             else
3365               return TokenNameIdentifier;
3366           //case 6:if ( (data[++index] =='i') && (data[++index]=='d') &&
3367           // (data[++index]=='e') && (data[++index]=='f')&&
3368           // (data[++index]=='p'))
3369           //return TokenNamewidefp ;
3370           //else
3371           //return TokenNameIdentifier;
3372           default :
3373             return TokenNameIdentifier;
3374         }
3375       case 'x' :
3376         //xor
3377         switch (length) {
3378           case 3 :
3379             if ((data[++index] == 'o') && (data[++index] == 'r'))
3380               return TokenNamexor;
3381             else
3382               return TokenNameIdentifier;
3383           default :
3384             return TokenNameIdentifier;
3385         }
3386       default :
3387         return TokenNameIdentifier;
3388     }
3389   }
3390   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3391     //when entering this method the currentCharacter is the firt
3392     //digit of the number , i.e. it may be preceeded by a . when
3393     //dotPrefix is true
3394     boolean floating = dotPrefix;
3395     if ((!dotPrefix) && (currentCharacter == '0')) {
3396       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3397         //force the first char of the hexa number do exist...
3398         // consume next character
3399         unicodeAsBackSlash = false;
3400         currentCharacter = source[currentPosition++];
3401         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3402         //          && (source[currentPosition] == 'u')) {
3403         //          getNextUnicodeChar();
3404         //        } else {
3405         //          if (withoutUnicodePtr != 0) {
3406         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3407         //          }
3408         //        }
3409         if (Character.digit(currentCharacter, 16) == -1)
3410           throw new InvalidInputException(INVALID_HEXA);
3411         //---end forcing--
3412         while (getNextCharAsDigit(16)) {
3413         };
3414         //        if (getNextChar('l', 'L') >= 0)
3415         //          return TokenNameLongLiteral;
3416         //        else
3417         return TokenNameIntegerLiteral;
3418       }
3419       //there is x or X in the number
3420       //potential octal ! ... some one may write 000099.0 ! thus 00100 <
3421       // 00078.0 is true !!!!! crazy language
3422       if (getNextCharAsDigit()) {
3423         //-------------potential octal-----------------
3424         while (getNextCharAsDigit()) {
3425         };
3426         //        if (getNextChar('l', 'L') >= 0) {
3427         //          return TokenNameLongLiteral;
3428         //        }
3429         //
3430         //        if (getNextChar('f', 'F') >= 0) {
3431         //          return TokenNameFloatingPointLiteral;
3432         //        }
3433         if (getNextChar('d', 'D') >= 0) {
3434           return TokenNameDoubleLiteral;
3435         } else { //make the distinction between octal and float ....
3436           if (getNextChar('.')) { //bingo ! ....
3437             while (getNextCharAsDigit()) {
3438             };
3439             if (getNextChar('e', 'E') >= 0) {
3440               // consume next character
3441               unicodeAsBackSlash = false;
3442               currentCharacter = source[currentPosition++];
3443               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3444               //                && (source[currentPosition] == 'u')) {
3445               //                getNextUnicodeChar();
3446               //              } else {
3447               //                if (withoutUnicodePtr != 0) {
3448               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3449               //                }
3450               //              }
3451               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3452                 // consume next character
3453                 unicodeAsBackSlash = false;
3454                 currentCharacter = source[currentPosition++];
3455                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3456                 //                  && (source[currentPosition] == 'u')) {
3457                 //                  getNextUnicodeChar();
3458                 //                } else {
3459                 //                  if (withoutUnicodePtr != 0) {
3460                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3461                 //                      currentCharacter;
3462                 //                  }
3463                 //                }
3464               }
3465               if (!Character.isDigit(currentCharacter))
3466                 throw new InvalidInputException(INVALID_FLOAT);
3467               while (getNextCharAsDigit()) {
3468               };
3469             }
3470             //            if (getNextChar('f', 'F') >= 0)
3471             //              return TokenNameFloatingPointLiteral;
3472             getNextChar('d', 'D'); //jump over potential d or D
3473             return TokenNameDoubleLiteral;
3474           } else {
3475             return TokenNameIntegerLiteral;
3476           }
3477         }
3478       } else {
3479         /* carry on */
3480       }
3481     }
3482     while (getNextCharAsDigit()) {
3483     };
3484     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3485     //      return TokenNameLongLiteral;
3486     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3487       while (getNextCharAsDigit()) {
3488       };
3489       floating = true;
3490     }
3491     //if floating is true both exponant and suffix may be optional
3492     if (getNextChar('e', 'E') >= 0) {
3493       floating = true;
3494       // consume next character
3495       unicodeAsBackSlash = false;
3496       currentCharacter = source[currentPosition++];
3497       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3498       //        && (source[currentPosition] == 'u')) {
3499       //        getNextUnicodeChar();
3500       //      } else {
3501       //        if (withoutUnicodePtr != 0) {
3502       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3503       //        }
3504       //      }
3505       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume
3506         // next
3507         // character
3508         unicodeAsBackSlash = false;
3509         currentCharacter = source[currentPosition++];
3510         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3511         //          && (source[currentPosition] == 'u')) {
3512         //          getNextUnicodeChar();
3513         //        } else {
3514         //          if (withoutUnicodePtr != 0) {
3515         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3516         //          }
3517         //        }
3518       }
3519       if (!Character.isDigit(currentCharacter))
3520         throw new InvalidInputException(INVALID_FLOAT);
3521       while (getNextCharAsDigit()) {
3522       };
3523     }
3524     if (getNextChar('d', 'D') >= 0)
3525       return TokenNameDoubleLiteral;
3526     //    if (getNextChar('f', 'F') >= 0)
3527     //      return TokenNameFloatingPointLiteral;
3528     //the long flag has been tested before
3529     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3530   }
3531   /**
3532    * Search the line number corresponding to a specific position
3533    *  
3534    */
3535   public final int getLineNumber(int position) {
3536     if (lineEnds == null)
3537       return 1;
3538     int length = linePtr + 1;
3539     if (length == 0)
3540       return 1;
3541     int g = 0, d = length - 1;
3542     int m = 0;
3543     while (g <= d) {
3544       m = (g + d) / 2;
3545       if (position < lineEnds[m]) {
3546         d = m - 1;
3547       } else if (position > lineEnds[m]) {
3548         g = m + 1;
3549       } else {
3550         return m + 1;
3551       }
3552     }
3553     if (position < lineEnds[m]) {
3554       return m + 1;
3555     }
3556     return m + 2;
3557   }
3558   public void setPHPMode(boolean mode) {
3559     phpMode = mode;
3560   }
3561   public final void setSource(char[] source) {
3562     //the source-buffer is set to sourceString
3563     if (source == null) {
3564       this.source = new char[0];
3565     } else {
3566       this.source = source;
3567     }
3568     startPosition = -1;
3569     initialPosition = currentPosition = 0;
3570     containsAssertKeyword = false;
3571     withoutUnicodeBuffer = new char[this.source.length];
3572     encapsedStringStack = new Stack();
3573   }
3574   public String toString() {
3575     if (startPosition == source.length)
3576       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3577     if (currentPosition > source.length)
3578       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3579     char front[] = new char[startPosition];
3580     System.arraycopy(source, 0, front, 0, startPosition);
3581     int middleLength = (currentPosition - 1) - startPosition + 1;
3582     char middle[];
3583     if (middleLength > -1) {
3584       middle = new char[middleLength];
3585       System.arraycopy(source, startPosition, middle, 0, middleLength);
3586     } else {
3587       middle = new char[0];
3588     }
3589     char end[] = new char[source.length - (currentPosition - 1)];
3590     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length
3591         - (currentPosition - 1) - 1);
3592     return new String(front)
3593         + "\n===============================\nStarts here -->" //$NON-NLS-1$
3594         + new String(middle)
3595         + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3596         + new String(end);
3597   }
3598   public final String toStringAction(int act) {
3599     switch (act) {
3600       case TokenNameERROR :
3601         return "ScannerError"; // + new String(getCurrentTokenSource()) + ")";
3602       // //$NON-NLS-1$
3603       case TokenNameINLINE_HTML :
3604         return "Inline-HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3605       case TokenNameIdentifier :
3606         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3607       case TokenNameVariable :
3608         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3609       case TokenNameabstract :
3610         return "abstract"; //$NON-NLS-1$
3611       case TokenNameand :
3612         return "AND"; //$NON-NLS-1$
3613       case TokenNamearray :
3614         return "array"; //$NON-NLS-1$
3615       case TokenNameas :
3616         return "as"; //$NON-NLS-1$
3617       case TokenNamebreak :
3618         return "break"; //$NON-NLS-1$
3619       case TokenNamecase :
3620         return "case"; //$NON-NLS-1$
3621       case TokenNameclass :
3622         return "class"; //$NON-NLS-1$
3623       case TokenNamecatch :
3624         return "catch"; //$NON-NLS-1$
3625       case TokenNameclone :
3626         //$NON-NLS-1$
3627         return "clone";
3628       case TokenNameconst :
3629         //$NON-NLS-1$
3630         return "const";
3631       case TokenNamecontinue :
3632         return "continue"; //$NON-NLS-1$
3633       case TokenNamedefault :
3634         return "default"; //$NON-NLS-1$
3635       //      case TokenNamedefine :
3636       //        return "define"; //$NON-NLS-1$
3637       case TokenNamedo :
3638         return "do"; //$NON-NLS-1$
3639       case TokenNameecho :
3640         return "echo"; //$NON-NLS-1$
3641       case TokenNameelse :
3642         return "else"; //$NON-NLS-1$
3643       case TokenNameelseif :
3644         return "elseif"; //$NON-NLS-1$
3645       case TokenNameendfor :
3646         return "endfor"; //$NON-NLS-1$
3647       case TokenNameendforeach :
3648         return "endforeach"; //$NON-NLS-1$
3649       case TokenNameendif :
3650         return "endif"; //$NON-NLS-1$
3651       case TokenNameendswitch :
3652         return "endswitch"; //$NON-NLS-1$
3653       case TokenNameendwhile :
3654         return "endwhile"; //$NON-NLS-1$
3655       case TokenNameexit:
3656         return "exit";
3657       case TokenNameextends :
3658         return "extends"; //$NON-NLS-1$
3659       //      case TokenNamefalse :
3660       //        return "false"; //$NON-NLS-1$
3661       case TokenNamefinal :
3662         return "final"; //$NON-NLS-1$
3663       case TokenNamefor :
3664         return "for"; //$NON-NLS-1$
3665       case TokenNameforeach :
3666         return "foreach"; //$NON-NLS-1$
3667       case TokenNamefunction :
3668         return "function"; //$NON-NLS-1$
3669       case TokenNameglobal :
3670         return "global"; //$NON-NLS-1$
3671       case TokenNameif :
3672         return "if"; //$NON-NLS-1$
3673       case TokenNameimplements :
3674         return "implements"; //$NON-NLS-1$
3675       case TokenNameinclude :
3676         return "include"; //$NON-NLS-1$
3677       case TokenNameinclude_once :
3678         return "include_once"; //$NON-NLS-1$
3679       case TokenNameinstanceof :
3680         return "instanceof"; //$NON-NLS-1$
3681       case TokenNameinterface :
3682         return "interface"; //$NON-NLS-1$
3683       case TokenNameisset :
3684         return "isset"; //$NON-NLS-1$
3685       case TokenNamelist :
3686         return "list"; //$NON-NLS-1$
3687       case TokenNamenew :
3688         return "new"; //$NON-NLS-1$
3689       //      case TokenNamenull :
3690       //        return "null"; //$NON-NLS-1$
3691       case TokenNameor :
3692         return "OR"; //$NON-NLS-1$
3693       case TokenNameprint :
3694         return "print"; //$NON-NLS-1$
3695       case TokenNameprivate :
3696         return "private"; //$NON-NLS-1$
3697       case TokenNameprotected :
3698         return "protected"; //$NON-NLS-1$
3699       case TokenNamepublic :
3700         return "public"; //$NON-NLS-1$
3701       case TokenNamerequire :
3702         return "require"; //$NON-NLS-1$
3703       case TokenNamerequire_once :
3704         return "require_once"; //$NON-NLS-1$
3705       case TokenNamereturn :
3706         return "return"; //$NON-NLS-1$
3707       case TokenNamestatic :
3708         return "static"; //$NON-NLS-1$
3709       case TokenNameswitch :
3710         return "switch"; //$NON-NLS-1$
3711       //      case TokenNametrue :
3712       //        return "true"; //$NON-NLS-1$
3713       case TokenNameunset :
3714         return "unset"; //$NON-NLS-1$
3715       case TokenNamevar :
3716         return "var"; //$NON-NLS-1$
3717       case TokenNamewhile :
3718         return "while"; //$NON-NLS-1$
3719       case TokenNamexor :
3720         return "XOR"; //$NON-NLS-1$
3721       //      case TokenNamethis :
3722       //        return "$this"; //$NON-NLS-1$
3723       case TokenNameIntegerLiteral :
3724         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3725       case TokenNameDoubleLiteral :
3726         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3727       case TokenNameStringLiteral :
3728         return "StringLiteral(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3729       case TokenNameStringConstant :
3730         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3731       case TokenNameStringInterpolated :
3732         return "StringInterpolated(" + new String(getCurrentTokenSource())
3733             + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3734       case TokenNameEncapsedString0 :
3735         return "`"; //$NON-NLS-1$  
3736       case TokenNameEncapsedString1 :
3737         return "\'"; //$NON-NLS-1$  
3738       case TokenNameEncapsedString2 :
3739         return "\""; //$NON-NLS-1$  
3740       case TokenNameSTRING :
3741         return "STRING(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3742       case TokenNameHEREDOC :
3743         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3744       case TokenNamePLUS_PLUS :
3745         return "++"; //$NON-NLS-1$
3746       case TokenNameMINUS_MINUS :
3747         return "--"; //$NON-NLS-1$
3748       case TokenNameEQUAL_EQUAL :
3749         return "=="; //$NON-NLS-1$
3750       case TokenNameEQUAL_EQUAL_EQUAL :
3751         return "==="; //$NON-NLS-1$
3752       case TokenNameEQUAL_GREATER :
3753         return "=>"; //$NON-NLS-1$
3754       case TokenNameLESS_EQUAL :
3755         return "<="; //$NON-NLS-1$
3756       case TokenNameGREATER_EQUAL :
3757         return ">="; //$NON-NLS-1$
3758       case TokenNameNOT_EQUAL :
3759         return "!="; //$NON-NLS-1$
3760       case TokenNameNOT_EQUAL_EQUAL :
3761         return "!=="; //$NON-NLS-1$
3762       case TokenNameLEFT_SHIFT :
3763         return "<<"; //$NON-NLS-1$
3764       case TokenNameRIGHT_SHIFT :
3765         return ">>"; //$NON-NLS-1$
3766       case TokenNamePLUS_EQUAL :
3767         return "+="; //$NON-NLS-1$
3768       case TokenNameMINUS_EQUAL :
3769         return "-="; //$NON-NLS-1$
3770       case TokenNameMULTIPLY_EQUAL :
3771         return "*="; //$NON-NLS-1$
3772       case TokenNameDIVIDE_EQUAL :
3773         return "/="; //$NON-NLS-1$
3774       case TokenNameAND_EQUAL :
3775         return "&="; //$NON-NLS-1$
3776       case TokenNameOR_EQUAL :
3777         return "|="; //$NON-NLS-1$
3778       case TokenNameXOR_EQUAL :
3779         return "^="; //$NON-NLS-1$
3780       case TokenNameREMAINDER_EQUAL :
3781         return "%="; //$NON-NLS-1$
3782       case TokenNameDOT_EQUAL :
3783         return ".="; //$NON-NLS-1$
3784       case TokenNameLEFT_SHIFT_EQUAL :
3785         return "<<="; //$NON-NLS-1$
3786       case TokenNameRIGHT_SHIFT_EQUAL :
3787         return ">>="; //$NON-NLS-1$
3788       case TokenNameOR_OR :
3789         return "||"; //$NON-NLS-1$
3790       case TokenNameAND_AND :
3791         return "&&"; //$NON-NLS-1$
3792       case TokenNamePLUS :
3793         return "+"; //$NON-NLS-1$
3794       case TokenNameMINUS :
3795         return "-"; //$NON-NLS-1$
3796       case TokenNameMINUS_GREATER :
3797         return "->";
3798       case TokenNameNOT :
3799         return "!"; //$NON-NLS-1$
3800       case TokenNameREMAINDER :
3801         return "%"; //$NON-NLS-1$
3802       case TokenNameXOR :
3803         return "^"; //$NON-NLS-1$
3804       case TokenNameAND :
3805         return "&"; //$NON-NLS-1$
3806       case TokenNameMULTIPLY :
3807         return "*"; //$NON-NLS-1$
3808       case TokenNameOR :
3809         return "|"; //$NON-NLS-1$
3810       case TokenNameTWIDDLE :
3811         return "~"; //$NON-NLS-1$
3812       case TokenNameTWIDDLE_EQUAL :
3813         return "~="; //$NON-NLS-1$
3814       case TokenNameDIVIDE :
3815         return "/"; //$NON-NLS-1$
3816       case TokenNameGREATER :
3817         return ">"; //$NON-NLS-1$
3818       case TokenNameLESS :
3819         return "<"; //$NON-NLS-1$
3820       case TokenNameLPAREN :
3821         return "("; //$NON-NLS-1$
3822       case TokenNameRPAREN :
3823         return ")"; //$NON-NLS-1$
3824       case TokenNameLBRACE :
3825         return "{"; //$NON-NLS-1$
3826       case TokenNameRBRACE :
3827         return "}"; //$NON-NLS-1$
3828       case TokenNameLBRACKET :
3829         return "["; //$NON-NLS-1$
3830       case TokenNameRBRACKET :
3831         return "]"; //$NON-NLS-1$
3832       case TokenNameSEMICOLON :
3833         return ";"; //$NON-NLS-1$
3834       case TokenNameQUESTION :
3835         return "?"; //$NON-NLS-1$
3836       case TokenNameCOLON :
3837         return ":"; //$NON-NLS-1$
3838       case TokenNameCOMMA :
3839         return ","; //$NON-NLS-1$
3840       case TokenNameDOT :
3841         return "."; //$NON-NLS-1$
3842       case TokenNameEQUAL :
3843         return "="; //$NON-NLS-1$
3844       case TokenNameAT :
3845         return "@";
3846       case TokenNameDOLLAR :
3847         return "$";
3848       case TokenNameDOLLAR_LBRACE :
3849         return "${";
3850       case TokenNameEOF :
3851         return "EOF"; //$NON-NLS-1$
3852       case TokenNameWHITESPACE :
3853         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3854       case TokenNameCOMMENT_LINE :
3855         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3856       case TokenNameCOMMENT_BLOCK :
3857         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3858       case TokenNameCOMMENT_PHPDOC :
3859         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3860       //      case TokenNameHTML :
3861       //        return "HTML(" + new String(getCurrentTokenSource()) + ")";
3862       // //$NON-NLS-1$
3863       case TokenNameFILE :
3864         return "__FILE__"; //$NON-NLS-1$
3865       case TokenNameLINE :
3866         return "__LINE__"; //$NON-NLS-1$
3867       case TokenNameCLASS_C :
3868         return "__CLASS__"; //$NON-NLS-1$
3869       case TokenNameMETHOD_C :
3870         return "__METHOD__"; //$NON-NLS-1$
3871       case TokenNameFUNC_C :
3872         return "__FUNCTION__"; //$NON-NLS-1
3873       case TokenNameboolCAST :
3874         return "( bool )"; //$NON-NLS-1$
3875       case TokenNameintCAST :
3876         return "( int )"; //$NON-NLS-1$
3877       case TokenNamedoubleCAST :
3878         return "( double )"; //$NON-NLS-1$
3879       case TokenNameobjectCAST :
3880         return "( object )"; //$NON-NLS-1$
3881       case TokenNamestringCAST :
3882         return "( string )"; //$NON-NLS-1$
3883       default :
3884         return "not-a-token(" + (new Integer(act)) + ") "
3885             + new String(getCurrentTokenSource()); //$NON-NLS-1$
3886     }
3887   }
3888   
3889   public Scanner() {
3890     this(false, false);
3891   }
3892   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
3893     this(tokenizeComments, tokenizeWhiteSpace, false);
3894   }
3895   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3896       boolean checkNonExternalizedStringLiterals) {
3897     this(tokenizeComments, tokenizeWhiteSpace,
3898         checkNonExternalizedStringLiterals, false);
3899   }
3900   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3901       boolean checkNonExternalizedStringLiterals, boolean assertMode) {
3902     this(tokenizeComments, tokenizeWhiteSpace,
3903         checkNonExternalizedStringLiterals, assertMode, false, null, null);
3904   }
3905   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace,
3906       boolean checkNonExternalizedStringLiterals, boolean assertMode,
3907       boolean tokenizeStrings,
3908       char[][] taskTags,
3909           char[][] taskPriorities) {
3910     this.eofPosition = Integer.MAX_VALUE;
3911     this.tokenizeComments = tokenizeComments;
3912     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3913     this.tokenizeStrings = tokenizeStrings;
3914     this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
3915     this.assertMode = assertMode;
3916     this.encapsedStringStack = null;
3917     this.taskTags = taskTags;
3918         this.taskPriorities = taskPriorities;
3919   }
3920   private void checkNonExternalizeString() throws InvalidInputException {
3921     if (currentLine == null)
3922       return;
3923     parseTags(currentLine);
3924   }
3925   private void parseTags(NLSLine line) throws InvalidInputException {
3926     String s = new String(getCurrentTokenSource());
3927     int pos = s.indexOf(TAG_PREFIX);
3928     int lineLength = line.size();
3929     while (pos != -1) {
3930       int start = pos + TAG_PREFIX_LENGTH;
3931       int end = s.indexOf(TAG_POSTFIX, start);
3932       String index = s.substring(start, end);
3933       int i = 0;
3934       try {
3935         i = Integer.parseInt(index) - 1;
3936         // Tags are one based not zero based.
3937       } catch (NumberFormatException e) {
3938         i = -1; // we don't want to consider this as a valid NLS tag
3939       }
3940       if (line.exists(i)) {
3941         line.set(i, null);
3942       }
3943       pos = s.indexOf(TAG_PREFIX, start);
3944     }
3945     this.nonNLSStrings = new StringLiteral[lineLength];
3946     int nonNLSCounter = 0;
3947     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3948       StringLiteral literal = (StringLiteral) iterator.next();
3949       if (literal != null) {
3950         this.nonNLSStrings[nonNLSCounter++] = literal;
3951       }
3952     }
3953     if (nonNLSCounter == 0) {
3954       this.nonNLSStrings = null;
3955       currentLine = null;
3956       return;
3957     }
3958     this.wasNonExternalizedStringLiteral = true;
3959     if (nonNLSCounter != lineLength) {
3960       System.arraycopy(this.nonNLSStrings, 0,
3961           (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0,
3962           nonNLSCounter);
3963     }
3964     currentLine = null;
3965   }
3966   public final void scanEscapeCharacter() throws InvalidInputException {
3967     // the string with "\\u" is a legal string of two chars \ and u
3968     //thus we use a direct access to the source (for regular cases).
3969     if (unicodeAsBackSlash) {
3970       // consume next character
3971       unicodeAsBackSlash = false;
3972       //                        if (((currentCharacter = source[currentPosition++]) == '\\') &&
3973       // (source[currentPosition] == 'u')) {
3974       //                                getNextUnicodeChar();
3975       //                        } else {
3976       if (withoutUnicodePtr != 0) {
3977         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3978         //                              }
3979       }
3980     } else
3981       currentCharacter = source[currentPosition++];
3982     switch (currentCharacter) {
3983       case 'b' :
3984         currentCharacter = '\b';
3985         break;
3986       case 't' :
3987         currentCharacter = '\t';
3988         break;
3989       case 'n' :
3990         currentCharacter = '\n';
3991         break;
3992       case 'f' :
3993         currentCharacter = '\f';
3994         break;
3995       case 'r' :
3996         currentCharacter = '\r';
3997         break;
3998       case '\"' :
3999         currentCharacter = '\"';
4000         break;
4001       case '\'' :
4002         currentCharacter = '\'';
4003         break;
4004       case '\\' :
4005         currentCharacter = '\\';
4006         break;
4007       default :
4008         // -----------octal escape--------------
4009         // OctalDigit
4010         // OctalDigit OctalDigit
4011         // ZeroToThree OctalDigit OctalDigit
4012         int number = Character.getNumericValue(currentCharacter);
4013         if (number >= 0 && number <= 7) {
4014           boolean zeroToThreeNot = number > 3;
4015           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
4016             int digit = Character.getNumericValue(currentCharacter);
4017             if (digit >= 0 && digit <= 7) {
4018               number = (number * 8) + digit;
4019               if (Character
4020                   .isDigit(currentCharacter = source[currentPosition++])) {
4021                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit
4022                   // Digit --> ignore last character
4023                   currentPosition--;
4024                 } else {
4025                   digit = Character.getNumericValue(currentCharacter);
4026                   if (digit >= 0 && digit <= 7) { // has read \ZeroToThree
4027                     // OctalDigit OctalDigit
4028                     number = (number * 8) + digit;
4029                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit
4030                     // --> ignore last character
4031                     currentPosition--;
4032                   }
4033                 }
4034               } else { // has read \OctalDigit NonDigit--> ignore last
4035                 // character
4036                 currentPosition--;
4037               }
4038             } else { // has read \OctalDigit NonOctalDigit--> ignore last
4039               // character
4040               currentPosition--;
4041             }
4042           } else { // has read \OctalDigit --> ignore last character
4043             currentPosition--;
4044           }
4045           if (number > 255)
4046             throw new InvalidInputException(INVALID_ESCAPE);
4047           currentCharacter = (char) number;
4048         } else
4049           throw new InvalidInputException(INVALID_ESCAPE);
4050     }
4051   }
4052   // chech presence of task: tags
4053   public void checkTaskTag(int commentStart, int commentEnd) {
4054     // only look for newer task: tags
4055     if (this.foundTaskCount > 0
4056         && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
4057       return;
4058     }
4059     int foundTaskIndex = this.foundTaskCount;
4060     nextChar : for (int i = commentStart; i < commentEnd
4061         && i < this.eofPosition; i++) {
4062       char[] tag = null;
4063       char[] priority = null;
4064       // check for tag occurrence
4065       nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
4066         tag = this.taskTags[itag];
4067         priority = this.taskPriorities != null
4068             && itag < this.taskPriorities.length
4069             ? this.taskPriorities[itag]
4070             : null;
4071         int tagLength = tag.length;
4072         for (int t = 0; t < tagLength; t++) {
4073           if (this.source[i + t] != tag[t])
4074             continue nextTag;
4075         }
4076         if (this.foundTaskTags == null) {
4077           this.foundTaskTags = new char[5][];
4078           this.foundTaskMessages = new char[5][];
4079           this.foundTaskPriorities = new char[5][];
4080           this.foundTaskPositions = new int[5][];
4081         } else if (this.foundTaskCount == this.foundTaskTags.length) {
4082           System.arraycopy(this.foundTaskTags, 0,
4083               this.foundTaskTags = new char[this.foundTaskCount * 2][], 0,
4084               this.foundTaskCount);
4085           System.arraycopy(this.foundTaskMessages, 0,
4086               this.foundTaskMessages = new char[this.foundTaskCount * 2][], 0,
4087               this.foundTaskCount);
4088           System.arraycopy(this.foundTaskPriorities, 0,
4089               this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
4090               0, this.foundTaskCount);
4091           System.arraycopy(this.foundTaskPositions, 0,
4092               this.foundTaskPositions = new int[this.foundTaskCount * 2][], 0,
4093               this.foundTaskCount);
4094         }
4095         this.foundTaskTags[this.foundTaskCount] = tag;
4096         this.foundTaskPriorities[this.foundTaskCount] = priority;
4097         this.foundTaskPositions[this.foundTaskCount] = new int[]{i,
4098             i + tagLength - 1};
4099         this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
4100         this.foundTaskCount++;
4101         i += tagLength - 1; // will be incremented when looping
4102       }
4103     }
4104     for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
4105       // retrieve message start and end positions
4106       int msgStart = this.foundTaskPositions[i][0]
4107           + this.foundTaskTags[i].length;
4108       int max_value = i + 1 < this.foundTaskCount
4109           ? this.foundTaskPositions[i + 1][0] - 1
4110           : commentEnd - 1;
4111       // at most beginning of next task
4112       if (max_value < msgStart)
4113         max_value = msgStart; // would only occur if tag is before EOF.
4114       int end = -1;
4115       char c;
4116       for (int j = msgStart; j < max_value; j++) {
4117         if ((c = this.source[j]) == '\n' || c == '\r') {
4118           end = j - 1;
4119           break;
4120         }
4121       }
4122       if (end == -1) {
4123         for (int j = max_value; j > msgStart; j--) {
4124           if ((c = this.source[j]) == '*') {
4125             end = j - 1;
4126             break;
4127           }
4128         }
4129         if (end == -1)
4130           end = max_value;
4131       }
4132       if (msgStart == end)
4133         continue; // empty
4134       // trim the message
4135       while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
4136         end--;
4137       while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
4138         msgStart++;
4139       // update the end position of the task
4140       this.foundTaskPositions[i][1] = end;
4141       // get the message source
4142       final int messageLength = end - msgStart + 1;
4143       char[] message = new char[messageLength];
4144       System.arraycopy(source, msgStart, message, 0, messageLength);
4145       this.foundTaskMessages[i] = message;
4146     }
4147   }
4148 }