misc parser bugfixes; still very ugly state
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v0.5 
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v05.html
7  * 
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
16
17 import net.sourceforge.phpdt.core.compiler.CharOperation;
18 import net.sourceforge.phpdt.core.compiler.IScanner;
19 import net.sourceforge.phpdt.core.compiler.ITerminalSymbols;
20 import net.sourceforge.phpdt.core.compiler.InvalidInputException;
21 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
22
23 public class Scanner implements IScanner, ITerminalSymbols {
24
/*
 * The APIs are:
 * - getNextToken(), which returns the type of the current token (this value is not memorized by the scanner);
 * - getCurrentTokenSource(), which provides the token's "REAL" source (i.e. all unicode escapes have been
 *   transformed into the corresponding char);
 * - sourceStart, which gives the start position of the token in the stream;
 * - currentPosition-1, which gives the sourceEnd position of the token in the stream.
 */
30
31   // 1.4 feature
32   private boolean assertMode;
33   public boolean useAssertAsAnIndentifier = false;
34   //flag indicating if processed source contains occurrences of keyword assert
35   public boolean containsAssertKeyword = false;
36
     // when true, getNextToken() calls pushLineSeparator() for every CR/LF it meets while skipping non-PHP text
37   public boolean recordLineSeparator;
     // false while getNextToken() is skipping text outside PHP; set to true once "<?" or "<?php" has been read
38   public boolean phpMode = false;
39
     // last character read from source
40   public char currentCharacter;
     // start index of the current token (see getCurrentTokenStartPosition())
41   public int startPosition;
     // index of the next character to read; currentPosition - 1 is the end of the current token
42   public int currentPosition;
43   public int initialPosition, eofPosition;
44   // after this position eof are generated instead of real token from the source
45
46   public boolean tokenizeComments;
     // when true, getNextToken() returns TokenNameHTML for the text preceding a PHP open tag
47   public boolean tokenizeWhiteSpace;
48
49   //source should be viewed as a window (aka a part)
50   //of a entire very large stream
51   public char source[];
52
53   //unicode support
54   public char[] withoutUnicodeBuffer;
     // reset to 0 by getNextToken() at the start of each token
55   public int withoutUnicodePtr;
56   //when == 0 ==> no unicode in the current token
     // cleared by getNextChar(char) on a successful match and when the read runs past the end of source
57   public boolean unicodeAsBackSlash = false;
58
59   public boolean scanningFloatLiteral = false;
60
61   //support for /** comments
62   //public char[][] comments = new char[10][];
63   public int[] commentStops = new int[10];
64   public int[] commentStarts = new int[10];
65   public int commentPtr = -1; // no comment test with commentPtr value -1
66
67   //diet parsing support - jump over some method body when requested
     // consumed by getNextToken(): when set it calls jumpOverMethodBody() and resets the flag to false
68   public boolean diet = false;
69
70   //support for the poor-line-debuggers ....
71   //remember the position of the cr/lf
     // read by getLineEnd()/getLineStart(); NOTE(review): presumably linePtr indexes the last used slot - confirm
72   public int[] lineEnds = new int[250];
73   public int linePtr = -1;
     // reset to false by getNextToken() each time it is called in PHP mode
74   public boolean wasAcr = false;
75
     // Symbolic names for scanner failures.
     // NOTE(review): presumably used as InvalidInputException messages - confirm against the throw sites.
76   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
77
78   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
79   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
80   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
81   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
82   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
83   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
84   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
85
86   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
87   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
88   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
89   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
90
91   //----------------optimized identifier managment------------------
     // one shared single-character array per lower-case ASCII letter
92   static final char[] charArray_a = new char[] { 'a' },
93     charArray_b = new char[] { 'b' },
94     charArray_c = new char[] { 'c' },
95     charArray_d = new char[] { 'd' },
96     charArray_e = new char[] { 'e' },
97     charArray_f = new char[] { 'f' },
98     charArray_g = new char[] { 'g' },
99     charArray_h = new char[] { 'h' },
100     charArray_i = new char[] { 'i' },
101     charArray_j = new char[] { 'j' },
102     charArray_k = new char[] { 'k' },
103     charArray_l = new char[] { 'l' },
104     charArray_m = new char[] { 'm' },
105     charArray_n = new char[] { 'n' },
106     charArray_o = new char[] { 'o' },
107     charArray_p = new char[] { 'p' },
108     charArray_q = new char[] { 'q' },
109     charArray_r = new char[] { 'r' },
110     charArray_s = new char[] { 's' },
111     charArray_t = new char[] { 't' },
112     charArray_u = new char[] { 'u' },
113     charArray_v = new char[] { 'v' },
114     charArray_w = new char[] { 'w' },
115     charArray_x = new char[] { 'x' },
116     charArray_y = new char[] { 'y' },
117     charArray_z = new char[] { 'z' };
118
     // placeholder of six NUL characters; the instance initializer stores it in every slot of charArray_length
119   static final char[] initCharArray = new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
120   static final int TableSize = 30, InternalTableSize = 6;
121   //30*6 = 180 entries
     // getCurrentIdentifierSource() dispatches token lengths 1..6 to the optimizedCurrentTokenSourceN() helpers
122   public static final int OptimizedLength = 6;
     // per-instance table (the static modifier is commented out), dimensioned [OptimizedLength][TableSize][InternalTableSize]
123   public /* static */
124   final char[][][][] charArray_length = new char[OptimizedLength][TableSize][InternalTableSize][];
125   // support for detecting non-externalized string literals
126   int currentLineNr = -1;
127   int previousLineNr = -1;
     // set to null by getNextToken() on a CR/LF when recordLineSeparator is false
128   NLSLine currentLine = null;
129   List lines = new ArrayList();
     // a complete tag has the form TAG_PREFIX + n + TAG_POSTFIX, e.g. the "//$NON-NLS-1$" markers used in this file
130   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
131   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
132   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
133   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
134   public StringLiteral[] nonNLSStrings = null;
135   public boolean checkNonExternalizedStringLiterals = true;
136   public boolean wasNonExternalizedStringLiteral = false;
137
138   /* static */ {
139     for (int i = 0; i < 6; i++) {
140       for (int j = 0; j < TableSize; j++) {
141         for (int k = 0; k < InternalTableSize; k++) {
142           charArray_length[i][j][k] = initCharArray;
143         }
144       }
145     }
146   }
     // NOTE(review): presumably insertion cursors for the charArray_length cache, one per token length 2..6 - confirm;
     // note they are static while charArray_length itself is per-instance
147   static int newEntry2 = 0, newEntry3 = 0, newEntry4 = 0, newEntry5 = 0, newEntry6 = 0;
148
     // bracket kind indices; BracketKinds is the number of kinds
149   public static final int RoundBracket = 0;
150   public static final int SquareBracket = 1;
151   public static final int CurlyBracket = 2;
152   public static final int BracketKinds = 3;
153
154   // task tag support
155   public char[][] foundTaskTags = null;
156   public char[][] foundTaskMessages;
157   public char[][] foundTaskPriorities = null;
158   public int[][] foundTaskPositions;
159   public int foundTaskCount = 0;
160   public char[][] taskTags = null;
161   public char[][] taskPriorities = null;
162
     // NOTE(review): hard-wired to true - confirm this is intended outside development builds
163   public static final boolean DEBUG = true;
164
/**
 * Creates a scanner, passing <code>false</code> for both <code>tokenizeComments</code> and
 * <code>tokenizeWhiteSpace</code> (and for the third argument of the three-argument
 * constructor declared elsewhere in this class).
 */
public Scanner() {
  this(false, false, false);
}

/**
 * Creates a scanner with the given tokenizing options; the third argument of the
 * three-argument constructor is passed as <code>false</code>.
 *
 * @param tokenizeComments forwarded unchanged to the three-argument constructor
 * @param tokenizeWhiteSpace forwarded unchanged to the three-argument constructor
 */
public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
  this(tokenizeComments, tokenizeWhiteSpace, false);
}
171
172   /**
173    * Determines if the specified character is permissible as the first character in a PHP identifier
174    */
175   public static boolean isPHPIdentifierStart(char ch) {
176     return Character.isLetter(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
177   }
178
179   /**
180    * Determines if the specified character may be part of a PHP identifier as other than the first character
181    */
182   public static boolean isPHPIdentifierPart(char ch) {
183     return Character.isLetterOrDigit(ch) || (ch == '_') || (0x7F <= ch && ch <= 0xFF);
184   }
185
186   public final boolean atEnd() {
187     // This code is not relevant if source is
188     // Only a part of the real stream input
189
190     return source.length == currentPosition;
191   }
192   public char[] getCurrentIdentifierSource() {
193     //return the token REAL source (aka unicodes are precomputed)
194
195     char[] result;
196     //    if (withoutUnicodePtr != 0)
197     //      //0 is used as a fast test flag so the real first char is in position 1
198     //      System.arraycopy(
199     //        withoutUnicodeBuffer,
200     //        1,
201     //        result = new char[withoutUnicodePtr],
202     //        0,
203     //        withoutUnicodePtr);
204     //    else {
205     int length = currentPosition - startPosition;
206     switch (length) { // see OptimizedLength
207       case 1 :
208         return optimizedCurrentTokenSource1();
209       case 2 :
210         return optimizedCurrentTokenSource2();
211       case 3 :
212         return optimizedCurrentTokenSource3();
213       case 4 :
214         return optimizedCurrentTokenSource4();
215       case 5 :
216         return optimizedCurrentTokenSource5();
217       case 6 :
218         return optimizedCurrentTokenSource6();
219     }
220     //no optimization
221     System.arraycopy(source, startPosition, result = new char[length], 0, length);
222     //   }
223     return result;
224   }
225   public int getCurrentTokenEndPosition() {
226     return this.currentPosition - 1;
227   }
228
229   public final char[] getCurrentTokenSource() {
230     // Return the token REAL source (aka unicodes are precomputed)
231
232     char[] result;
233     //    if (withoutUnicodePtr != 0)
234     //      // 0 is used as a fast test flag so the real first char is in position 1
235     //      System.arraycopy(
236     //        withoutUnicodeBuffer,
237     //        1,
238     //        result = new char[withoutUnicodePtr],
239     //        0,
240     //        withoutUnicodePtr);
241     //    else {
242     int length;
243     System.arraycopy(source, startPosition, result = new char[length = currentPosition - startPosition], 0, length);
244     //    }
245     return result;
246   }
247
248   public final char[] getCurrentTokenSource(int startPos) {
249     // Return the token REAL source (aka unicodes are precomputed)
250
251     char[] result;
252     //    if (withoutUnicodePtr != 0)
253     //      // 0 is used as a fast test flag so the real first char is in position 1
254     //      System.arraycopy(
255     //        withoutUnicodeBuffer,
256     //        1,
257     //        result = new char[withoutUnicodePtr],
258     //        0,
259     //        withoutUnicodePtr);
260     //    else {
261     int length;
262     System.arraycopy(source, startPos, result = new char[length = currentPosition - startPos], 0, length);
263     //  }
264     return result;
265   }
266
267   public final char[] getCurrentTokenSourceString() {
268     //return the token REAL source (aka unicodes are precomputed).
269     //REMOVE the two " that are at the beginning and the end.
270
271     char[] result;
272     if (withoutUnicodePtr != 0)
273       //0 is used as a fast test flag so the real first char is in position 1
274       System.arraycopy(withoutUnicodeBuffer, 2,
275       //2 is 1 (real start) + 1 (to jump over the ")
276       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
277     else {
278       int length;
279       System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
280     }
281     return result;
282   }
283   public int getCurrentTokenStartPosition() {
284     return this.startPosition;
285   }
286
287   public final char[] getCurrentStringLiteralSource() {
288     // Return the token REAL source (aka unicodes are precomputed)
289
290     char[] result;
291
292     int length;
293     System.arraycopy(source, startPosition + 1, result = new char[length = currentPosition - startPosition - 2], 0, length);
294     //    }
295     return result;
296   }
297
298   /*
299    * Search the source position corresponding to the end of a given line number
300    * 
301    * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
302    * 
303    * In case the given line number is inconsistent, answers -1.
304    */
305   public final int getLineEnd(int lineNumber) {
306
307     if (lineEnds == null)
308       return -1;
309     if (lineNumber >= lineEnds.length)
310       return -1;
311     if (lineNumber <= 0)
312       return -1;
313
314     if (lineNumber == lineEnds.length - 1)
315       return eofPosition;
316     return lineEnds[lineNumber - 1];
317     // next line start one character behind the lineEnd of the previous line
318   }
319   /**
320    * Search the source position corresponding to the beginning of a given line number
321    * 
322    * Line numbers are 1-based, and relative to the scanner initialPosition. Character positions are 0-based.
323    * 
324    * e.g. getLineStart(1) --> 0 i.e. first line starts at character 0.
325    * 
326    * In case the given line number is inconsistent, answers -1.
327    */
328   public final int getLineStart(int lineNumber) {
329
330     if (lineEnds == null)
331       return -1;
332     if (lineNumber >= lineEnds.length)
333       return -1;
334     if (lineNumber <= 0)
335       return -1;
336
337     if (lineNumber == 1)
338       return initialPosition;
339     return lineEnds[lineNumber - 2] + 1;
340     // next line start one character behind the lineEnd of the previous line
341   }
342   public final boolean getNextChar(char testedChar) {
343     //BOOLEAN
344     //handle the case of unicode.
345     //when a unicode appears then we must use a buffer that holds char internal values
346     //At the end of this method currentCharacter holds the new visited char
347     //and currentPosition points right next after it
348     //Both previous lines are true if the currentCharacter is == to the testedChar
349     //On false, no side effect has occured.
350
351     //ALL getNextChar.... ARE OPTIMIZED COPIES
352
353     int temp = currentPosition;
354     try {
355       currentCharacter = source[currentPosition++];
356       //      if (((currentCharacter = source[currentPosition++]) == '\\')
357       //        && (source[currentPosition] == 'u')) {
358       //        //-------------unicode traitement ------------
359       //        int c1, c2, c3, c4;
360       //        int unicodeSize = 6;
361       //        currentPosition++;
362       //        while (source[currentPosition] == 'u') {
363       //          currentPosition++;
364       //          unicodeSize++;
365       //        }
366       //
367       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
368       //          || c1 < 0)
369       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
370       //            || c2 < 0)
371       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
372       //            || c3 < 0)
373       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
374       //            || c4 < 0)) {
375       //          currentPosition = temp;
376       //          return false;
377       //        }
378       //
379       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
380       //        if (currentCharacter != testedChar) {
381       //          currentPosition = temp;
382       //          return false;
383       //        }
384       //        unicodeAsBackSlash = currentCharacter == '\\';
385       //
386       //        //need the unicode buffer
387       //        if (withoutUnicodePtr == 0) {
388       //          //buffer all the entries that have been left aside....
389       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
390       //          System.arraycopy(
391       //            source,
392       //            startPosition,
393       //            withoutUnicodeBuffer,
394       //            1,
395       //            withoutUnicodePtr);
396       //        }
397       //        //fill the buffer with the char
398       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
399       //        return true;
400       //
401       //      } //-------------end unicode traitement--------------
402       //      else {
403       if (currentCharacter != testedChar) {
404         currentPosition = temp;
405         return false;
406       }
407       unicodeAsBackSlash = false;
408       //        if (withoutUnicodePtr != 0)
409       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
410       return true;
411       //      }
412     } catch (IndexOutOfBoundsException e) {
413       unicodeAsBackSlash = false;
414       currentPosition = temp;
415       return false;
416     }
417   }
418   public final int getNextChar(char testedChar1, char testedChar2) {
419     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
420     //test can be done with (x==0) for the first and (x>0) for the second
421     //handle the case of unicode.
422     //when a unicode appears then we must use a buffer that holds char internal values
423     //At the end of this method currentCharacter holds the new visited char
424     //and currentPosition points right next after it
425     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
426     //On false, no side effect has occured.
427
428     //ALL getNextChar.... ARE OPTIMIZED COPIES
429
430     int temp = currentPosition;
431     try {
432       int result;
433       currentCharacter = source[currentPosition++];
434       //      if (((currentCharacter = source[currentPosition++]) == '\\')
435       //        && (source[currentPosition] == 'u')) {
436       //        //-------------unicode traitement ------------
437       //        int c1, c2, c3, c4;
438       //        int unicodeSize = 6;
439       //        currentPosition++;
440       //        while (source[currentPosition] == 'u') {
441       //          currentPosition++;
442       //          unicodeSize++;
443       //        }
444       //
445       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
446       //          || c1 < 0)
447       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
448       //            || c2 < 0)
449       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
450       //            || c3 < 0)
451       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
452       //            || c4 < 0)) {
453       //          currentPosition = temp;
454       //          return 2;
455       //        }
456       //
457       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
458       //        if (currentCharacter == testedChar1)
459       //          result = 0;
460       //        else if (currentCharacter == testedChar2)
461       //          result = 1;
462       //        else {
463       //          currentPosition = temp;
464       //          return -1;
465       //        }
466       //
467       //        //need the unicode buffer
468       //        if (withoutUnicodePtr == 0) {
469       //          //buffer all the entries that have been left aside....
470       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
471       //          System.arraycopy(
472       //            source,
473       //            startPosition,
474       //            withoutUnicodeBuffer,
475       //            1,
476       //            withoutUnicodePtr);
477       //        }
478       //        //fill the buffer with the char
479       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
480       //        return result;
481       //      } //-------------end unicode traitement--------------
482       //      else {
483       if (currentCharacter == testedChar1)
484         result = 0;
485       else if (currentCharacter == testedChar2)
486         result = 1;
487       else {
488         currentPosition = temp;
489         return -1;
490       }
491
492       //        if (withoutUnicodePtr != 0)
493       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
494       return result;
495       //     }
496     } catch (IndexOutOfBoundsException e) {
497       currentPosition = temp;
498       return -1;
499     }
500   }
501   public final boolean getNextCharAsDigit() {
502     //BOOLEAN
503     //handle the case of unicode.
504     //when a unicode appears then we must use a buffer that holds char internal values
505     //At the end of this method currentCharacter holds the new visited char
506     //and currentPosition points right next after it
507     //Both previous lines are true if the currentCharacter is a digit
508     //On false, no side effect has occured.
509
510     //ALL getNextChar.... ARE OPTIMIZED COPIES
511
512     int temp = currentPosition;
513     try {
514       currentCharacter = source[currentPosition++];
515       //      if (((currentCharacter = source[currentPosition++]) == '\\')
516       //        && (source[currentPosition] == 'u')) {
517       //        //-------------unicode traitement ------------
518       //        int c1, c2, c3, c4;
519       //        int unicodeSize = 6;
520       //        currentPosition++;
521       //        while (source[currentPosition] == 'u') {
522       //          currentPosition++;
523       //          unicodeSize++;
524       //        }
525       //
526       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
527       //          || c1 < 0)
528       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
529       //            || c2 < 0)
530       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
531       //            || c3 < 0)
532       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
533       //            || c4 < 0)) {
534       //          currentPosition = temp;
535       //          return false;
536       //        }
537       //
538       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
539       //        if (!Character.isDigit(currentCharacter)) {
540       //          currentPosition = temp;
541       //          return false;
542       //        }
543       //
544       //        //need the unicode buffer
545       //        if (withoutUnicodePtr == 0) {
546       //          //buffer all the entries that have been left aside....
547       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
548       //          System.arraycopy(
549       //            source,
550       //            startPosition,
551       //            withoutUnicodeBuffer,
552       //            1,
553       //            withoutUnicodePtr);
554       //        }
555       //        //fill the buffer with the char
556       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
557       //        return true;
558       //      } //-------------end unicode traitement--------------
559       //      else {
560       if (!Character.isDigit(currentCharacter)) {
561         currentPosition = temp;
562         return false;
563       }
564       //        if (withoutUnicodePtr != 0)
565       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
566       return true;
567       //      }
568     } catch (IndexOutOfBoundsException e) {
569       currentPosition = temp;
570       return false;
571     }
572   }
573   public final boolean getNextCharAsDigit(int radix) {
574     //BOOLEAN
575     //handle the case of unicode.
576     //when a unicode appears then we must use a buffer that holds char internal values
577     //At the end of this method currentCharacter holds the new visited char
578     //and currentPosition points right next after it
579     //Both previous lines are true if the currentCharacter is a digit base on radix
580     //On false, no side effect has occured.
581
582     //ALL getNextChar.... ARE OPTIMIZED COPIES
583
584     int temp = currentPosition;
585     try {
586       currentCharacter = source[currentPosition++];
587       //      if (((currentCharacter = source[currentPosition++]) == '\\')
588       //        && (source[currentPosition] == 'u')) {
589       //        //-------------unicode traitement ------------
590       //        int c1, c2, c3, c4;
591       //        int unicodeSize = 6;
592       //        currentPosition++;
593       //        while (source[currentPosition] == 'u') {
594       //          currentPosition++;
595       //          unicodeSize++;
596       //        }
597       //
598       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
599       //          || c1 < 0)
600       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
601       //            || c2 < 0)
602       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
603       //            || c3 < 0)
604       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
605       //            || c4 < 0)) {
606       //          currentPosition = temp;
607       //          return false;
608       //        }
609       //
610       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
611       //        if (Character.digit(currentCharacter, radix) == -1) {
612       //          currentPosition = temp;
613       //          return false;
614       //        }
615       //
616       //        //need the unicode buffer
617       //        if (withoutUnicodePtr == 0) {
618       //          //buffer all the entries that have been left aside....
619       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
620       //          System.arraycopy(
621       //            source,
622       //            startPosition,
623       //            withoutUnicodeBuffer,
624       //            1,
625       //            withoutUnicodePtr);
626       //        }
627       //        //fill the buffer with the char
628       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
629       //        return true;
630       //      } //-------------end unicode traitement--------------
631       //      else {
632       if (Character.digit(currentCharacter, radix) == -1) {
633         currentPosition = temp;
634         return false;
635       }
636       //        if (withoutUnicodePtr != 0)
637       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
638       return true;
639       //      }
640     } catch (IndexOutOfBoundsException e) {
641       currentPosition = temp;
642       return false;
643     }
644   }
645   public boolean getNextCharAsJavaIdentifierPart() {
646     //BOOLEAN
647     //handle the case of unicode.
648     //when a unicode appears then we must use a buffer that holds char internal values
649     //At the end of this method currentCharacter holds the new visited char
650     //and currentPosition points right next after it
651     //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
652     //On false, no side effect has occured.
653
654     //ALL getNextChar.... ARE OPTIMIZED COPIES
655
656     int temp = currentPosition;
657     try {
658       currentCharacter = source[currentPosition++];
659       //      if (((currentCharacter = source[currentPosition++]) == '\\')
660       //        && (source[currentPosition] == 'u')) {
661       //        //-------------unicode traitement ------------
662       //        int c1, c2, c3, c4;
663       //        int unicodeSize = 6;
664       //        currentPosition++;
665       //        while (source[currentPosition] == 'u') {
666       //          currentPosition++;
667       //          unicodeSize++;
668       //        }
669       //
670       //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
671       //          || c1 < 0)
672       //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
673       //            || c2 < 0)
674       //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
675       //            || c3 < 0)
676       //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
677       //            || c4 < 0)) {
678       //          currentPosition = temp;
679       //          return false;
680       //        }
681       //
682       //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
683       //        if (!isPHPIdentifierPart(currentCharacter)) {
684       //          currentPosition = temp;
685       //          return false;
686       //        }
687       //
688       //        //need the unicode buffer
689       //        if (withoutUnicodePtr == 0) {
690       //          //buffer all the entries that have been left aside....
691       //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
692       //          System.arraycopy(
693       //            source,
694       //            startPosition,
695       //            withoutUnicodeBuffer,
696       //            1,
697       //            withoutUnicodePtr);
698       //        }
699       //        //fill the buffer with the char
700       //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
701       //        return true;
702       //      } //-------------end unicode traitement--------------
703       //      else {
704       if (!isPHPIdentifierPart(currentCharacter)) {
705         currentPosition = temp;
706         return false;
707       }
708
709       //        if (withoutUnicodePtr != 0)
710       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
711       return true;
712       //      }
713     } catch (IndexOutOfBoundsException e) {
714       currentPosition = temp;
715       return false;
716     }
717   }
718
719   public int getNextToken() throws InvalidInputException {
720     int htmlPosition = currentPosition;
721     try {
722       while (!phpMode) {
723         currentCharacter = source[currentPosition++];
724         if (currentCharacter == '<') {
725           if (getNextChar('?')) {
726             currentCharacter = source[currentPosition++];
727             if ((currentCharacter == ' ') || Character.isWhitespace(currentCharacter)) {
728               // <?
729               startPosition = currentPosition;
730               phpMode = true;
731               if (tokenizeWhiteSpace) {
732                 // && (whiteStart != currentPosition - 1)) {
733                 // reposition scanner in case we are interested by spaces as tokens
734                 startPosition = htmlPosition;
735                 return TokenNameHTML;
736               }
737             } else {
738               boolean phpStart = (currentCharacter == 'P') || (currentCharacter == 'p');
739               if (phpStart) {
740                 int test = getNextChar('H', 'h');
741                 if (test >= 0) {
742                   test = getNextChar('P', 'p');
743                   if (test >= 0) {
744                     // <?PHP <?php
745                     startPosition = currentPosition;
746                     phpMode = true;
747
748                     if (tokenizeWhiteSpace) {
749                       // && (whiteStart != currentPosition - 1)) {
750                       // reposition scanner in case we are interested by spaces as tokens
751                       startPosition = htmlPosition;
752                       return TokenNameHTML;
753                     }
754                   }
755                 }
756               }
757             }
758           }
759         }
760
761         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
762           if (recordLineSeparator) {
763             pushLineSeparator();
764           } else {
765             currentLine = null;
766           }
767         }
768       }
769     } //-----------------end switch while try--------------------
770     catch (IndexOutOfBoundsException e) {
771       if (tokenizeWhiteSpace) {
772         // && (whiteStart != currentPosition - 1)) {
773         // reposition scanner in case we are interested by spaces as tokens
774         startPosition = htmlPosition;
775       }
776       return TokenNameEOF;
777     }
778
779     if (phpMode) {
780       this.wasAcr = false;
781       if (diet) {
782         jumpOverMethodBody();
783         diet = false;
784         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
785       }
786       try {
787         while (true) { //loop for jumping over comments
788           withoutUnicodePtr = 0;
789           //start with a new token (even comment written with unicode )
790
791           // ---------Consume white space and handles startPosition---------
792           int whiteStart = currentPosition;
793           boolean isWhiteSpace;
794           do {
795             startPosition = currentPosition;
796             currentCharacter = source[currentPosition++];
797             //            if (((currentCharacter = source[currentPosition++]) == '\\')
798             //              && (source[currentPosition] == 'u')) {
799             //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
800             //            } else {
801             if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
802               checkNonExternalizeString();
803               if (recordLineSeparator) {
804                 pushLineSeparator();
805               } else {
806                 currentLine = null;
807               }
808             }
809             isWhiteSpace = (currentCharacter == ' ') || Character.isWhitespace(currentCharacter);
810             //            }
811           } while (isWhiteSpace);
812           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
813             // reposition scanner in case we are interested by spaces as tokens
814             currentPosition--;
815             startPosition = whiteStart;
816             return TokenNameWHITESPACE;
817           }
818           //little trick to get out in the middle of a source computation
819           if (currentPosition > eofPosition)
820             return TokenNameEOF;
821
822           // ---------Identify the next token-------------
823
824           switch (currentCharacter) {
825             case '(' :
826               return TokenNameLPAREN;
827             case ')' :
828               return TokenNameRPAREN;
829             case '{' :
830               return TokenNameLBRACE;
831             case '}' :
832               return TokenNameRBRACE;
833             case '[' :
834               return TokenNameLBRACKET;
835             case ']' :
836               return TokenNameRBRACKET;
837             case ';' :
838               return TokenNameSEMICOLON;
839             case ',' :
840               return TokenNameCOMMA;
841
842             case '.' :
843               if (getNextCharAsDigit())
844                 return scanNumber(true);
845               return TokenNameDOT;
846             case '+' :
847               {
848                 int test;
849                 if ((test = getNextChar('+', '=')) == 0)
850                   return TokenNamePLUS_PLUS;
851                 if (test > 0)
852                   return TokenNamePLUS_EQUAL;
853                 return TokenNamePLUS;
854               }
855             case '-' :
856               {
857                 int test;
858                 if ((test = getNextChar('-', '=')) == 0)
859                   return TokenNameMINUS_MINUS;
860                 if (test > 0)
861                   return TokenNameMINUS_EQUAL;
862                 if (getNextChar('>'))
863                   return TokenNameMINUS_GREATER;
864
865                 return TokenNameMINUS;
866               }
867             case '~' :
868               if (getNextChar('='))
869                 return TokenNameTWIDDLE_EQUAL;
870               return TokenNameTWIDDLE;
871             case '!' :
872               if (getNextChar('=')) {
873                 if (getNextChar('=')) {
874                   return TokenNameNOT_EQUAL_EQUAL;
875                 }
876                 return TokenNameNOT_EQUAL;
877               }
878               return TokenNameNOT;
879             case '*' :
880               if (getNextChar('='))
881                 return TokenNameMULTIPLY_EQUAL;
882               return TokenNameMULTIPLY;
883             case '%' :
884               if (getNextChar('='))
885                 return TokenNameREMAINDER_EQUAL;
886               return TokenNameREMAINDER;
887             case '<' :
888               {
889                 int test;
890                 if ((test = getNextChar('=', '<')) == 0)
891                   return TokenNameLESS_EQUAL;
892                 if (test > 0) {
893                   if (getNextChar('='))
894                     return TokenNameLEFT_SHIFT_EQUAL;
895                   if (getNextChar('<')) {
896                     int heredocStart = currentPosition;
897                     int heredocLength = 0;
898                     currentCharacter = source[currentPosition++];
899                     if (isPHPIdentifierStart(currentCharacter)) {
900                       currentCharacter = source[currentPosition++];
901                     } else {
902                       return TokenNameERROR;
903                     }
904                     while (isPHPIdentifierPart(currentCharacter)) {
905                       currentCharacter = source[currentPosition++];
906                     }
907
908                     heredocLength = currentPosition - heredocStart - 1;
909
910                     // heredoc end-tag determination
911                     boolean endTag = true;
912                     char ch;
913                     do {
914                       ch = source[currentPosition++];
915                       if (ch == '\r' || ch == '\n') {
916                         if (recordLineSeparator) {
917                           pushLineSeparator();
918                         } else {
919                           currentLine = null;
920                         }
921                         for (int i = 0; i < heredocLength; i++) {
922                           if (source[currentPosition + i] != source[heredocStart + i]) {
923                             endTag = false;
924                             break;
925                           }
926                         }
927                         if (endTag) {
928                           currentPosition += heredocLength - 1;
929                           currentCharacter = source[currentPosition++];
930                           break; // do...while loop
931                         } else {
932                           endTag = true;
933                         }
934                       }
935
936                     } while (true);
937
938                     return TokenNameHEREDOC;
939                   }
940                   return TokenNameLEFT_SHIFT;
941                 }
942                 return TokenNameLESS;
943               }
944             case '>' :
945               {
946                 int test;
947                 if ((test = getNextChar('=', '>')) == 0)
948                   return TokenNameGREATER_EQUAL;
949                 if (test > 0) {
950                   if ((test = getNextChar('=', '>')) == 0)
951                     return TokenNameRIGHT_SHIFT_EQUAL;
952                   return TokenNameRIGHT_SHIFT;
953                 }
954                 return TokenNameGREATER;
955               }
956             case '=' :
957               if (getNextChar('=')) {
958                 if (getNextChar('=')) {
959                   return TokenNameEQUAL_EQUAL_EQUAL;
960                 }
961                 return TokenNameEQUAL_EQUAL;
962               }
963               if (getNextChar('>'))
964                 return TokenNameEQUAL_GREATER;
965               return TokenNameEQUAL;
966             case '&' :
967               {
968                 int test;
969                 if ((test = getNextChar('&', '=')) == 0)
970                   return TokenNameAND_AND;
971                 if (test > 0)
972                   return TokenNameAND_EQUAL;
973                 return TokenNameAND;
974               }
975             case '|' :
976               {
977                 int test;
978                 if ((test = getNextChar('|', '=')) == 0)
979                   return TokenNameOR_OR;
980                 if (test > 0)
981                   return TokenNameOR_EQUAL;
982                 return TokenNameOR;
983               }
984             case '^' :
985               if (getNextChar('='))
986                 return TokenNameXOR_EQUAL;
987               return TokenNameXOR;
988             case '?' :
989               if (getNextChar('>')) {
990                 phpMode = false;
991                 return TokenNameStopPHP;
992               }
993               return TokenNameQUESTION;
994             case ':' :
995               if (getNextChar(':'))
996                 return TokenNameCOLON_COLON;
997               return TokenNameCOLON;
998             case '@' :
999               return TokenNameAT;
1000               //                                        case '\'' :
1001               //                                                {
1002               //                                                        int test;
1003               //                                                        if ((test = getNextChar('\n', '\r')) == 0) {
1004               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1005               //                                                        }
1006               //                                                        if (test > 0) {
1007               //                                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1008               //                                                                for (int lookAhead = 0;
1009               //                                                                        lookAhead < 3;
1010               //                                                                        lookAhead++) {
1011               //                                                                        if (currentPosition + lookAhead
1012               //                                                                                == source.length)
1013               //                                                                                break;
1014               //                                                                        if (source[currentPosition + lookAhead]
1015               //                                                                                == '\n')
1016               //                                                                                break;
1017               //                                                                        if (source[currentPosition + lookAhead]
1018               //                                                                                == '\'') {
1019               //                                                                                currentPosition += lookAhead + 1;
1020               //                                                                                break;
1021               //                                                                        }
1022               //                                                                }
1023               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1024               //                                                        }
1025               //                                                }
1026               //                                                if (getNextChar('\'')) {
1027               //                                                        // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1028               //                                                        for (int lookAhead = 0;
1029               //                                                                lookAhead < 3;
1030               //                                                                lookAhead++) {
1031               //                                                                if (currentPosition + lookAhead
1032               //                                                                        == source.length)
1033               //                                                                        break;
1034               //                                                                if (source[currentPosition + lookAhead]
1035               //                                                                        == '\n')
1036               //                                                                        break;
1037               //                                                                if (source[currentPosition + lookAhead]
1038               //                                                                        == '\'') {
1039               //                                                                        currentPosition += lookAhead + 1;
1040               //                                                                        break;
1041               //                                                                }
1042               //                                                        }
1043               //                                                        throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1044               //                                                }
1045               //                                                if (getNextChar('\\'))
1046               //                                                        scanEscapeCharacter();
1047               //                                                else { // consume next character
1048               //                                                        unicodeAsBackSlash = false;
1049               //                                                        if (((currentCharacter = source[currentPosition++])
1050               //                                                                == '\\')
1051               //                                                                && (source[currentPosition] == 'u')) {
1052               //                                                                getNextUnicodeChar();
1053               //                                                        } else {
1054               //                                                                if (withoutUnicodePtr != 0) {
1055               //                                                                        withoutUnicodeBuffer[++withoutUnicodePtr] =
1056               //                                                                                currentCharacter;
1057               //                                                                }
1058               //                                                        }
1059               //                                                }
1060               //                                                // if (getNextChar('\''))
1061               //                                                // return TokenNameCharacterLiteral;
1062               //                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1063               //                                                for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1064               //                                                        if (currentPosition + lookAhead == source.length)
1065               //                                                                break;
1066               //                                                        if (source[currentPosition + lookAhead] == '\n')
1067               //                                                                break;
1068               //                                                        if (source[currentPosition + lookAhead] == '\'') {
1069               //                                                                currentPosition += lookAhead + 1;
1070               //                                                                break;
1071               //                                                        }
1072               //                                                }
1073               //                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1074             case '\'' :
1075               try {
1076                 // consume next character
1077                 unicodeAsBackSlash = false;
1078                 currentCharacter = source[currentPosition++];
1079                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1080                 //                  && (source[currentPosition] == 'u')) {
1081                 //                  getNextUnicodeChar();
1082                 //                } else {
1083                 //                  if (withoutUnicodePtr != 0) {
1084                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1085                 //                      currentCharacter;
1086                 //                  }
1087                 //                }
1088
1089                 while (currentCharacter != '\'') {
1090
1091                   /** ** in PHP \r and \n are valid in string literals *** */
1092                   //                  if ((currentCharacter == '\n')
1093                   //                    || (currentCharacter == '\r')) {
1094                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1095                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1096                   //                      if (currentPosition + lookAhead == source.length)
1097                   //                        break;
1098                   //                      if (source[currentPosition + lookAhead] == '\n')
1099                   //                        break;
1100                   //                      if (source[currentPosition + lookAhead] == '\"') {
1101                   //                        currentPosition += lookAhead + 1;
1102                   //                        break;
1103                   //                      }
1104                   //                    }
1105                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1106                   //                  }
1107                   if (currentCharacter == '\\') {
1108                     int escapeSize = currentPosition;
1109                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1110                     //scanSingleQuotedEscapeCharacter has a side effect on this value, and we need the previous value a few lines below
1111                     scanSingleQuotedEscapeCharacter();
1112                     escapeSize = currentPosition - escapeSize;
1113                     if (withoutUnicodePtr == 0) {
1114                       //buffer all the entries that have been left aside....
1115                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1116                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1117                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1118                     } else { //overwrite the / in the buffer
1119                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1120                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1121                         withoutUnicodePtr--;
1122                       }
1123                     }
1124                   }
1125                   // consume next character
1126                   unicodeAsBackSlash = false;
1127                   currentCharacter = source[currentPosition++];
1128                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1129                   //                    && (source[currentPosition] == 'u')) {
1130                   //                    getNextUnicodeChar();
1131                   //                  } else {
1132                   if (withoutUnicodePtr != 0) {
1133                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1134                   }
1135                   //                  }
1136
1137                 }
1138               } catch (IndexOutOfBoundsException e) {
1139                 throw new InvalidInputException(UNTERMINATED_STRING);
1140               } catch (InvalidInputException e) {
1141                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1142                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1143                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1144                     if (currentPosition + lookAhead == source.length)
1145                       break;
1146                     if (source[currentPosition + lookAhead] == '\n')
1147                       break;
1148                     if (source[currentPosition + lookAhead] == '\'') {
1149                       currentPosition += lookAhead + 1;
1150                       break;
1151                     }
1152                   }
1153
1154                 }
1155                 throw e; // rethrow
1156               }
1157               if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1158                 if (currentLine == null) {
1159                   currentLine = new NLSLine();
1160                   lines.add(currentLine);
1161                 }
1162                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1163               }
1164               return TokenNameStringConstant;
1165             case '"' :
1166               try {
1167                 // consume next character
1168                 unicodeAsBackSlash = false;
1169                 currentCharacter = source[currentPosition++];
1170                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1171                 //                  && (source[currentPosition] == 'u')) {
1172                 //                  getNextUnicodeChar();
1173                 //                } else {
1174                 //                  if (withoutUnicodePtr != 0) {
1175                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1176                 //                      currentCharacter;
1177                 //                  }
1178                 //                }
1179
1180                 while (currentCharacter != '"') {
1181
1182                   /** ** in PHP \r and \n are valid in string literals *** */
1183                   //                  if ((currentCharacter == '\n')
1184                   //                    || (currentCharacter == '\r')) {
1185                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1186                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1187                   //                      if (currentPosition + lookAhead == source.length)
1188                   //                        break;
1189                   //                      if (source[currentPosition + lookAhead] == '\n')
1190                   //                        break;
1191                   //                      if (source[currentPosition + lookAhead] == '\"') {
1192                   //                        currentPosition += lookAhead + 1;
1193                   //                        break;
1194                   //                      }
1195                   //                    }
1196                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1197                   //                  }
1198                   if (currentCharacter == '\\') {
1199                     int escapeSize = currentPosition;
1200                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1201                     //scanDoubleQuotedEscapeCharacter has a side effect on this value, and we need the previous value a few lines below
1202                     scanDoubleQuotedEscapeCharacter();
1203                     escapeSize = currentPosition - escapeSize;
1204                     if (withoutUnicodePtr == 0) {
1205                       //buffer all the entries that have been left aside....
1206                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1207                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1208                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1209                     } else { //overwrite the / in the buffer
1210                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1211                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1212                         withoutUnicodePtr--;
1213                       }
1214                     }
1215                   }
1216                   // consume next character
1217                   unicodeAsBackSlash = false;
1218                   currentCharacter = source[currentPosition++];
1219                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1220                   //                    && (source[currentPosition] == 'u')) {
1221                   //                    getNextUnicodeChar();
1222                   //                  } else {
1223                   if (withoutUnicodePtr != 0) {
1224                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1225                   }
1226                   //                  }
1227
1228                 }
1229               } catch (IndexOutOfBoundsException e) {
1230                 throw new InvalidInputException(UNTERMINATED_STRING);
1231               } catch (InvalidInputException e) {
1232                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1233                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1234                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1235                     if (currentPosition + lookAhead == source.length)
1236                       break;
1237                     if (source[currentPosition + lookAhead] == '\n')
1238                       break;
1239                     if (source[currentPosition + lookAhead] == '\"') {
1240                       currentPosition += lookAhead + 1;
1241                       break;
1242                     }
1243                   }
1244
1245                 }
1246                 throw e; // rethrow
1247               }
1248               if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1249                 if (currentLine == null) {
1250                   currentLine = new NLSLine();
1251                   lines.add(currentLine);
1252                 }
1253                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1254               }
1255               return TokenNameStringLiteral;
1256             case '`' :
1257               try {
1258                 // consume next character
1259                 unicodeAsBackSlash = false;
1260                 currentCharacter = source[currentPosition++];
1261                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1262                 //                  && (source[currentPosition] == 'u')) {
1263                 //                  getNextUnicodeChar();
1264                 //                } else {
1265                 //                  if (withoutUnicodePtr != 0) {
1266                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1267                 //                      currentCharacter;
1268                 //                  }
1269                 //                }
1270
1271                 while (currentCharacter != '`') {
1272
1273                   /** ** in PHP \r and \n are valid in string literals *** */
1274                   //                if ((currentCharacter == '\n')
1275                   //                  || (currentCharacter == '\r')) {
1276                   //                  // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1277                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1278                   //                    if (currentPosition + lookAhead == source.length)
1279                   //                      break;
1280                   //                    if (source[currentPosition + lookAhead] == '\n')
1281                   //                      break;
1282                   //                    if (source[currentPosition + lookAhead] == '\"') {
1283                   //                      currentPosition += lookAhead + 1;
1284                   //                      break;
1285                   //                    }
1286                   //                  }
1287                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1288                   //                }
1289                   if (currentCharacter == '\\') {
1290                     int escapeSize = currentPosition;
1291                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1292                     //scanDoubleQuotedEscapeCharacter has a side effect on this value, and we need the previous value a few lines below
1293                     scanDoubleQuotedEscapeCharacter();
1294                     escapeSize = currentPosition - escapeSize;
1295                     if (withoutUnicodePtr == 0) {
1296                       //buffer all the entries that have been left aside....
1297                       withoutUnicodePtr = currentPosition - escapeSize - 1 - startPosition;
1298                       System.arraycopy(source, startPosition, withoutUnicodeBuffer, 1, withoutUnicodePtr);
1299                       withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1300                     } else { //overwrite the / in the buffer
1301                       withoutUnicodeBuffer[withoutUnicodePtr] = currentCharacter;
1302                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1303                         withoutUnicodePtr--;
1304                       }
1305                     }
1306                   }
1307                   // consume next character
1308                   unicodeAsBackSlash = false;
1309                   currentCharacter = source[currentPosition++];
1310                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1311                   //                    && (source[currentPosition] == 'u')) {
1312                   //                    getNextUnicodeChar();
1313                   //                  } else {
1314                   if (withoutUnicodePtr != 0) {
1315                     withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1316                   }
1317                   //                  }
1318
1319                 }
1320               } catch (IndexOutOfBoundsException e) {
1321                 throw new InvalidInputException(UNTERMINATED_STRING);
1322               } catch (InvalidInputException e) {
1323                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1324                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1325                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1326                     if (currentPosition + lookAhead == source.length)
1327                       break;
1328                     if (source[currentPosition + lookAhead] == '\n')
1329                       break;
1330                     if (source[currentPosition + lookAhead] == '`') {
1331                       currentPosition += lookAhead + 1;
1332                       break;
1333                     }
1334                   }
1335
1336                 }
1337                 throw e; // rethrow
1338               }
1339               if (checkNonExternalizedStringLiterals) { // check for presence of NLS tags //$NON-NLS-?$ where ? is an int.
1340                 if (currentLine == null) {
1341                   currentLine = new NLSLine();
1342                   lines.add(currentLine);
1343                 }
1344                 currentLine.add(new StringLiteral(getCurrentTokenSourceString(), startPosition, currentPosition - 1));
1345               }
1346               return TokenNameStringInterpolated;
1347             case '#' :
1348             case '/' :
1349               {
1350                 int test;
1351                 if ((currentCharacter == '#') || (test = getNextChar('/', '*')) == 0) {
1352                   //line comment
1353                   int endPositionForLineComment = 0;
1354                   try { //get the next char
1355                     currentCharacter = source[currentPosition++];
1356                     //                    if (((currentCharacter = source[currentPosition++])
1357                     //                      == '\\')
1358                     //                      && (source[currentPosition] == 'u')) {
1359                     //                      //-------------unicode traitement ------------
1360                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1361                     //                      currentPosition++;
1362                     //                      while (source[currentPosition] == 'u') {
1363                     //                        currentPosition++;
1364                     //                      }
1365                     //                      if ((c1 =
1366                     //                        Character.getNumericValue(source[currentPosition++]))
1367                     //                        > 15
1368                     //                        || c1 < 0
1369                     //                        || (c2 =
1370                     //                          Character.getNumericValue(source[currentPosition++]))
1371                     //                          > 15
1372                     //                        || c2 < 0
1373                     //                        || (c3 =
1374                     //                          Character.getNumericValue(source[currentPosition++]))
1375                     //                          > 15
1376                     //                        || c3 < 0
1377                     //                        || (c4 =
1378                     //                          Character.getNumericValue(source[currentPosition++]))
1379                     //                          > 15
1380                     //                        || c4 < 0) {
1381                     //                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1382                     //                      } else {
1383                     //                        currentCharacter =
1384                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1385                     //                      }
1386                     //                    }
1387
1388                     //handle the \\u case manually into comment
1389                     //                    if (currentCharacter == '\\') {
1390                     //                      if (source[currentPosition] == '\\')
1391                     //                        currentPosition++;
1392                     //                    } //jump over the \\
1393                     boolean isUnicode = false;
1394                     while (currentCharacter != '\r' && currentCharacter != '\n') {
1395                       if (currentCharacter == '?') {
1396                         if (getNextChar('>')) {
1397                           startPosition = currentPosition - 2;
1398                           phpMode = false;
1399                           return TokenNameStopPHP;
1400                         }
1401                       }
1402
1403                       //get the next char
1404                       isUnicode = false;
1405                       currentCharacter = source[currentPosition++];
1406                       //                      if (((currentCharacter = source[currentPosition++])
1407                       //                        == '\\')
1408                       //                        && (source[currentPosition] == 'u')) {
1409                       //                        isUnicode = true;
1410                       //                        //-------------unicode traitement ------------
1411                       //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1412                       //                        currentPosition++;
1413                       //                        while (source[currentPosition] == 'u') {
1414                       //                          currentPosition++;
1415                       //                        }
1416                       //                        if ((c1 =
1417                       //                          Character.getNumericValue(source[currentPosition++]))
1418                       //                          > 15
1419                       //                          || c1 < 0
1420                       //                          || (c2 =
1421                       //                            Character.getNumericValue(
1422                       //                              source[currentPosition++]))
1423                       //                            > 15
1424                       //                          || c2 < 0
1425                       //                          || (c3 =
1426                       //                            Character.getNumericValue(
1427                       //                              source[currentPosition++]))
1428                       //                            > 15
1429                       //                          || c3 < 0
1430                       //                          || (c4 =
1431                       //                            Character.getNumericValue(
1432                       //                              source[currentPosition++]))
1433                       //                            > 15
1434                       //                          || c4 < 0) {
1435                       //                          throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1436                       //                        } else {
1437                       //                          currentCharacter =
1438                       //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1439                       //                        }
1440                       //                      }
1441                       //handle the \\u case manually into comment
1442                       //                      if (currentCharacter == '\\') {
1443                       //                        if (source[currentPosition] == '\\')
1444                       //                          currentPosition++;
1445                       //                      } //jump over the \\
1446                     }
1447                     if (isUnicode) {
1448                       endPositionForLineComment = currentPosition - 6;
1449                     } else {
1450                       endPositionForLineComment = currentPosition - 1;
1451                     }
1452                     recordComment(false);
1453                     if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1454                       checkNonExternalizeString();
1455                       if (recordLineSeparator) {
1456                         if (isUnicode) {
1457                           pushUnicodeLineSeparator();
1458                         } else {
1459                           pushLineSeparator();
1460                         }
1461                       } else {
1462                         currentLine = null;
1463                       }
1464                     }
1465                     if (tokenizeComments) {
1466                       if (!isUnicode) {
1467                         currentPosition = endPositionForLineComment;
1468                         // reset one character behind
1469                       }
1470                       return TokenNameCOMMENT_LINE;
1471                     }
1472                   } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1473                     if (tokenizeComments) {
1474                       currentPosition--;
1475                       // reset one character behind
1476                       return TokenNameCOMMENT_LINE;
1477                     }
1478                   }
1479                   break;
1480                 }
1481                 if (test > 0) {
1482                   //traditional and annotation comment
1483                   boolean isJavadoc = false, star = false;
1484                   // consume next character
1485                   unicodeAsBackSlash = false;
1486                   currentCharacter = source[currentPosition++];
1487                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1488                   //                    && (source[currentPosition] == 'u')) {
1489                   //                    getNextUnicodeChar();
1490                   //                  } else {
1491                   //                    if (withoutUnicodePtr != 0) {
1492                   //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1493                   //                        currentCharacter;
1494                   //                    }
1495                   //                  }
1496
1497                   if (currentCharacter == '*') {
1498                     isJavadoc = true;
1499                     star = true;
1500                   }
1501                   if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1502                     checkNonExternalizeString();
1503                     if (recordLineSeparator) {
1504                       pushLineSeparator();
1505                     } else {
1506                       currentLine = null;
1507                     }
1508                   }
1509                   try { //get the next char
1510                     currentCharacter = source[currentPosition++];
1511                     //                    if (((currentCharacter = source[currentPosition++])
1512                     //                      == '\\')
1513                     //                      && (source[currentPosition] == 'u')) {
1514                     //                      //-------------unicode traitement ------------
1515                     //                      getNextUnicodeChar();
1516                     //                    }
1517                     //handle the \\u case manually into comment
1518                     //                    if (currentCharacter == '\\') {
1519                     //                      if (source[currentPosition] == '\\')
1520                     //                        currentPosition++;
1521                     //                      //jump over the \\
1522                     //                    }
1523                     // empty comment is not a javadoc /**/
1524                     if (currentCharacter == '/') {
1525                       isJavadoc = false;
1526                     }
1527                     //loop until end of comment */
1528                     while ((currentCharacter != '/') || (!star)) {
1529                       if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
1530                         checkNonExternalizeString();
1531                         if (recordLineSeparator) {
1532                           pushLineSeparator();
1533                         } else {
1534                           currentLine = null;
1535                         }
1536                       }
1537                       star = currentCharacter == '*';
1538                       //get next char
1539                       currentCharacter = source[currentPosition++];
1540                       //                      if (((currentCharacter = source[currentPosition++])
1541                       //                        == '\\')
1542                       //                        && (source[currentPosition] == 'u')) {
1543                       //                        //-------------unicode traitement ------------
1544                       //                        getNextUnicodeChar();
1545                       //                      }
1546                       //handle the \\u case manually into comment
1547                       //                      if (currentCharacter == '\\') {
1548                       //                        if (source[currentPosition] == '\\')
1549                       //                          currentPosition++;
1550                       //                      } //jump over the \\
1551                     }
1552                     recordComment(isJavadoc);
1553                     if (tokenizeComments) {
1554                       if (isJavadoc)
1555                         return TokenNameCOMMENT_PHPDOC;
1556                       return TokenNameCOMMENT_BLOCK;
1557                     }
1558                   } catch (IndexOutOfBoundsException e) {
1559                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1560                   }
1561                   break;
1562                 }
1563                 if (getNextChar('='))
1564                   return TokenNameDIVIDE_EQUAL;
1565                 return TokenNameDIVIDE;
1566               }
1567             case '\u001a' :
1568               if (atEnd())
1569                 return TokenNameEOF;
1570               //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1571               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1572
1573             default :
1574               if (currentCharacter == '$') {
1575                 while ((currentCharacter = source[currentPosition++]) == '$') {
1576                 }
1577                 if (currentCharacter == '{')
1578                   return TokenNameDOLLAR_LBRACE;
1579                 if (isPHPIdentifierStart(currentCharacter))
1580                   return scanIdentifierOrKeyword(true);
1581                 return TokenNameERROR;
1582               }
1583               if (isPHPIdentifierStart(currentCharacter))
1584                 return scanIdentifierOrKeyword(false);
1585               if (Character.isDigit(currentCharacter))
1586                 return scanNumber(false);
1587               return TokenNameERROR;
1588           }
1589         }
1590       } //-----------------end switch while try--------------------
1591       catch (IndexOutOfBoundsException e) {
1592       }
1593     }
1594     return TokenNameEOF;
1595   }
1596
1597   //  public final void getNextUnicodeChar()
1598   //    throws IndexOutOfBoundsException, InvalidInputException {
1599   //    //VOID
1600   //    //Handles a unicode escape sequence.
1601   //    //When a unicode escape appears, we must use a buffer that holds the decoded char values.
1602   //    //At the end of this method currentCharacter holds the newly visited char
1603   //    //and currentPosition points right after it.
1604   //
1605   //    //ALL getNextChar.... ARE OPTIMIZED COPIES
1606   //
1607   //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1608   //    currentPosition++;
1609   //    while (source[currentPosition] == 'u') {
1610   //      currentPosition++;
1611   //      unicodeSize++;
1612   //    }
1613   //
1614   //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1615   //      || c1 < 0
1616   //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1617   //      || c2 < 0
1618   //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1619   //      || c3 < 0
1620   //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1621   //      || c4 < 0) {
1622   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1623   //    } else {
1624   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1625   //      //need the unicode buffer
1626   //      if (withoutUnicodePtr == 0) {
1627   //        //buffer all the entries that have been left aside....
1628   //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1629   //        System.arraycopy(
1630   //          source,
1631   //          startPosition,
1632   //          withoutUnicodeBuffer,
1633   //          1,
1634   //          withoutUnicodePtr);
1635   //      }
1636   //      //fill the buffer with the char
1637   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1638   //    }
1639   //    unicodeAsBackSlash = currentCharacter == '\\';
1640   //  }
1641   /*
1642    * Tokenize a method body, assuming that curly brackets are properly balanced.
1643    */
1644   public final void jumpOverMethodBody() {
1645
1646     this.wasAcr = false;
1647     int found = 1;
1648     try {
1649       while (true) { //loop for jumping over comments
1650         // ---------Consume white space and handles startPosition---------
1651         boolean isWhiteSpace;
1652         do {
1653           startPosition = currentPosition;
1654           currentCharacter = source[currentPosition++];
1655           //          if (((currentCharacter = source[currentPosition++]) == '\\')
1656           //            && (source[currentPosition] == 'u')) {
1657           //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1658           //          } else {
1659           if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1660             pushLineSeparator();
1661           isWhiteSpace = Character.isWhitespace(currentCharacter);
1662           //          }
1663         } while (isWhiteSpace);
1664
1665         // -------consume token until } is found---------
1666         switch (currentCharacter) {
1667           case '{' :
1668             found++;
1669             break;
1670           case '}' :
1671             found--;
1672             if (found == 0)
1673               return;
1674             break;
1675           case '\'' :
1676             {
1677               boolean test;
1678               test = getNextChar('\\');
1679               if (test) {
1680                 try {
1681                   scanDoubleQuotedEscapeCharacter();
1682                 } catch (InvalidInputException ex) {
1683                 };
1684               } else {
1685                 //                try { // consume next character
1686                 unicodeAsBackSlash = false;
1687                 currentCharacter = source[currentPosition++];
1688                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1689                 //                    && (source[currentPosition] == 'u')) {
1690                 //                    getNextUnicodeChar();
1691                 //                  } else {
1692                 if (withoutUnicodePtr != 0) {
1693                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1694                 }
1695                 //                  }
1696                 //                } catch (InvalidInputException ex) {
1697                 //                };
1698               }
1699               getNextChar('\'');
1700               break;
1701             }
1702           case '"' :
1703             try {
1704               //              try { // consume next character
1705               unicodeAsBackSlash = false;
1706               currentCharacter = source[currentPosition++];
1707               //                if (((currentCharacter = source[currentPosition++]) == '\\')
1708               //                  && (source[currentPosition] == 'u')) {
1709               //                  getNextUnicodeChar();
1710               //                } else {
1711               if (withoutUnicodePtr != 0) {
1712                 withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1713               }
1714               //                }
1715               //              } catch (InvalidInputException ex) {
1716               //              };
1717               while (currentCharacter != '"') {
1718                 if (currentCharacter == '\r') {
1719                   if (source[currentPosition] == '\n')
1720                     currentPosition++;
1721                   break;
1722                   // the string cannot go further than the line
1723                 }
1724                 if (currentCharacter == '\n') {
1725                   break;
1726                   // the string cannot go further than the line
1727                 }
1728                 if (currentCharacter == '\\') {
1729                   try {
1730                     scanDoubleQuotedEscapeCharacter();
1731                   } catch (InvalidInputException ex) {
1732                   };
1733                 }
1734                 //                try { // consume next character
1735                 unicodeAsBackSlash = false;
1736                 currentCharacter = source[currentPosition++];
1737                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1738                 //                    && (source[currentPosition] == 'u')) {
1739                 //                    getNextUnicodeChar();
1740                 //                  } else {
1741                 if (withoutUnicodePtr != 0) {
1742                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1743                 }
1744                 //                  }
1745                 //                } catch (InvalidInputException ex) {
1746                 //                };
1747               }
1748             } catch (IndexOutOfBoundsException e) {
1749               return;
1750             }
1751             break;
1752           case '/' :
1753             {
1754               int test;
1755               if ((test = getNextChar('/', '*')) == 0) {
1756                 //line comment
1757                 try {
1758                   //get the next char
1759                   currentCharacter = source[currentPosition++];
1760                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1761                   //                    && (source[currentPosition] == 'u')) {
1762                   //                    //-------------unicode traitement ------------
1763                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1764                   //                    currentPosition++;
1765                   //                    while (source[currentPosition] == 'u') {
1766                   //                      currentPosition++;
1767                   //                    }
1768                   //                    if ((c1 =
1769                   //                      Character.getNumericValue(source[currentPosition++]))
1770                   //                      > 15
1771                   //                      || c1 < 0
1772                   //                      || (c2 =
1773                   //                        Character.getNumericValue(source[currentPosition++]))
1774                   //                        > 15
1775                   //                      || c2 < 0
1776                   //                      || (c3 =
1777                   //                        Character.getNumericValue(source[currentPosition++]))
1778                   //                        > 15
1779                   //                      || c3 < 0
1780                   //                      || (c4 =
1781                   //                        Character.getNumericValue(source[currentPosition++]))
1782                   //                        > 15
1783                   //                      || c4 < 0) {
1784                   //                      //error don't care of the value
1785                   //                      currentCharacter = 'A';
1786                   //                    } //something different from \n and \r
1787                   //                    else {
1788                   //                      currentCharacter =
1789                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1790                   //                    }
1791                   //                  }
1792
1793                   while (currentCharacter != '\r' && currentCharacter != '\n') {
1794                     //get the next char
1795                     currentCharacter = source[currentPosition++];
1796                     //                    if (((currentCharacter = source[currentPosition++])
1797                     //                      == '\\')
1798                     //                      && (source[currentPosition] == 'u')) {
1799                     //                      //-------------unicode traitement ------------
1800                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1801                     //                      currentPosition++;
1802                     //                      while (source[currentPosition] == 'u') {
1803                     //                        currentPosition++;
1804                     //                      }
1805                     //                      if ((c1 =
1806                     //                        Character.getNumericValue(source[currentPosition++]))
1807                     //                        > 15
1808                     //                        || c1 < 0
1809                     //                        || (c2 =
1810                     //                          Character.getNumericValue(source[currentPosition++]))
1811                     //                          > 15
1812                     //                        || c2 < 0
1813                     //                        || (c3 =
1814                     //                          Character.getNumericValue(source[currentPosition++]))
1815                     //                          > 15
1816                     //                        || c3 < 0
1817                     //                        || (c4 =
1818                     //                          Character.getNumericValue(source[currentPosition++]))
1819                     //                          > 15
1820                     //                        || c4 < 0) {
1821                     //                        //error don't care of the value
1822                     //                        currentCharacter = 'A';
1823                     //                      } //something different from \n and \r
1824                     //                      else {
1825                     //                        currentCharacter =
1826                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1827                     //                      }
1828                     //                    }
1829                   }
1830                   if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1831                     pushLineSeparator();
1832                 } catch (IndexOutOfBoundsException e) {
1833                 } //an eof will them be generated
1834                 break;
1835               }
1836               if (test > 0) {
1837                 //traditional and annotation comment
1838                 boolean star = false;
1839                 //                try { // consume next character
1840                 unicodeAsBackSlash = false;
1841                 currentCharacter = source[currentPosition++];
1842                 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1843                 //                    && (source[currentPosition] == 'u')) {
1844                 //                    getNextUnicodeChar();
1845                 //                  } else {
1846                 if (withoutUnicodePtr != 0) {
1847                   withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1848                 }
1849                 //                  };
1850                 //                } catch (InvalidInputException ex) {
1851                 //                };
1852                 if (currentCharacter == '*') {
1853                   star = true;
1854                 }
1855                 if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1856                   pushLineSeparator();
1857                 try { //get the next char
1858                   currentCharacter = source[currentPosition++];
1859                   //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1860                   //                    && (source[currentPosition] == 'u')) {
1861                   //                    //-------------unicode traitement ------------
1862                   //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1863                   //                    currentPosition++;
1864                   //                    while (source[currentPosition] == 'u') {
1865                   //                      currentPosition++;
1866                   //                    }
1867                   //                    if ((c1 =
1868                   //                      Character.getNumericValue(source[currentPosition++]))
1869                   //                      > 15
1870                   //                      || c1 < 0
1871                   //                      || (c2 =
1872                   //                        Character.getNumericValue(source[currentPosition++]))
1873                   //                        > 15
1874                   //                      || c2 < 0
1875                   //                      || (c3 =
1876                   //                        Character.getNumericValue(source[currentPosition++]))
1877                   //                        > 15
1878                   //                      || c3 < 0
1879                   //                      || (c4 =
1880                   //                        Character.getNumericValue(source[currentPosition++]))
1881                   //                        > 15
1882                   //                      || c4 < 0) {
1883                   //                      //error don't care of the value
1884                   //                      currentCharacter = 'A';
1885                   //                    } //something different from * and /
1886                   //                    else {
1887                   //                      currentCharacter =
1888                   //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1889                   //                    }
1890                   //                  }
1891                   //loop until end of comment */
1892                   while ((currentCharacter != '/') || (!star)) {
1893                     if (recordLineSeparator && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1894                       pushLineSeparator();
1895                     star = currentCharacter == '*';
1896                     //get next char
1897                     currentCharacter = source[currentPosition++];
1898                     //                    if (((currentCharacter = source[currentPosition++])
1899                     //                      == '\\')
1900                     //                      && (source[currentPosition] == 'u')) {
1901                     //                      //-------------unicode traitement ------------
1902                     //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1903                     //                      currentPosition++;
1904                     //                      while (source[currentPosition] == 'u') {
1905                     //                        currentPosition++;
1906                     //                      }
1907                     //                      if ((c1 =
1908                     //                        Character.getNumericValue(source[currentPosition++]))
1909                     //                        > 15
1910                     //                        || c1 < 0
1911                     //                        || (c2 =
1912                     //                          Character.getNumericValue(source[currentPosition++]))
1913                     //                          > 15
1914                     //                        || c2 < 0
1915                     //                        || (c3 =
1916                     //                          Character.getNumericValue(source[currentPosition++]))
1917                     //                          > 15
1918                     //                        || c3 < 0
1919                     //                        || (c4 =
1920                     //                          Character.getNumericValue(source[currentPosition++]))
1921                     //                          > 15
1922                     //                        || c4 < 0) {
1923                     //                        //error don't care of the value
1924                     //                        currentCharacter = 'A';
1925                     //                      } //something different from * and /
1926                     //                      else {
1927                     //                        currentCharacter =
1928                     //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1929                     //                      }
1930                     //                    }
1931                   }
1932                 } catch (IndexOutOfBoundsException e) {
1933                   return;
1934                 }
1935                 break;
1936               }
1937               break;
1938             }
1939
1940           default :
1941             if (isPHPIdentifierStart(currentCharacter) || currentCharacter == '$') {
1942               try {
1943                 scanIdentifierOrKeyword((currentCharacter == '$'));
1944               } catch (InvalidInputException ex) {
1945               };
1946               break;
1947             }
1948             if (Character.isDigit(currentCharacter)) {
1949               try {
1950                 scanNumber(false);
1951               } catch (InvalidInputException ex) {
1952               };
1953               break;
1954             }
1955         }
1956       }
1957       //-----------------end switch while try--------------------
1958     } catch (IndexOutOfBoundsException e) {
1959     } catch (InvalidInputException e) {
1960     }
1961     return;
1962   }
1963   //  public final boolean jumpOverUnicodeWhiteSpace()
1964   //    throws InvalidInputException {
1965   //    //BOOLEAN
1966   //    //handle the case of unicode. Jump over the next whiteSpace
1967   //    //making startPosition pointing on the next available char
1968   //    //On false, the currentCharacter is filled up with a potential
1969   //    //correct char
1970   //
1971   //    try {
1972   //      this.wasAcr = false;
1973   //      int c1, c2, c3, c4;
1974   //      int unicodeSize = 6;
1975   //      currentPosition++;
1976   //      while (source[currentPosition] == 'u') {
1977   //        currentPosition++;
1978   //        unicodeSize++;
1979   //      }
1980   //
1981   //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1982   //        || c1 < 0)
1983   //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
1984   //          || c2 < 0)
1985   //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
1986   //          || c3 < 0)
1987   //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
1988   //          || c4 < 0)) {
1989   //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1990   //      }
1991   //
1992   //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1993   //      if (recordLineSeparator
1994   //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1995   //        pushLineSeparator();
1996   //      if (Character.isWhitespace(currentCharacter))
1997   //        return true;
1998   //
1999   //      //buffer the new char which is not a white space
2000   //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2001   //      //withoutUnicodePtr == 1 is true here
2002   //      return false;
2003   //    } catch (IndexOutOfBoundsException e) {
2004   //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2005   //    }
2006   //  }
2007   public final int[] getLineEnds() {
2008     //return a bounded copy of this.lineEnds
2009
2010     int[] copy;
2011     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2012     return copy;
2013   }
2014
2015   public char[] getSource() {
2016     return this.source;
2017   }
2018   final char[] optimizedCurrentTokenSource1() {
2019     //return always the same char[] build only once
2020
2021     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2022     char charOne = source[startPosition];
2023     switch (charOne) {
2024       case 'a' :
2025         return charArray_a;
2026       case 'b' :
2027         return charArray_b;
2028       case 'c' :
2029         return charArray_c;
2030       case 'd' :
2031         return charArray_d;
2032       case 'e' :
2033         return charArray_e;
2034       case 'f' :
2035         return charArray_f;
2036       case 'g' :
2037         return charArray_g;
2038       case 'h' :
2039         return charArray_h;
2040       case 'i' :
2041         return charArray_i;
2042       case 'j' :
2043         return charArray_j;
2044       case 'k' :
2045         return charArray_k;
2046       case 'l' :
2047         return charArray_l;
2048       case 'm' :
2049         return charArray_m;
2050       case 'n' :
2051         return charArray_n;
2052       case 'o' :
2053         return charArray_o;
2054       case 'p' :
2055         return charArray_p;
2056       case 'q' :
2057         return charArray_q;
2058       case 'r' :
2059         return charArray_r;
2060       case 's' :
2061         return charArray_s;
2062       case 't' :
2063         return charArray_t;
2064       case 'u' :
2065         return charArray_u;
2066       case 'v' :
2067         return charArray_v;
2068       case 'w' :
2069         return charArray_w;
2070       case 'x' :
2071         return charArray_x;
2072       case 'y' :
2073         return charArray_y;
2074       case 'z' :
2075         return charArray_z;
2076       default :
2077         return new char[] { charOne };
2078     }
2079   }
2080
2081   final char[] optimizedCurrentTokenSource2() {
2082     //try to return the same char[] build only once
2083
2084     char c0, c1;
2085     int hash = (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1])) % TableSize;
2086     char[][] table = charArray_length[0][hash];
2087     int i = newEntry2;
2088     while (++i < InternalTableSize) {
2089       char[] charArray = table[i];
2090       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2091         return charArray;
2092     }
2093     //---------other side---------
2094     i = -1;
2095     int max = newEntry2;
2096     while (++i <= max) {
2097       char[] charArray = table[i];
2098       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2099         return charArray;
2100     }
2101     //--------add the entry-------
2102     if (++max >= InternalTableSize)
2103       max = 0;
2104     char[] r;
2105     table[max] = (r = new char[] { c0, c1 });
2106     newEntry2 = max;
2107     return r;
2108   }
2109
2110   final char[] optimizedCurrentTokenSource3() {
2111     //try to return the same char[] build only once
2112
2113     char c0, c1, c2;
2114     int hash =
2115       (((c0 = source[startPosition]) << 12) + ((c1 = source[startPosition + 1]) << 6) + (c2 = source[startPosition + 2]))
2116         % TableSize;
2117     char[][] table = charArray_length[1][hash];
2118     int i = newEntry3;
2119     while (++i < InternalTableSize) {
2120       char[] charArray = table[i];
2121       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2122         return charArray;
2123     }
2124     //---------other side---------
2125     i = -1;
2126     int max = newEntry3;
2127     while (++i <= max) {
2128       char[] charArray = table[i];
2129       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2130         return charArray;
2131     }
2132     //--------add the entry-------
2133     if (++max >= InternalTableSize)
2134       max = 0;
2135     char[] r;
2136     table[max] = (r = new char[] { c0, c1, c2 });
2137     newEntry3 = max;
2138     return r;
2139   }
2140
2141   final char[] optimizedCurrentTokenSource4() {
2142     //try to return the same char[] build only once
2143
2144     char c0, c1, c2, c3;
2145     long hash =
2146       ((((long) (c0 = source[startPosition])) << 18)
2147         + ((c1 = source[startPosition + 1]) << 12)
2148         + ((c2 = source[startPosition + 2]) << 6)
2149         + (c3 = source[startPosition + 3]))
2150         % TableSize;
2151     char[][] table = charArray_length[2][(int) hash];
2152     int i = newEntry4;
2153     while (++i < InternalTableSize) {
2154       char[] charArray = table[i];
2155       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2156         return charArray;
2157     }
2158     //---------other side---------
2159     i = -1;
2160     int max = newEntry4;
2161     while (++i <= max) {
2162       char[] charArray = table[i];
2163       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]))
2164         return charArray;
2165     }
2166     //--------add the entry-------
2167     if (++max >= InternalTableSize)
2168       max = 0;
2169     char[] r;
2170     table[max] = (r = new char[] { c0, c1, c2, c3 });
2171     newEntry4 = max;
2172     return r;
2173
2174   }
2175
2176   final char[] optimizedCurrentTokenSource5() {
2177     //try to return the same char[] build only once
2178
2179     char c0, c1, c2, c3, c4;
2180     long hash =
2181       ((((long) (c0 = source[startPosition])) << 24)
2182         + (((long) (c1 = source[startPosition + 1])) << 18)
2183         + ((c2 = source[startPosition + 2]) << 12)
2184         + ((c3 = source[startPosition + 3]) << 6)
2185         + (c4 = source[startPosition + 4]))
2186         % TableSize;
2187     char[][] table = charArray_length[3][(int) hash];
2188     int i = newEntry5;
2189     while (++i < InternalTableSize) {
2190       char[] charArray = table[i];
2191       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2192         return charArray;
2193     }
2194     //---------other side---------
2195     i = -1;
2196     int max = newEntry5;
2197     while (++i <= max) {
2198       char[] charArray = table[i];
2199       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]) && (c3 == charArray[3]) && (c4 == charArray[4]))
2200         return charArray;
2201     }
2202     //--------add the entry-------
2203     if (++max >= InternalTableSize)
2204       max = 0;
2205     char[] r;
2206     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2207     newEntry5 = max;
2208     return r;
2209
2210   }
2211
2212   final char[] optimizedCurrentTokenSource6() {
2213     //try to return the same char[] build only once
2214
2215     char c0, c1, c2, c3, c4, c5;
2216     long hash =
2217       ((((long) (c0 = source[startPosition])) << 32)
2218         + (((long) (c1 = source[startPosition + 1])) << 24)
2219         + (((long) (c2 = source[startPosition + 2])) << 18)
2220         + ((c3 = source[startPosition + 3]) << 12)
2221         + ((c4 = source[startPosition + 4]) << 6)
2222         + (c5 = source[startPosition + 5]))
2223         % TableSize;
2224     char[][] table = charArray_length[4][(int) hash];
2225     int i = newEntry6;
2226     while (++i < InternalTableSize) {
2227       char[] charArray = table[i];
2228       if ((c0 == charArray[0])
2229         && (c1 == charArray[1])
2230         && (c2 == charArray[2])
2231         && (c3 == charArray[3])
2232         && (c4 == charArray[4])
2233         && (c5 == charArray[5]))
2234         return charArray;
2235     }
2236     //---------other side---------
2237     i = -1;
2238     int max = newEntry6;
2239     while (++i <= max) {
2240       char[] charArray = table[i];
2241       if ((c0 == charArray[0])
2242         && (c1 == charArray[1])
2243         && (c2 == charArray[2])
2244         && (c3 == charArray[3])
2245         && (c4 == charArray[4])
2246         && (c5 == charArray[5]))
2247         return charArray;
2248     }
2249     //--------add the entry-------
2250     if (++max >= InternalTableSize)
2251       max = 0;
2252     char[] r;
2253     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2254     newEntry6 = max;
2255     return r;
2256   }
2257
2258   public final void pushLineSeparator() throws InvalidInputException {
2259     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2260     final int INCREMENT = 250;
2261
2262     if (this.checkNonExternalizedStringLiterals) {
2263       // reinitialize the current line for non externalize strings purpose
2264       currentLine = null;
2265     }
2266     //currentCharacter is at position currentPosition-1
2267
2268     // cr 000D
2269     if (currentCharacter == '\r') {
2270       int separatorPos = currentPosition - 1;
2271       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2272         return;
2273       //System.out.println("CR-" + separatorPos);
2274       try {
2275         lineEnds[++linePtr] = separatorPos;
2276       } catch (IndexOutOfBoundsException e) {
2277         //linePtr value is correct
2278         int oldLength = lineEnds.length;
2279         int[] old = lineEnds;
2280         lineEnds = new int[oldLength + INCREMENT];
2281         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2282         lineEnds[linePtr] = separatorPos;
2283       }
2284       // look-ahead for merged cr+lf
2285       try {
2286         if (source[currentPosition] == '\n') {
2287           //System.out.println("look-ahead LF-" + currentPosition);
2288           lineEnds[linePtr] = currentPosition;
2289           currentPosition++;
2290           wasAcr = false;
2291         } else {
2292           wasAcr = true;
2293         }
2294       } catch (IndexOutOfBoundsException e) {
2295         wasAcr = true;
2296       }
2297     } else {
2298       // lf 000A
2299       if (currentCharacter == '\n') {
2300         //must merge eventual cr followed by lf
2301         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2302           //System.out.println("merge LF-" + (currentPosition - 1));
2303           lineEnds[linePtr] = currentPosition - 1;
2304         } else {
2305           int separatorPos = currentPosition - 1;
2306           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2307             return;
2308           // System.out.println("LF-" + separatorPos);
2309           try {
2310             lineEnds[++linePtr] = separatorPos;
2311           } catch (IndexOutOfBoundsException e) {
2312             //linePtr value is correct
2313             int oldLength = lineEnds.length;
2314             int[] old = lineEnds;
2315             lineEnds = new int[oldLength + INCREMENT];
2316             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2317             lineEnds[linePtr] = separatorPos;
2318           }
2319         }
2320         wasAcr = false;
2321       }
2322     }
2323   }
2324   public final void pushUnicodeLineSeparator() {
2325     // isUnicode means that the \r or \n has been read as a unicode character
2326
2327     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2328
2329     final int INCREMENT = 250;
2330     //currentCharacter is at position currentPosition-1
2331
2332     if (this.checkNonExternalizedStringLiterals) {
2333       // reinitialize the current line for non externalize strings purpose
2334       currentLine = null;
2335     }
2336
2337     // cr 000D
2338     if (currentCharacter == '\r') {
2339       int separatorPos = currentPosition - 6;
2340       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2341         return;
2342       //System.out.println("CR-" + separatorPos);
2343       try {
2344         lineEnds[++linePtr] = separatorPos;
2345       } catch (IndexOutOfBoundsException e) {
2346         //linePtr value is correct
2347         int oldLength = lineEnds.length;
2348         int[] old = lineEnds;
2349         lineEnds = new int[oldLength + INCREMENT];
2350         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2351         lineEnds[linePtr] = separatorPos;
2352       }
2353       // look-ahead for merged cr+lf
2354       if (source[currentPosition] == '\n') {
2355         //System.out.println("look-ahead LF-" + currentPosition);
2356         lineEnds[linePtr] = currentPosition;
2357         currentPosition++;
2358         wasAcr = false;
2359       } else {
2360         wasAcr = true;
2361       }
2362     } else {
2363       // lf 000A
2364       if (currentCharacter == '\n') {
2365         //must merge eventual cr followed by lf
2366         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2367           //System.out.println("merge LF-" + (currentPosition - 1));
2368           lineEnds[linePtr] = currentPosition - 6;
2369         } else {
2370           int separatorPos = currentPosition - 6;
2371           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2372             return;
2373           // System.out.println("LF-" + separatorPos);
2374           try {
2375             lineEnds[++linePtr] = separatorPos;
2376           } catch (IndexOutOfBoundsException e) {
2377             //linePtr value is correct
2378             int oldLength = lineEnds.length;
2379             int[] old = lineEnds;
2380             lineEnds = new int[oldLength + INCREMENT];
2381             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2382             lineEnds[linePtr] = separatorPos;
2383           }
2384         }
2385         wasAcr = false;
2386       }
2387     }
2388   }
2389   public final void recordComment(boolean isJavadoc) {
2390
2391     // a new annotation comment is recorded
2392     try {
2393       commentStops[++commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2394     } catch (IndexOutOfBoundsException e) {
2395       int oldStackLength = commentStops.length;
2396       int[] oldStack = commentStops;
2397       commentStops = new int[oldStackLength + 30];
2398       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2399       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2400       //grows the positions buffers too
2401       int[] old = commentStarts;
2402       commentStarts = new int[oldStackLength + 30];
2403       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2404     }
2405
2406     //the buffer is of a correct size here
2407     commentStarts[commentPtr] = startPosition;
2408   }
2409   public void resetTo(int begin, int end) {
2410     //reset the scanner to a given position where it may rescan again
2411
2412     diet = false;
2413     initialPosition = startPosition = currentPosition = begin;
2414     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2415     commentPtr = -1; // reset comment stack
2416   }
2417
2418   public final void scanSingleQuotedEscapeCharacter() throws InvalidInputException {
2419     // the string with "\\u" is a legal string of two chars \ and u
2420     //thus we use a direct access to the source (for regular cases).
2421
2422     //    if (unicodeAsBackSlash) {
2423     //      // consume next character
2424     //      unicodeAsBackSlash = false;
2425     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2426     //        && (source[currentPosition] == 'u')) {
2427     //        getNextUnicodeChar();
2428     //      } else {
2429     //        if (withoutUnicodePtr != 0) {
2430     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2431     //        }
2432     //      }
2433     //    } else
2434     currentCharacter = source[currentPosition++];
2435     switch (currentCharacter) {
2436       case '\'' :
2437         currentCharacter = '\'';
2438         break;
2439       case '\\' :
2440         currentCharacter = '\\';
2441         break;
2442       default :
2443         currentCharacter = '\\';
2444         currentPosition--;
2445     }
2446   }
2447
2448   public final void scanDoubleQuotedEscapeCharacter() throws InvalidInputException {
2449     // the string with "\\u" is a legal string of two chars \ and u
2450     //thus we use a direct access to the source (for regular cases).
2451
2452     //    if (unicodeAsBackSlash) {
2453     //      // consume next character
2454     //      unicodeAsBackSlash = false;
2455     //      if (((currentCharacter = source[currentPosition++]) == '\\')
2456     //        && (source[currentPosition] == 'u')) {
2457     //        getNextUnicodeChar();
2458     //      } else {
2459     //        if (withoutUnicodePtr != 0) {
2460     //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2461     //        }
2462     //      }
2463     //    } else
2464     currentCharacter = source[currentPosition++];
2465     switch (currentCharacter) {
2466       //      case 'b' :
2467       //        currentCharacter = '\b';
2468       //        break;
2469       case 't' :
2470         currentCharacter = '\t';
2471         break;
2472       case 'n' :
2473         currentCharacter = '\n';
2474         break;
2475         //      case 'f' :
2476         //        currentCharacter = '\f';
2477         //        break;
2478       case 'r' :
2479         currentCharacter = '\r';
2480         break;
2481       case '\"' :
2482         currentCharacter = '\"';
2483         break;
2484       case '\'' :
2485         currentCharacter = '\'';
2486         break;
2487       case '\\' :
2488         currentCharacter = '\\';
2489         break;
2490       case '$' :
2491         currentCharacter = '$';
2492         break;
2493       default :
2494         // -----------octal escape--------------
2495         // OctalDigit
2496         // OctalDigit OctalDigit
2497         // ZeroToThree OctalDigit OctalDigit
2498
2499         int number = Character.getNumericValue(currentCharacter);
2500         if (number >= 0 && number <= 7) {
2501           boolean zeroToThreeNot = number > 3;
2502           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2503             int digit = Character.getNumericValue(currentCharacter);
2504             if (digit >= 0 && digit <= 7) {
2505               number = (number * 8) + digit;
2506               if (Character.isDigit(currentCharacter = source[currentPosition++])) {
2507                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2508                   currentPosition--;
2509                 } else {
2510                   digit = Character.getNumericValue(currentCharacter);
2511                   if (digit >= 0 && digit <= 7) {
2512                     // has read \ZeroToThree OctalDigit OctalDigit
2513                     number = (number * 8) + digit;
2514                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2515                     currentPosition--;
2516                   }
2517                 }
2518               } else { // has read \OctalDigit NonDigit--> ignore last character
2519                 currentPosition--;
2520               }
2521             } else { // has read \OctalDigit NonOctalDigit--> ignore last character
2522               currentPosition--;
2523             }
2524           } else { // has read \OctalDigit --> ignore last character
2525             currentPosition--;
2526           }
2527           if (number > 255)
2528             throw new InvalidInputException(INVALID_ESCAPE);
2529           currentCharacter = (char) number;
2530         }
2531         //else
2532         //     throw new InvalidInputException(INVALID_ESCAPE);
2533     }
2534   }
2535
2536   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2537   //    return scanIdentifierOrKeyword( false );
2538   //  }
2539
2540   public int scanIdentifierOrKeyword(boolean isVariable) throws InvalidInputException {
2541     //test keywords
2542
2543     //first dispatch on the first char.
2544     //then the length. If there are several
2545     //keywors with the same length AND the same first char, then do another
2546     //disptach on the second char :-)...cool....but fast !
2547
2548     useAssertAsAnIndentifier = false;
2549
2550     while (getNextCharAsJavaIdentifierPart()) {
2551     };
2552
2553     if (isVariable) {
2554       if (new String(getCurrentTokenSource()).equals("$this")) {
2555         return TokenNamethis;
2556       }
2557       return TokenNameVariable;
2558     }
2559     int index, length;
2560     char[] data;
2561     char firstLetter;
2562     //    if (withoutUnicodePtr == 0)
2563
2564     //quick test on length == 1 but not on length > 12 while most identifier
2565     //have a length which is <= 12...but there are lots of identifier with
2566     //only one char....
2567
2568     //      {
2569     if ((length = currentPosition - startPosition) == 1)
2570       return TokenNameIdentifier;
2571     //  data = source;
2572     data = new char[length];
2573     index = startPosition;
2574     for (int i = 0; i < length; i++) {
2575       data[i] = Character.toLowerCase(source[index + i]);
2576     }
2577     index = 0;
2578     //    } else {
2579     //      if ((length = withoutUnicodePtr) == 1)
2580     //        return TokenNameIdentifier;
2581     //      // data = withoutUnicodeBuffer;
2582     //      data = new char[withoutUnicodeBuffer.length];
2583     //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2584     //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2585     //      }
2586     //      index = 1;
2587     //    }
2588
2589     firstLetter = data[index];
2590     switch (firstLetter) {
2591
2592       case 'a' : // as and array abstract
2593         switch (length) {
2594           case 2 : //as
2595             if ((data[++index] == 's')) {
2596               return TokenNameas;
2597             } else {
2598               return TokenNameIdentifier;
2599             }
2600           case 3 : //and
2601             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2602               return TokenNameAND;
2603             } else {
2604               return TokenNameIdentifier;
2605             }
2606           case 5 : // array
2607             if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2608               return TokenNamearray;
2609             else
2610               return TokenNameIdentifier;
2611           case 8 :
2612             if ((data[++index] == 'b')
2613               && (data[++index] == 's')
2614               && (data[++index] == 't')
2615               && (data[++index] == 'r')
2616               && (data[++index] == 'a')
2617               && (data[++index] == 'c')
2618               && (data[++index] == 't'))
2619               return TokenNameabstract;
2620             else
2621               return TokenNameIdentifier;
2622           default :
2623             return TokenNameIdentifier;
2624         }
2625       case 'b' : //break
2626         switch (length) {
2627           case 5 :
2628             if ((data[++index] == 'r') && (data[++index] == 'e') && (data[++index] == 'a') && (data[++index] == 'k'))
2629               return TokenNamebreak;
2630             else
2631               return TokenNameIdentifier;
2632           default :
2633             return TokenNameIdentifier;
2634         }
2635
2636       case 'c' : //case catch class const continue
2637         switch (length) {
2638           case 4 :
2639             if ((data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 'e'))
2640               return TokenNamecase;
2641             else
2642               return TokenNameIdentifier;
2643           case 5 :
2644             if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'c') && (data[++index] == 'h'))
2645               return TokenNamecatch;
2646             if ((data[index] == 'l') && (data[++index] == 'a') && (data[++index] == 's') && (data[++index] == 's'))
2647               return TokenNameclass;
2648             if ((data[index] == 'o') && (data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 't'))
2649               return TokenNameconst;
2650             else
2651               return TokenNameIdentifier;
2652           case 8 :
2653             if ((data[++index] == 'o')
2654               && (data[++index] == 'n')
2655               && (data[++index] == 't')
2656               && (data[++index] == 'i')
2657               && (data[++index] == 'n')
2658               && (data[++index] == 'u')
2659               && (data[++index] == 'e'))
2660               return TokenNamecontinue;
2661             else
2662               return TokenNameIdentifier;
2663           default :
2664             return TokenNameIdentifier;
2665         }
2666
2667       case 'd' : //define declare default do die
2668         switch (length) {
2669           case 2 :
2670             if ((data[++index] == 'o'))
2671               return TokenNamedo;
2672             else
2673               return TokenNameIdentifier;
2674           case 3 :
2675             if ((data[++index] == 'i') && (data[++index] == 'e'))
2676               return TokenNamedie;
2677             else
2678               return TokenNameIdentifier;
2679           case 6 :
2680             if ((data[++index] == 'e')
2681               && (data[++index] == 'f')
2682               && (data[++index] == 'i')
2683               && (data[++index] == 'n')
2684               && (data[++index] == 'e'))
2685               return TokenNamedefine;
2686             else
2687               return TokenNameIdentifier;
2688           case 7 :
2689             if ((data[++index] == 'e')
2690               && (data[++index] == 'c')
2691               && (data[++index] == 'l')
2692               && (data[++index] == 'a')
2693               && (data[++index] == 'r')
2694               && (data[++index] == 'e'))
2695               return TokenNamedeclare;
2696             index = 0;
2697             if ((data[++index] == 'e')
2698               && (data[++index] == 'f')
2699               && (data[++index] == 'a')
2700               && (data[++index] == 'u')
2701               && (data[++index] == 'l')
2702               && (data[++index] == 't'))
2703               return TokenNamedefault;
2704             else
2705               return TokenNameIdentifier;
2706           default :
2707             return TokenNameIdentifier;
2708         }
2709       case 'e' : //echo else exit elseif extends eval
2710         switch (length) {
2711           case 4 :
2712             if ((data[++index] == 'c') && (data[++index] == 'h') && (data[++index] == 'o'))
2713               return TokenNameecho;
2714             else if ((data[index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2715               return TokenNameelse;
2716             else if ((data[index] == 'x') && (data[++index] == 'i') && (data[++index] == 't'))
2717               return TokenNameexit;
2718             else if ((data[index] == 'v') && (data[++index] == 'a') && (data[++index] == 'l'))
2719               return TokenNameeval;
2720             else
2721               return TokenNameIdentifier;
2722           case 5 : // endif empty
2723             if ((data[++index] == 'n') && (data[++index] == 'd') && (data[++index] == 'i') && (data[++index] == 'f'))
2724               return TokenNameendif;
2725             if ((data[index] == 'm') && (data[++index] == 'p') && (data[++index] == 't') && (data[++index] == 'y'))
2726               return TokenNameempty;
2727             else
2728               return TokenNameIdentifier;
2729           case 6 : // endfor
2730             if ((data[++index] == 'n')
2731               && (data[++index] == 'd')
2732               && (data[++index] == 'f')
2733               && (data[++index] == 'o')
2734               && (data[++index] == 'r'))
2735               return TokenNameendfor;
2736             else if (
2737               (data[index] == 'l')
2738                 && (data[++index] == 's')
2739                 && (data[++index] == 'e')
2740                 && (data[++index] == 'i')
2741                 && (data[++index] == 'f'))
2742               return TokenNameelseif;
2743             else
2744               return TokenNameIdentifier;
2745           case 7 :
2746             if ((data[++index] == 'x')
2747               && (data[++index] == 't')
2748               && (data[++index] == 'e')
2749               && (data[++index] == 'n')
2750               && (data[++index] == 'd')
2751               && (data[++index] == 's'))
2752               return TokenNameextends;
2753             else
2754               return TokenNameIdentifier;
2755           case 8 : // endwhile
2756             if ((data[++index] == 'n')
2757               && (data[++index] == 'd')
2758               && (data[++index] == 'w')
2759               && (data[++index] == 'h')
2760               && (data[++index] == 'i')
2761               && (data[++index] == 'l')
2762               && (data[++index] == 'e'))
2763               return TokenNameendwhile;
2764             else
2765               return TokenNameIdentifier;
2766           case 9 : // endswitch
2767             if ((data[++index] == 'n')
2768               && (data[++index] == 'd')
2769               && (data[++index] == 's')
2770               && (data[++index] == 'w')
2771               && (data[++index] == 'i')
2772               && (data[++index] == 't')
2773               && (data[++index] == 'c')
2774               && (data[++index] == 'h'))
2775               return TokenNameendswitch;
2776             else
2777               return TokenNameIdentifier;
2778           case 10 : // enddeclare
2779             if ((data[++index] == 'n')
2780               && (data[++index] == 'd')
2781               && (data[++index] == 'd')
2782               && (data[++index] == 'e')
2783               && (data[++index] == 'c')
2784               && (data[++index] == 'l')
2785               && (data[++index] == 'a')
2786               && (data[++index] == 'r')
2787               && (data[++index] == 'e'))
2788               return TokenNameendforeach;
2789             index = 0;
2790             if ((data[++index] == 'n') // endforeach
2791               && (data[++index] == 'd')
2792               && (data[++index] == 'f')
2793               && (data[++index] == 'o')
2794               && (data[++index] == 'r')
2795               && (data[++index] == 'e')
2796               && (data[++index] == 'a')
2797               && (data[++index] == 'c')
2798               && (data[++index] == 'h'))
2799               return TokenNameendforeach;
2800             else
2801               return TokenNameIdentifier;
2802
2803           default :
2804             return TokenNameIdentifier;
2805         }
2806
2807       case 'f' : //for false final function
2808         switch (length) {
2809           case 3 :
2810             if ((data[++index] == 'o') && (data[++index] == 'r'))
2811               return TokenNamefor;
2812             else
2813               return TokenNameIdentifier;
2814           case 5 :
2815             if ((data[++index] == 'a') && (data[++index] == 'l') && (data[++index] == 's') && (data[++index] == 'e'))
2816               return TokenNamefalse;
2817             if ((data[index] == 'i') && (data[++index] == 'n') && (data[++index] == 'a') && (data[++index] == 'l'))
2818               return TokenNamefinal;
2819             else
2820               return TokenNameIdentifier;
2821           case 7 : // foreach
2822             if ((data[++index] == 'o')
2823               && (data[++index] == 'r')
2824               && (data[++index] == 'e')
2825               && (data[++index] == 'a')
2826               && (data[++index] == 'c')
2827               && (data[++index] == 'h'))
2828               return TokenNameforeach;
2829             else
2830               return TokenNameIdentifier;
2831           case 8 : // function
2832             if ((data[++index] == 'u')
2833               && (data[++index] == 'n')
2834               && (data[++index] == 'c')
2835               && (data[++index] == 't')
2836               && (data[++index] == 'i')
2837               && (data[++index] == 'o')
2838               && (data[++index] == 'n'))
2839               return TokenNamefunction;
2840             else
2841               return TokenNameIdentifier;
2842           default :
2843             return TokenNameIdentifier;
2844         }
2845       case 'g' : //global
2846         if (length == 6) {
2847           if ((data[++index] == 'l')
2848             && (data[++index] == 'o')
2849             && (data[++index] == 'b')
2850             && (data[++index] == 'a')
2851             && (data[++index] == 'l')) {
2852             return TokenNameglobal;
2853           }
2854         }
2855         return TokenNameIdentifier;
2856
2857       case 'i' : //if int isset include include_once instanceof interface implements
2858         switch (length) {
2859           case 2 :
2860             if (data[++index] == 'f')
2861               return TokenNameif;
2862             else
2863               return TokenNameIdentifier;
2864             //          case 3 :
2865             //            if ((data[++index] == 'n') && (data[++index] == 't'))
2866             //              return TokenNameint;
2867             //            else
2868             //              return TokenNameIdentifier;
2869           case 5 :
2870             if ((data[++index] == 's') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
2871               return TokenNameisset;
2872             else
2873               return TokenNameIdentifier;
2874           case 7 :
2875             if ((data[++index] == 'n')
2876               && (data[++index] == 'c')
2877               && (data[++index] == 'l')
2878               && (data[++index] == 'u')
2879               && (data[++index] == 'd')
2880               && (data[++index] == 'e'))
2881               return TokenNameinclude;
2882             else
2883               return TokenNameIdentifier;
2884           case 9 : // interface
2885             if ((data[++index] == 'n')
2886               && (data[++index] == 't')
2887               && (data[++index] == 'e')
2888               && (data[++index] == 'r')
2889               && (data[++index] == 'f')
2890               && (data[++index] == 'a')
2891               && (data[++index] == 'c')
2892               && (data[++index] == 'e'))
2893               return TokenNameinterface;
2894             else
2895               return TokenNameIdentifier;
2896           case 10 : // instanceof
2897             if ((data[++index] == 'n')
2898               && (data[++index] == 's')
2899               && (data[++index] == 't')
2900               && (data[++index] == 'a')
2901               && (data[++index] == 'n')
2902               && (data[++index] == 'c')
2903               && (data[++index] == 'e')
2904               && (data[++index] == 'o')
2905               && (data[++index] == 'f'))
2906               return TokenNameinstanceof;
2907             if ((data[index] == 'm')
2908               && (data[++index] == 'p')
2909               && (data[++index] == 'l')
2910               && (data[++index] == 'e')
2911               && (data[++index] == 'm')
2912               && (data[++index] == 'e')
2913               && (data[++index] == 'n')
2914               && (data[++index] == 't')
2915               && (data[++index] == 's'))
2916               return TokenNameimplements;
2917             else
2918               return TokenNameIdentifier;
2919           case 12 :
2920             if ((data[++index] == 'n')
2921               && (data[++index] == 'c')
2922               && (data[++index] == 'l')
2923               && (data[++index] == 'u')
2924               && (data[++index] == 'd')
2925               && (data[++index] == 'e')
2926               && (data[++index] == '_')
2927               && (data[++index] == 'o')
2928               && (data[++index] == 'n')
2929               && (data[++index] == 'c')
2930               && (data[++index] == 'e'))
2931               return TokenNameinclude_once;
2932             else
2933               return TokenNameIdentifier;
2934           default :
2935             return TokenNameIdentifier;
2936         }
2937
2938       case 'l' : //list
2939         if (length == 4) {
2940           if ((data[++index] == 'i') && (data[++index] == 's') && (data[++index] == 't')) {
2941             return TokenNamelist;
2942           }
2943         }
2944         return TokenNameIdentifier;
2945
2946       case 'n' : // new null
2947         switch (length) {
2948           case 3 :
2949             if ((data[++index] == 'e') && (data[++index] == 'w'))
2950               return TokenNamenew;
2951             else
2952               return TokenNameIdentifier;
2953           case 4 :
2954             if ((data[++index] == 'u') && (data[++index] == 'l') && (data[++index] == 'l'))
2955               return TokenNamenull;
2956             else
2957               return TokenNameIdentifier;
2958
2959           default :
2960             return TokenNameIdentifier;
2961         }
2962       case 'o' : // or old_function
2963         if (length == 2) {
2964           if (data[++index] == 'r') {
2965             return TokenNameOR;
2966           }
2967         }
2968         //        if (length == 12) {
2969         //          if ((data[++index] == 'l')
2970         //            && (data[++index] == 'd')
2971         //            && (data[++index] == '_')
2972         //            && (data[++index] == 'f')
2973         //            && (data[++index] == 'u')
2974         //            && (data[++index] == 'n')
2975         //            && (data[++index] == 'c')
2976         //            && (data[++index] == 't')
2977         //            && (data[++index] == 'i')
2978         //            && (data[++index] == 'o')
2979         //            && (data[++index] == 'n')) {
2980         //            return TokenNameold_function;
2981         //          }
2982         //        }
2983         return TokenNameIdentifier;
2984
2985       case 'p' : // print public private protected
2986         switch (length) {
2987           case 5 :
2988             if ((data[++index] == 'r') && (data[++index] == 'i') && (data[++index] == 'n') && (data[++index] == 't')) {
2989               return TokenNameprint;
2990             } else
2991               return TokenNameIdentifier;
2992           case 6 :
2993             if ((data[++index] == 'u')
2994               && (data[++index] == 'b')
2995               && (data[++index] == 'l')
2996               && (data[++index] == 'i')
2997               && (data[++index] == 'c')) {
2998               return TokenNamepublic;
2999             } else
3000               return TokenNameIdentifier;
3001           case 7 :
3002             if ((data[++index] == 'r')
3003               && (data[++index] == 'i')
3004               && (data[++index] == 'v')
3005               && (data[++index] == 'a')
3006               && (data[++index] == 't')
3007               && (data[++index] == 'e')) {
3008               return TokenNameprivate;
3009             } else
3010               return TokenNameIdentifier;
3011           case 9 :
3012             if ((data[++index] == 'r')
3013               && (data[++index] == 'o')
3014               && (data[++index] == 't')
3015               && (data[++index] == 'e')
3016               && (data[++index] == 'c')
3017               && (data[++index] == 't')
3018               && (data[++index] == 'e')
3019               && (data[++index] == 'd')) {
3020               return TokenNameprotected;
3021             } else
3022               return TokenNameIdentifier;
3023         }
3024         return TokenNameIdentifier;
3025       case 'r' : //return require require_once
3026         if (length == 6) {
3027           if ((data[++index] == 'e')
3028             && (data[++index] == 't')
3029             && (data[++index] == 'u')
3030             && (data[++index] == 'r')
3031             && (data[++index] == 'n')) {
3032             return TokenNamereturn;
3033           }
3034         } else if (length == 7) {
3035           if ((data[++index] == 'e')
3036             && (data[++index] == 'q')
3037             && (data[++index] == 'u')
3038             && (data[++index] == 'i')
3039             && (data[++index] == 'r')
3040             && (data[++index] == 'e')) {
3041             return TokenNamerequire;
3042           }
3043         } else if (length == 12) {
3044           if ((data[++index] == 'e')
3045             && (data[++index] == 'q')
3046             && (data[++index] == 'u')
3047             && (data[++index] == 'i')
3048             && (data[++index] == 'r')
3049             && (data[++index] == 'e')
3050             && (data[++index] == '_')
3051             && (data[++index] == 'o')
3052             && (data[++index] == 'n')
3053             && (data[++index] == 'c')
3054             && (data[++index] == 'e')) {
3055             return TokenNamerequire_once;
3056           }
3057         } else
3058           return TokenNameIdentifier;
3059
3060       case 's' : //static switch
3061         switch (length) {
3062           case 6 :
3063             if (data[++index] == 't')
3064               if ((data[++index] == 'a') && (data[++index] == 't') && (data[++index] == 'i') && (data[++index] == 'c')) {
3065                 return TokenNamestatic;
3066               } else
3067                 return TokenNameIdentifier;
3068             else if (
3069               (data[index] == 'w')
3070                 && (data[++index] == 'i')
3071                 && (data[++index] == 't')
3072                 && (data[++index] == 'c')
3073                 && (data[++index] == 'h'))
3074               return TokenNameswitch;
3075             else
3076               return TokenNameIdentifier;
3077           default :
3078             return TokenNameIdentifier;
3079         }
3080
3081       case 't' : // try true throw
3082         switch (length) {
3083           case 3 :
3084             if ((data[++index] == 'r') && (data[++index] == 'y'))
3085               return TokenNametry;
3086             else
3087               return TokenNameIdentifier;
3088           case 4 :
3089             if ((data[++index] == 'r') && (data[++index] == 'u') && (data[++index] == 'e'))
3090               return TokenNametrue;
3091             else
3092               return TokenNameIdentifier;
3093           case 5 :
3094             if ((data[++index] == 'h') && (data[++index] == 'r') && (data[++index] == 'o') && (data[++index] == 'w'))
3095               return TokenNamethrow;
3096             else
3097               return TokenNameIdentifier;
3098
3099           default :
3100             return TokenNameIdentifier;
3101         }
3102       case 'u' : //use unset
3103         switch (length) {
3104           case 3 :
3105             if ((data[++index] == 's') && (data[++index] == 'e'))
3106               return TokenNameuse;
3107             else
3108               return TokenNameIdentifier;
3109           case 5 :
3110             if ((data[++index] == 'n') && (data[++index] == 's') && (data[++index] == 'e') && (data[++index] == 't'))
3111               return TokenNameunset;
3112             else
3113               return TokenNameIdentifier;
3114           default :
3115             return TokenNameIdentifier;
3116         }
3117       case 'v' : //var
3118         switch (length) {
3119           case 3 :
3120             if ((data[++index] == 'a') && (data[++index] == 'r'))
3121               return TokenNamevar;
3122             else
3123               return TokenNameIdentifier;
3124
3125           default :
3126             return TokenNameIdentifier;
3127         }
3128
3129       case 'w' : //while
3130         switch (length) {
3131           case 5 :
3132             if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 'l') && (data[++index] == 'e'))
3133               return TokenNamewhile;
3134             else
3135               return TokenNameIdentifier;
3136             //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&&
3137             // (data[++index]=='p'))
3138             //return TokenNamewidefp ;
3139             //else
3140             //return TokenNameIdentifier;
3141           default :
3142             return TokenNameIdentifier;
3143         }
3144
3145       case 'x' : //xor
3146         switch (length) {
3147           case 3 :
3148             if ((data[++index] == 'o') && (data[++index] == 'r'))
3149               return TokenNameXOR;
3150             else
3151               return TokenNameIdentifier;
3152
3153           default :
3154             return TokenNameIdentifier;
3155         }
3156       default :
3157         return TokenNameIdentifier;
3158     }
3159   }
3160   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3161
3162     //when entering this method the currentCharacter is the firt
3163     //digit of the number , i.e. it may be preceeded by a . when
3164     //dotPrefix is true
3165
3166     boolean floating = dotPrefix;
3167     if ((!dotPrefix) && (currentCharacter == '0')) {
3168       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3169         //force the first char of the hexa number do exist...
3170         // consume next character
3171         unicodeAsBackSlash = false;
3172         currentCharacter = source[currentPosition++];
3173         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3174         //          && (source[currentPosition] == 'u')) {
3175         //          getNextUnicodeChar();
3176         //        } else {
3177         //          if (withoutUnicodePtr != 0) {
3178         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3179         //          }
3180         //        }
3181         if (Character.digit(currentCharacter, 16) == -1)
3182           throw new InvalidInputException(INVALID_HEXA);
3183         //---end forcing--
3184         while (getNextCharAsDigit(16)) {
3185         };
3186         //        if (getNextChar('l', 'L') >= 0)
3187         //          return TokenNameLongLiteral;
3188         //        else
3189         return TokenNameIntegerLiteral;
3190       }
3191
3192       //there is x or X in the number
3193       //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3194       if (getNextCharAsDigit()) {
3195         //-------------potential octal-----------------
3196         while (getNextCharAsDigit()) {
3197         };
3198
3199         //        if (getNextChar('l', 'L') >= 0) {
3200         //          return TokenNameLongLiteral;
3201         //        }
3202         //
3203         //        if (getNextChar('f', 'F') >= 0) {
3204         //          return TokenNameFloatingPointLiteral;
3205         //        }
3206
3207         if (getNextChar('d', 'D') >= 0) {
3208           return TokenNameDoubleLiteral;
3209         } else { //make the distinction between octal and float ....
3210           if (getNextChar('.')) { //bingo ! ....
3211             while (getNextCharAsDigit()) {
3212             };
3213             if (getNextChar('e', 'E') >= 0) {
3214               // consume next character
3215               unicodeAsBackSlash = false;
3216               currentCharacter = source[currentPosition++];
3217               //              if (((currentCharacter = source[currentPosition++]) == '\\')
3218               //                && (source[currentPosition] == 'u')) {
3219               //                getNextUnicodeChar();
3220               //              } else {
3221               //                if (withoutUnicodePtr != 0) {
3222               //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3223               //                }
3224               //              }
3225
3226               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3227                 // consume next character
3228                 unicodeAsBackSlash = false;
3229                 currentCharacter = source[currentPosition++];
3230                 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3231                 //                  && (source[currentPosition] == 'u')) {
3232                 //                  getNextUnicodeChar();
3233                 //                } else {
3234                 //                  if (withoutUnicodePtr != 0) {
3235                 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3236                 //                      currentCharacter;
3237                 //                  }
3238                 //                }
3239               }
3240               if (!Character.isDigit(currentCharacter))
3241                 throw new InvalidInputException(INVALID_FLOAT);
3242               while (getNextCharAsDigit()) {
3243               };
3244             }
3245             //            if (getNextChar('f', 'F') >= 0)
3246             //              return TokenNameFloatingPointLiteral;
3247             getNextChar('d', 'D'); //jump over potential d or D
3248             return TokenNameDoubleLiteral;
3249           } else {
3250             return TokenNameIntegerLiteral;
3251           }
3252         }
3253       } else {
3254         /* carry on */
3255       }
3256     }
3257
3258     while (getNextCharAsDigit()) {
3259     };
3260
3261     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3262     //      return TokenNameLongLiteral;
3263
3264     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3265       while (getNextCharAsDigit()) {
3266       };
3267       floating = true;
3268     }
3269
3270     //if floating is true both exponant and suffix may be optional
3271
3272     if (getNextChar('e', 'E') >= 0) {
3273       floating = true;
3274       // consume next character
3275       unicodeAsBackSlash = false;
3276       currentCharacter = source[currentPosition++];
3277       //      if (((currentCharacter = source[currentPosition++]) == '\\')
3278       //        && (source[currentPosition] == 'u')) {
3279       //        getNextUnicodeChar();
3280       //      } else {
3281       //        if (withoutUnicodePtr != 0) {
3282       //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3283       //        }
3284       //      }
3285
3286       if ((currentCharacter == '-') || (currentCharacter == '+')) { // consume next character
3287         unicodeAsBackSlash = false;
3288         currentCharacter = source[currentPosition++];
3289         //        if (((currentCharacter = source[currentPosition++]) == '\\')
3290         //          && (source[currentPosition] == 'u')) {
3291         //          getNextUnicodeChar();
3292         //        } else {
3293         //          if (withoutUnicodePtr != 0) {
3294         //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3295         //          }
3296         //        }
3297       }
3298       if (!Character.isDigit(currentCharacter))
3299         throw new InvalidInputException(INVALID_FLOAT);
3300       while (getNextCharAsDigit()) {
3301       };
3302     }
3303
3304     if (getNextChar('d', 'D') >= 0)
3305       return TokenNameDoubleLiteral;
3306     //    if (getNextChar('f', 'F') >= 0)
3307     //      return TokenNameFloatingPointLiteral;
3308
3309     //the long flag has been tested before
3310
3311     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3312   }
3313   /**
3314    * Search the line number corresponding to a specific position
3315    *  
3316    */
3317   public final int getLineNumber(int position) {
3318
3319     if (lineEnds == null)
3320       return 1;
3321     int length = linePtr + 1;
3322     if (length == 0)
3323       return 1;
3324     int g = 0, d = length - 1;
3325     int m = 0;
3326     while (g <= d) {
3327       m = (g + d) / 2;
3328       if (position < lineEnds[m]) {
3329         d = m - 1;
3330       } else if (position > lineEnds[m]) {
3331         g = m + 1;
3332       } else {
3333         return m + 1;
3334       }
3335     }
3336     if (position < lineEnds[m]) {
3337       return m + 1;
3338     }
3339     return m + 2;
3340   }
3341
3342   public void setPHPMode(boolean mode) {
3343     phpMode = mode;
3344   }
3345
3346   public final void setSource(char[] source) {
3347     //the source-buffer is set to sourceString
3348
3349     if (source == null) {
3350       this.source = new char[0];
3351     } else {
3352       this.source = source;
3353     }
3354     startPosition = -1;
3355     initialPosition = currentPosition = 0;
3356     containsAssertKeyword = false;
3357     withoutUnicodeBuffer = new char[this.source.length];
3358
3359   }
3360
3361   public String toString() {
3362     if (startPosition == source.length)
3363       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3364     if (currentPosition > source.length)
3365       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3366
3367     char front[] = new char[startPosition];
3368     System.arraycopy(source, 0, front, 0, startPosition);
3369
3370     int middleLength = (currentPosition - 1) - startPosition + 1;
3371     char middle[];
3372     if (middleLength > -1) {
3373       middle = new char[middleLength];
3374       System.arraycopy(source, startPosition, middle, 0, middleLength);
3375     } else {
3376       middle = new char[0];
3377     }
3378
3379     char end[] = new char[source.length - (currentPosition - 1)];
3380     System.arraycopy(source, (currentPosition - 1) + 1, end, 0, source.length - (currentPosition - 1) - 1);
3381
3382     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3383     + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3384     + new String(end);
3385   }
3386   public final String toStringAction(int act) {
3387
3388     switch (act) {
3389       case TokenNameERROR :
3390         return "ScannerError"; // + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3391       case TokenNameStopPHP :
3392         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3393       case TokenNameIdentifier :
3394         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3395       case TokenNameVariable :
3396         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3397       case TokenNameas :
3398         return "as"; //$NON-NLS-1$
3399       case TokenNamebreak :
3400         return "break"; //$NON-NLS-1$
3401       case TokenNamecase :
3402         return "case"; //$NON-NLS-1$
3403       case TokenNameclass :
3404         return "class"; //$NON-NLS-1$
3405       case TokenNamecontinue :
3406         return "continue"; //$NON-NLS-1$
3407       case TokenNamedefault :
3408         return "default"; //$NON-NLS-1$
3409       case TokenNamedefine :
3410         return "define"; //$NON-NLS-1$
3411       case TokenNamedo :
3412         return "do"; //$NON-NLS-1$
3413       case TokenNameecho :
3414         return "echo"; //$NON-NLS-1$
3415       case TokenNameelse :
3416         return "else"; //$NON-NLS-1$
3417       case TokenNameelseif :
3418         return "elseif"; //$NON-NLS-1$
3419       case TokenNameendfor :
3420         return "endfor"; //$NON-NLS-1$
3421       case TokenNameendforeach :
3422         return "endforeach"; //$NON-NLS-1$
3423       case TokenNameendif :
3424         return "endif"; //$NON-NLS-1$
3425       case TokenNameendswitch :
3426         return "endswitch"; //$NON-NLS-1$
3427       case TokenNameendwhile :
3428         return "endwhile"; //$NON-NLS-1$
3429       case TokenNameextends :
3430         return "extends"; //$NON-NLS-1$
3431       case TokenNamefalse :
3432         return "false"; //$NON-NLS-1$
3433       case TokenNamefor :
3434         return "for"; //$NON-NLS-1$
3435       case TokenNameforeach :
3436         return "foreach"; //$NON-NLS-1$
3437       case TokenNamefunction :
3438         return "function"; //$NON-NLS-1$
3439       case TokenNameglobal :
3440         return "global"; //$NON-NLS-1$
3441       case TokenNameif :
3442         return "if"; //$NON-NLS-1$
3443       case TokenNameinclude :
3444         return "include"; //$NON-NLS-1$
3445       case TokenNameinclude_once :
3446         return "include_once"; //$NON-NLS-1$
3447       case TokenNamelist :
3448         return "list"; //$NON-NLS-1$
3449       case TokenNamenew :
3450         return "new"; //$NON-NLS-1$
3451       case TokenNamenull :
3452         return "null"; //$NON-NLS-1$
3453       case TokenNameprint :
3454         return "print"; //$NON-NLS-1$
3455       case TokenNamerequire :
3456         return "require"; //$NON-NLS-1$
3457       case TokenNamerequire_once :
3458         return "require_once"; //$NON-NLS-1$
3459       case TokenNamereturn :
3460         return "return"; //$NON-NLS-1$
3461       case TokenNamestatic :
3462         return "static"; //$NON-NLS-1$
3463       case TokenNameswitch :
3464         return "switch"; //$NON-NLS-1$
3465       case TokenNametrue :
3466         return "true"; //$NON-NLS-1$
3467       case TokenNamevar :
3468         return "var"; //$NON-NLS-1$
3469       case TokenNamewhile :
3470         return "while"; //$NON-NLS-1$
3471       case TokenNamethis :
3472         return "$this"; //$NON-NLS-1$
3473       case TokenNameIntegerLiteral :
3474         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3475       case TokenNameDoubleLiteral :
3476         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3477       case TokenNameStringLiteral :
3478         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3479       case TokenNameStringConstant :
3480         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3481       case TokenNameStringInterpolated :
3482         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3483       case TokenNameHEREDOC :
3484         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3485
3486       case TokenNamePLUS_PLUS :
3487         return "++"; //$NON-NLS-1$
3488       case TokenNameMINUS_MINUS :
3489         return "--"; //$NON-NLS-1$
3490       case TokenNameEQUAL_EQUAL :
3491         return "=="; //$NON-NLS-1$
3492       case TokenNameEQUAL_EQUAL_EQUAL :
3493         return "==="; //$NON-NLS-1$
3494       case TokenNameEQUAL_GREATER :
3495         return "=>"; //$NON-NLS-1$
3496       case TokenNameLESS_EQUAL :
3497         return "<="; //$NON-NLS-1$
3498       case TokenNameGREATER_EQUAL :
3499         return ">="; //$NON-NLS-1$
3500       case TokenNameNOT_EQUAL :
3501         return "!="; //$NON-NLS-1$
3502       case TokenNameNOT_EQUAL_EQUAL :
3503         return "!=="; //$NON-NLS-1$
3504       case TokenNameLEFT_SHIFT :
3505         return "<<"; //$NON-NLS-1$
3506       case TokenNameRIGHT_SHIFT :
3507         return ">>"; //$NON-NLS-1$
3508       case TokenNamePLUS_EQUAL :
3509         return "+="; //$NON-NLS-1$
3510       case TokenNameMINUS_EQUAL :
3511         return "-="; //$NON-NLS-1$
3512       case TokenNameMULTIPLY_EQUAL :
3513         return "*="; //$NON-NLS-1$
3514       case TokenNameDIVIDE_EQUAL :
3515         return "/="; //$NON-NLS-1$
3516       case TokenNameAND_EQUAL :
3517         return "&="; //$NON-NLS-1$
3518       case TokenNameOR_EQUAL :
3519         return "|="; //$NON-NLS-1$
3520       case TokenNameXOR_EQUAL :
3521         return "^="; //$NON-NLS-1$
3522       case TokenNameREMAINDER_EQUAL :
3523         return "%="; //$NON-NLS-1$
3524       case TokenNameLEFT_SHIFT_EQUAL :
3525         return "<<="; //$NON-NLS-1$
3526       case TokenNameRIGHT_SHIFT_EQUAL :
3527         return ">>="; //$NON-NLS-1$
3528       case TokenNameOR_OR :
3529         return "||"; //$NON-NLS-1$
3530       case TokenNameAND_AND :
3531         return "&&"; //$NON-NLS-1$
3532       case TokenNamePLUS :
3533         return "+"; //$NON-NLS-1$
3534       case TokenNameMINUS :
3535         return "-"; //$NON-NLS-1$
3536       case TokenNameMINUS_GREATER :
3537         return "->";
3538       case TokenNameNOT :
3539         return "!"; //$NON-NLS-1$
3540       case TokenNameREMAINDER :
3541         return "%"; //$NON-NLS-1$
3542       case TokenNameXOR :
3543         return "^"; //$NON-NLS-1$
3544       case TokenNameAND :
3545         return "&"; //$NON-NLS-1$
3546       case TokenNameMULTIPLY :
3547         return "*"; //$NON-NLS-1$
3548       case TokenNameOR :
3549         return "|"; //$NON-NLS-1$
3550       case TokenNameTWIDDLE :
3551         return "~"; //$NON-NLS-1$
3552       case TokenNameTWIDDLE_EQUAL :
3553         return "~="; //$NON-NLS-1$
3554       case TokenNameDIVIDE :
3555         return "/"; //$NON-NLS-1$
3556       case TokenNameGREATER :
3557         return ">"; //$NON-NLS-1$
3558       case TokenNameLESS :
3559         return "<"; //$NON-NLS-1$
3560       case TokenNameLPAREN :
3561         return "("; //$NON-NLS-1$
3562       case TokenNameRPAREN :
3563         return ")"; //$NON-NLS-1$
3564       case TokenNameLBRACE :
3565         return "{"; //$NON-NLS-1$
3566       case TokenNameRBRACE :
3567         return "}"; //$NON-NLS-1$
3568       case TokenNameLBRACKET :
3569         return "["; //$NON-NLS-1$
3570       case TokenNameRBRACKET :
3571         return "]"; //$NON-NLS-1$
3572       case TokenNameSEMICOLON :
3573         return ";"; //$NON-NLS-1$
3574       case TokenNameQUESTION :
3575         return "?"; //$NON-NLS-1$
3576       case TokenNameCOLON :
3577         return ":"; //$NON-NLS-1$
3578       case TokenNameCOMMA :
3579         return ","; //$NON-NLS-1$
3580       case TokenNameDOT :
3581         return "."; //$NON-NLS-1$
3582       case TokenNameEQUAL :
3583         return "="; //$NON-NLS-1$
3584       case TokenNameAT :
3585         return "@";
3586       case TokenNameDOLLAR_LBRACE :
3587         return "${";
3588       case TokenNameEOF :
3589         return "EOF"; //$NON-NLS-1$
3590       case TokenNameWHITESPACE :
3591         return "WHITESPACE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3592       case TokenNameCOMMENT_LINE :
3593         return "COMMENT_LINE(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3594       case TokenNameCOMMENT_BLOCK :
3595         return "COMMENT_BLOCK(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3596       case TokenNameCOMMENT_PHPDOC :
3597         return "COMMENT_PHPDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3598       case TokenNameHTML :
3599         return "HTML(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3600       default :
3601         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3602     }
3603   }
3604
/**
 * Creates a scanner with assert mode switched off; delegates to the
 * four-argument constructor, passing <code>false</code> as its last argument.
 *
 * @param tokenizeComments stored in the field of the same name
 * @param tokenizeWhiteSpace stored in the field of the same name
 * @param checkNonExternalizedStringLiterals stored in the field of the same name
 */
public Scanner(
  boolean tokenizeComments,
  boolean tokenizeWhiteSpace,
  boolean checkNonExternalizedStringLiterals) {
  this(
    tokenizeComments,
    tokenizeWhiteSpace,
    checkNonExternalizedStringLiterals,
    false);
}
3608
/**
 * Creates a scanner and records the given configuration flags.
 * The end-of-file position starts out at {@link Integer#MAX_VALUE}.
 *
 * @param tokenizeComments stored in the field of the same name
 * @param tokenizeWhiteSpace stored in the field of the same name
 * @param checkNonExternalizedStringLiterals stored in the field of the same name
 * @param assertMode stored in the field of the same name
 */
public Scanner(
  boolean tokenizeComments,
  boolean tokenizeWhiteSpace,
  boolean checkNonExternalizedStringLiterals,
  boolean assertMode) {
  // configuration flags
  this.assertMode = assertMode;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeComments = tokenizeComments;
  // no end-of-file limit has been set yet
  this.eofPosition = Integer.MAX_VALUE;
}
3620
3621   private void checkNonExternalizeString() throws InvalidInputException {
3622     if (currentLine == null)
3623       return;
3624     parseTags(currentLine);
3625   }
3626
3627   private void parseTags(NLSLine line) throws InvalidInputException {
3628     String s = new String(getCurrentTokenSource());
3629     int pos = s.indexOf(TAG_PREFIX);
3630     int lineLength = line.size();
3631     while (pos != -1) {
3632       int start = pos + TAG_PREFIX_LENGTH;
3633       int end = s.indexOf(TAG_POSTFIX, start);
3634       String index = s.substring(start, end);
3635       int i = 0;
3636       try {
3637         i = Integer.parseInt(index) - 1;
3638         // Tags are one based not zero based.
3639       } catch (NumberFormatException e) {
3640         i = -1; // we don't want to consider this as a valid NLS tag
3641       }
3642       if (line.exists(i)) {
3643         line.set(i, null);
3644       }
3645       pos = s.indexOf(TAG_PREFIX, start);
3646     }
3647
3648     this.nonNLSStrings = new StringLiteral[lineLength];
3649     int nonNLSCounter = 0;
3650     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3651       StringLiteral literal = (StringLiteral) iterator.next();
3652       if (literal != null) {
3653         this.nonNLSStrings[nonNLSCounter++] = literal;
3654       }
3655     }
3656     if (nonNLSCounter == 0) {
3657       this.nonNLSStrings = null;
3658       currentLine = null;
3659       return;
3660     }
3661     this.wasNonExternalizedStringLiteral = true;
3662     if (nonNLSCounter != lineLength) {
3663       System.arraycopy(this.nonNLSStrings, 0, (this.nonNLSStrings = new StringLiteral[nonNLSCounter]), 0, nonNLSCounter);
3664     }
3665     currentLine = null;
3666   }
3667
3668   public final void scanEscapeCharacter() throws InvalidInputException {
3669     // the string with "\\u" is a legal string of two chars \ and u
3670     //thus we use a direct access to the source (for regular cases).
3671
3672     if (unicodeAsBackSlash) {
3673       // consume next character
3674       unicodeAsBackSlash = false;
3675       //                        if (((currentCharacter = source[currentPosition++]) == '\\') && (source[currentPosition] == 'u')) {
3676       //                                getNextUnicodeChar();
3677       //                        } else {
3678       if (withoutUnicodePtr != 0) {
3679         withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3680         //                              }
3681       }
3682     } else
3683       currentCharacter = source[currentPosition++];
3684     switch (currentCharacter) {
3685       case 'b' :
3686         currentCharacter = '\b';
3687         break;
3688       case 't' :
3689         currentCharacter = '\t';
3690         break;
3691       case 'n' :
3692         currentCharacter = '\n';
3693         break;
3694       case 'f' :
3695         currentCharacter = '\f';
3696         break;
3697       case 'r' :
3698         currentCharacter = '\r';
3699         break;
3700       case '\"' :
3701         currentCharacter = '\"';
3702         break;
3703       case '\'' :
3704         currentCharacter = '\'';
3705         break;
3706       case '\\' :
3707         currentCharacter = '\\';
3708         break;
3709       default :
3710         // -----------octal escape--------------
3711         // OctalDigit
3712         // OctalDigit OctalDigit
3713         // ZeroToThree OctalDigit OctalDigit
3714
3715         int number = Character.getNumericValue(currentCharacter);
3716         if (number >= 0 && number <= 7) {
3717           boolean zeroToThreeNot = number > 3;
3718           if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3719             int digit = Character.getNumericValue(currentCharacter);
3720             if (digit >= 0 && digit <= 7) {
3721               number = (number * 8) + digit;
3722               if (Character.isDigit(currentCharacter = source[currentPosition++])) {
3723                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
3724                   currentPosition--;
3725                 } else {
3726                   digit = Character.getNumericValue(currentCharacter);
3727                   if (digit >= 0 && digit <= 7) { // has read \ZeroToThree OctalDigit OctalDigit
3728                     number = (number * 8) + digit;
3729                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
3730                     currentPosition--;
3731                   }
3732                 }
3733               } else { // has read \OctalDigit NonDigit--> ignore last character
3734                 currentPosition--;
3735               }
3736             } else { // has read \OctalDigit NonOctalDigit--> ignore last character
3737               currentPosition--;
3738             }
3739           } else { // has read \OctalDigit --> ignore last character
3740             currentPosition--;
3741           }
3742           if (number > 255)
3743             throw new InvalidInputException(INVALID_ESCAPE);
3744           currentCharacter = (char) number;
3745         } else
3746           throw new InvalidInputException(INVALID_ESCAPE);
3747     }
3748   }
3749
3750   // chech presence of task: tags
3751   public void checkTaskTag(int commentStart, int commentEnd) {
3752
3753     // only look for newer task: tags
3754     if (this.foundTaskCount > 0 && this.foundTaskPositions[this.foundTaskCount - 1][0] >= commentStart) {
3755       return;
3756     }
3757     int foundTaskIndex = this.foundTaskCount;
3758     nextChar : for (int i = commentStart; i < commentEnd && i < this.eofPosition; i++) {
3759
3760       char[] tag = null;
3761       char[] priority = null;
3762
3763       // check for tag occurrence
3764       nextTag : for (int itag = 0; itag < this.taskTags.length; itag++) {
3765         tag = this.taskTags[itag];
3766         priority = this.taskPriorities != null && itag < this.taskPriorities.length ? this.taskPriorities[itag] : null;
3767         int tagLength = tag.length;
3768         for (int t = 0; t < tagLength; t++) {
3769           if (this.source[i + t] != tag[t])
3770             continue nextTag;
3771         }
3772
3773         if (this.foundTaskTags == null) {
3774           this.foundTaskTags = new char[5][];
3775           this.foundTaskMessages = new char[5][];
3776           this.foundTaskPriorities = new char[5][];
3777           this.foundTaskPositions = new int[5][];
3778         } else if (this.foundTaskCount == this.foundTaskTags.length) {
3779           System.arraycopy(this.foundTaskTags, 0, this.foundTaskTags = new char[this.foundTaskCount * 2][], 0, this.foundTaskCount);
3780           System.arraycopy(
3781             this.foundTaskMessages,
3782             0,
3783             this.foundTaskMessages = new char[this.foundTaskCount * 2][],
3784             0,
3785             this.foundTaskCount);
3786           System.arraycopy(
3787             this.foundTaskPriorities,
3788             0,
3789             this.foundTaskPriorities = new char[this.foundTaskCount * 2][],
3790             0,
3791             this.foundTaskCount);
3792           System.arraycopy(
3793             this.foundTaskPositions,
3794             0,
3795             this.foundTaskPositions = new int[this.foundTaskCount * 2][],
3796             0,
3797             this.foundTaskCount);
3798         }
3799         this.foundTaskTags[this.foundTaskCount] = tag;
3800         this.foundTaskPriorities[this.foundTaskCount] = priority;
3801         this.foundTaskPositions[this.foundTaskCount] = new int[] { i, i + tagLength - 1 };
3802         this.foundTaskMessages[this.foundTaskCount] = CharOperation.NO_CHAR;
3803         this.foundTaskCount++;
3804
3805         i += tagLength - 1; // will be incremented when looping
3806       }
3807     }
3808
3809     for (int i = foundTaskIndex; i < this.foundTaskCount; i++) {
3810       // retrieve message start and end positions
3811       int msgStart = this.foundTaskPositions[i][0] + this.foundTaskTags[i].length;
3812       int max_value = i + 1 < this.foundTaskCount ? this.foundTaskPositions[i + 1][0] - 1 : commentEnd - 1;
3813       // at most beginning of next task
3814       if (max_value < msgStart)
3815         max_value = msgStart; // would only occur if tag is before EOF.
3816       int end = -1;
3817       char c;
3818
3819       for (int j = msgStart; j < max_value; j++) {
3820         if ((c = this.source[j]) == '\n' || c == '\r') {
3821           end = j - 1;
3822           break;
3823         }
3824       }
3825
3826       if (end == -1) {
3827         for (int j = max_value; j > msgStart; j--) {
3828           if ((c = this.source[j]) == '*') {
3829             end = j - 1;
3830             break;
3831           }
3832         }
3833         if (end == -1)
3834           end = max_value;
3835       }
3836
3837       if (msgStart == end)
3838         continue; // empty
3839
3840       // trim the message
3841       while (CharOperation.isWhitespace(source[end]) && msgStart <= end)
3842         end--;
3843       while (CharOperation.isWhitespace(source[msgStart]) && msgStart <= end)
3844         msgStart++;
3845
3846       // update the end position of the task
3847       this.foundTaskPositions[i][1] = end;
3848
3849       // get the message source
3850       final int messageLength = end - msgStart + 1;
3851       char[] message = new char[messageLength];
3852
3853       System.arraycopy(source, msgStart, message, 0, messageLength);
3854       this.foundTaskMessages[i] = message;
3855     }
3856   }
3857
3858 }