PHP highlighting colors could now be managed by the new PreferencePage
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v0.5 
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v05.html
7  * 
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
16
17 import net.sourceforge.phpdt.core.compiler.*;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
19
20 public class Scanner implements IScanner, ITerminalSymbols {
21
22   /* Scanner API overview:
23    - getNextToken() returns the type of the next token
24      (this value is not memorized by the scanner)
25    - getCurrentTokenSource() returns the characters of the current token
26      (a fresh copy of source[startPosition .. currentPosition-1])
27    - startPosition gives the start position of the current token in the stream
28    - currentPosition-1 gives the end position of the current token in the stream
29   */
30
31   // assert-keyword support (labelled a "1.4 feature" by the original authors)
32   private boolean assertMode;
33   public boolean useAssertAsAnIndentifier = false;
34   // flag indicating whether the processed source contains occurrences of the keyword assert
35   public boolean containsAssertKeyword = false;
36
37   public boolean recordLineSeparator; // when true, getNextToken() calls pushLineSeparator() on each CR or LF it reads
38   public boolean phpMode = false; // while false, getNextToken() skips input until it finds a "<?" or "<?php" opener
39
40   public char currentCharacter; // character most recently read from source
41   public int startPosition; // index in source where the current token starts
42   public int currentPosition; // index in source of the next character to read
43   public int initialPosition, eofPosition;
44   // once currentPosition is past eofPosition, EOF is returned instead of a real token from the source
45
46   public boolean tokenizeComments;
47   public boolean tokenizeWhiteSpace; // when true, getNextToken() reports runs of white space as TokenNameWHITESPACE
48
49   // source should be viewed as a window (i.e. a part)
50   // of an entire, possibly very large, stream
51   public char source[];
52
53   // unicode support
54   public char[] withoutUnicodeBuffer;
55   public int withoutUnicodePtr;
56   // withoutUnicodePtr == 0 ==> no unicode in the current token
57   public boolean unicodeAsBackSlash = false;
58
59   public boolean scanningFloatLiteral = false;
60
61   //support for /** comments
62   // public char[][] comments = new char[10][];
63   public int[] commentStops = new int[10];
64   public int[] commentStarts = new int[10];
65   public int commentPtr = -1; // starts at -1; the original note reads "no comment test with commentPtr value -1"
66
67   // diet parsing support - jump over some method bodies when requested
68   public boolean diet = false;
69
70   // support for line-oriented debuggers:
71   // remember the positions of the cr/lf
72   public int[] lineEnds = new int[250];
73   public int linePtr = -1;
74   public boolean wasAcr = false; // NOTE(review): presumably "previous character was a CR" - confirm in pushLineSeparator()
75   // Identifier strings for the scanner's error conditions (NOTE(review): presumably carried by InvalidInputException - confirm at the throw sites)
76   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
77   // malformed literals, escapes and input
78   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
79   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
80   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
81   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
82   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
83   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
84   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
85   // missing source, unterminated constructs and bad string content
86   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
87   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
88   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
89   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
90
91   //---------------- optimized identifier management (shared one-letter arrays and tables for tokens of length 1..OptimizedLength) ------------------
92   static final char[] charArray_a = new char[] { 'a' },
93     charArray_b = new char[] { 'b' },
94     charArray_c = new char[] { 'c' },
95     charArray_d = new char[] { 'd' },
96     charArray_e = new char[] { 'e' },
97     charArray_f = new char[] { 'f' },
98     charArray_g = new char[] { 'g' },
99     charArray_h = new char[] { 'h' },
100     charArray_i = new char[] { 'i' },
101     charArray_j = new char[] { 'j' },
102     charArray_k = new char[] { 'k' },
103     charArray_l = new char[] { 'l' },
104     charArray_m = new char[] { 'm' },
105     charArray_n = new char[] { 'n' },
106     charArray_o = new char[] { 'o' },
107     charArray_p = new char[] { 'p' },
108     charArray_q = new char[] { 'q' },
109     charArray_r = new char[] { 'r' },
110     charArray_s = new char[] { 's' },
111     charArray_t = new char[] { 't' },
112     charArray_u = new char[] { 'u' },
113     charArray_v = new char[] { 'v' },
114     charArray_w = new char[] { 'w' },
115     charArray_x = new char[] { 'x' },
116     charArray_y = new char[] { 'y' },
117     charArray_z = new char[] { 'z' };
118
119   static final char[] initCharArray = // placeholder stored in every table slot by the instance initializer
120     new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
121   static final int TableSize = 30, InternalTableSize = 6;
122   // 30*6 = 180 entries per token length
123   public static final int OptimizedLength = 6; // getCurrentIdentifierSource() takes the optimized path for token lengths 1..6
124   public /*static*/
125   final char[][][][] charArray_length = // per-instance table (the static modifier is commented out)
126     new char[OptimizedLength][TableSize][InternalTableSize][];
127   // support for detecting non-externalized string literals (strings lacking a //$NON-NLS-n$ tag)
128   int currentLineNr = -1;
129   int previousLineNr = -1;
130   NLSLine currentLine = null; // reset to null by getNextToken() on a line break when recordLineSeparator is false
131   List lines = new ArrayList();
132   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
133   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
134   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
135   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
136   public StringLiteral[] nonNLSStrings = null;
137   public boolean checkNonExternalizedStringLiterals = true;
138   public boolean wasNonExternalizedStringLiteral = false;
139
140   /*static*/ {
141     for (int i = 0; i < 6; i++) {
142       for (int j = 0; j < TableSize; j++) {
143         for (int k = 0; k < InternalTableSize; k++) {
144           charArray_length[i][j][k] = initCharArray;
145         }
146       }
147     }
148   }
149   static int newEntry2 = 0, // NOTE(review): presumably insertion cursors for the length-2..6 tables of charArray_length - confirm in optimizedCurrentTokenSourceN()
150     newEntry3 = 0,
151     newEntry4 = 0,
152     newEntry5 = 0,
153     newEntry6 = 0; // these counters are static, whereas charArray_length itself is per-instance
154   // bracket categories (NOTE(review): BracketKinds is presumably the number of categories - confirm at the use sites)
155   public static final int RoundBracket = 0;
156   public static final int SquareBracket = 1;
157   public static final int CurlyBracket = 2;
158   public static final int BracketKinds = 3;
159
160   public static final boolean DEBUG = false;
161   // Convenience constructors: each one forwards to a constructor taking more arguments.
162   public Scanner() {
163     this(false, false); // tokenizeComments = false, tokenizeWhiteSpace = false
164   }
165   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
166     this(tokenizeComments, tokenizeWhiteSpace, false); // third argument fixed to false; its meaning is defined by the three-argument constructor
167   }
168
169   /**
170    * Determines if the specified character is
171    * permissible as the first character in a PHP identifier
172    */
173   public static boolean isPHPIdentifierStart(char ch) {
174     return Character.isLetter(ch)
175       || (ch == '_')
176       || (0x7F <= ch && ch <= 0xFF);
177   }
178
179   /**
180    * Determines if the specified character may be part of a PHP identifier as
181    * other than the first character
182    */
183   public static boolean isPHPIdentifierPart(char ch) {
184     return Character.isLetterOrDigit(ch)
185       || (ch == '_')
186       || (0x7F <= ch && ch <= 0xFF);
187   }
188
189   public final boolean atEnd() {
190     // This code is not relevant if source is 
191     // Only a part of the real stream input
192
193     return source.length == currentPosition;
194   }
195   public char[] getCurrentIdentifierSource() {
196     //return the token REAL source (aka unicodes are precomputed)
197
198     char[] result;
199 //    if (withoutUnicodePtr != 0)
200 //      //0 is used as a fast test flag so the real first char is in position 1
201 //      System.arraycopy(
202 //        withoutUnicodeBuffer,
203 //        1,
204 //        result = new char[withoutUnicodePtr],
205 //        0,
206 //        withoutUnicodePtr);
207 //    else {
208       int length = currentPosition - startPosition;
209       switch (length) { // see OptimizedLength
210         case 1 :
211           return optimizedCurrentTokenSource1();
212         case 2 :
213           return optimizedCurrentTokenSource2();
214         case 3 :
215           return optimizedCurrentTokenSource3();
216         case 4 :
217           return optimizedCurrentTokenSource4();
218         case 5 :
219           return optimizedCurrentTokenSource5();
220         case 6 :
221           return optimizedCurrentTokenSource6();
222       }
223       //no optimization
224       System.arraycopy(
225         source,
226         startPosition,
227         result = new char[length],
228         0,
229         length);
230  //   }
231     return result;
232   }
233   public int getCurrentTokenEndPosition() {
234     return this.currentPosition - 1;
235   }
236   public final char[] getCurrentTokenSource() {
237     // Return the token REAL source (aka unicodes are precomputed)
238
239     char[] result;
240 //    if (withoutUnicodePtr != 0)
241 //      // 0 is used as a fast test flag so the real first char is in position 1
242 //      System.arraycopy(
243 //        withoutUnicodeBuffer,
244 //        1,
245 //        result = new char[withoutUnicodePtr],
246 //        0,
247 //        withoutUnicodePtr);
248 //    else {
249       int length;
250       System.arraycopy(
251         source,
252         startPosition,
253         result = new char[length = currentPosition - startPosition],
254         0,
255         length);
256 //    }
257     return result;
258   }
259
260   public final char[] getCurrentTokenSource(int startPos) {
261     // Return the token REAL source (aka unicodes are precomputed)
262
263     char[] result;
264 //    if (withoutUnicodePtr != 0)
265 //      // 0 is used as a fast test flag so the real first char is in position 1
266 //      System.arraycopy(
267 //        withoutUnicodeBuffer,
268 //        1,
269 //        result = new char[withoutUnicodePtr],
270 //        0,
271 //        withoutUnicodePtr);
272 //    else {
273       int length;
274       System.arraycopy(
275         source,
276         startPos,
277         result = new char[length = currentPosition - startPos],
278         0,
279         length);
280   //  }
281     return result;
282   }
283
284   public final char[] getCurrentTokenSourceString() {
285     //return the token REAL source (aka unicodes are precomputed).
286     //REMOVE the two " that are at the beginning and the end.
287
288     char[] result;
289     if (withoutUnicodePtr != 0)
290       //0 is used as a fast test flag so the real first char is in position 1
291       System.arraycopy(withoutUnicodeBuffer, 2,
292       //2 is 1 (real start) + 1 (to jump over the ")
293       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
294     else {
295       int length;
296       System.arraycopy(
297         source,
298         startPosition + 1,
299         result = new char[length = currentPosition - startPosition - 2],
300         0,
301         length);
302     }
303     return result;
304   }
305   public int getCurrentTokenStartPosition() {
306     return this.startPosition;
307   }
308   /*
309    * Search the source position corresponding to the end of a given line number
310    *
311    * Line numbers are 1-based, and relative to the scanner initialPosition. 
312    * Character positions are 0-based.
313    *
314    * In case the given line number is inconsistent, answers -1.
315    */
316   public final int getLineEnd(int lineNumber) {
317
318     if (lineEnds == null)
319       return -1;
320     if (lineNumber >= lineEnds.length)
321       return -1;
322     if (lineNumber <= 0)
323       return -1;
324
325     if (lineNumber == lineEnds.length - 1)
326       return eofPosition;
327     return lineEnds[lineNumber - 1];
328     // next line start one character behind the lineEnd of the previous line
329   }
330   /**
331    * Search the source position corresponding to the beginning of a given line number
332    *
333    * Line numbers are 1-based, and relative to the scanner initialPosition. 
334    * Character positions are 0-based.
335    *
336    * e.g.       getLineStart(1) --> 0   i.e. first line starts at character 0.
337    *
338    * In case the given line number is inconsistent, answers -1.
339    */
340   public final int getLineStart(int lineNumber) {
341
342     if (lineEnds == null)
343       return -1;
344     if (lineNumber >= lineEnds.length)
345       return -1;
346     if (lineNumber <= 0)
347       return -1;
348
349     if (lineNumber == 1)
350       return initialPosition;
351     return lineEnds[lineNumber - 2] + 1;
352     // next line start one character behind the lineEnd of the previous line
353   }
354   public final boolean getNextChar(char testedChar) {
355     //BOOLEAN
356     //handle the case of unicode.
357     //when a unicode appears then we must use a buffer that holds char internal values
358     //At the end of this method currentCharacter holds the new visited char
359     //and currentPosition points right next after it
360     //Both previous lines are true if the currentCharacter is == to the testedChar
361     //On false, no side effect has occured.
362
363     //ALL getNextChar.... ARE OPTIMIZED COPIES 
364
365     int temp = currentPosition;
366     try {
367       currentCharacter = source[currentPosition++];
368 //      if (((currentCharacter = source[currentPosition++]) == '\\')
369 //        && (source[currentPosition] == 'u')) {
370 //        //-------------unicode traitement ------------
371 //        int c1, c2, c3, c4;
372 //        int unicodeSize = 6;
373 //        currentPosition++;
374 //        while (source[currentPosition] == 'u') {
375 //          currentPosition++;
376 //          unicodeSize++;
377 //        }
378 //
379 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
380 //          || c1 < 0)
381 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
382 //            || c2 < 0)
383 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
384 //            || c3 < 0)
385 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
386 //            || c4 < 0)) {
387 //          currentPosition = temp;
388 //          return false;
389 //        }
390 //
391 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
392 //        if (currentCharacter != testedChar) {
393 //          currentPosition = temp;
394 //          return false;
395 //        }
396 //        unicodeAsBackSlash = currentCharacter == '\\';
397 //
398 //        //need the unicode buffer
399 //        if (withoutUnicodePtr == 0) {
400 //          //buffer all the entries that have been left aside....
401 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
402 //          System.arraycopy(
403 //            source,
404 //            startPosition,
405 //            withoutUnicodeBuffer,
406 //            1,
407 //            withoutUnicodePtr);
408 //        }
409 //        //fill the buffer with the char
410 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
411 //        return true;
412 //
413 //      } //-------------end unicode traitement--------------
414 //      else {
415         if (currentCharacter != testedChar) {
416           currentPosition = temp;
417           return false;
418         }
419         unicodeAsBackSlash = false;
420 //        if (withoutUnicodePtr != 0)
421 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
422         return true;
423 //      }
424     } catch (IndexOutOfBoundsException e) {
425       unicodeAsBackSlash = false;
426       currentPosition = temp;
427       return false;
428     }
429   }
430   public final int getNextChar(char testedChar1, char testedChar2) {
431     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
432     //test can be done with (x==0) for the first and (x>0) for the second
433     //handle the case of unicode.
434     //when a unicode appears then we must use a buffer that holds char internal values
435     //At the end of this method currentCharacter holds the new visited char
436     //and currentPosition points right next after it
437     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
438     //On false, no side effect has occured.
439
440     //ALL getNextChar.... ARE OPTIMIZED COPIES 
441
442     int temp = currentPosition;
443     try {
444       int result;
445       currentCharacter = source[currentPosition++];
446 //      if (((currentCharacter = source[currentPosition++]) == '\\')
447 //        && (source[currentPosition] == 'u')) {
448 //        //-------------unicode traitement ------------
449 //        int c1, c2, c3, c4;
450 //        int unicodeSize = 6;
451 //        currentPosition++;
452 //        while (source[currentPosition] == 'u') {
453 //          currentPosition++;
454 //          unicodeSize++;
455 //        }
456 //
457 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
458 //          || c1 < 0)
459 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
460 //            || c2 < 0)
461 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
462 //            || c3 < 0)
463 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
464 //            || c4 < 0)) {
465 //          currentPosition = temp;
466 //          return 2;
467 //        }
468 //
469 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
470 //        if (currentCharacter == testedChar1)
471 //          result = 0;
472 //        else if (currentCharacter == testedChar2)
473 //          result = 1;
474 //        else {
475 //          currentPosition = temp;
476 //          return -1;
477 //        }
478 //
479 //        //need the unicode buffer
480 //        if (withoutUnicodePtr == 0) {
481 //          //buffer all the entries that have been left aside....
482 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
483 //          System.arraycopy(
484 //            source,
485 //            startPosition,
486 //            withoutUnicodeBuffer,
487 //            1,
488 //            withoutUnicodePtr);
489 //        }
490 //        //fill the buffer with the char
491 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
492 //        return result;
493 //      } //-------------end unicode traitement--------------
494 //      else {
495         if (currentCharacter == testedChar1)
496           result = 0;
497         else if (currentCharacter == testedChar2)
498           result = 1;
499         else {
500           currentPosition = temp;
501           return -1;
502         }
503
504 //        if (withoutUnicodePtr != 0)
505 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
506         return result;
507  //     }
508     } catch (IndexOutOfBoundsException e) {
509       currentPosition = temp;
510       return -1;
511     }
512   }
513   public final boolean getNextCharAsDigit() {
514     //BOOLEAN
515     //handle the case of unicode.
516     //when a unicode appears then we must use a buffer that holds char internal values
517     //At the end of this method currentCharacter holds the new visited char
518     //and currentPosition points right next after it
519     //Both previous lines are true if the currentCharacter is a digit
520     //On false, no side effect has occured.
521
522     //ALL getNextChar.... ARE OPTIMIZED COPIES 
523
524     int temp = currentPosition;
525     try {
526       currentCharacter = source[currentPosition++];
527 //      if (((currentCharacter = source[currentPosition++]) == '\\')
528 //        && (source[currentPosition] == 'u')) {
529 //        //-------------unicode traitement ------------
530 //        int c1, c2, c3, c4;
531 //        int unicodeSize = 6;
532 //        currentPosition++;
533 //        while (source[currentPosition] == 'u') {
534 //          currentPosition++;
535 //          unicodeSize++;
536 //        }
537 //
538 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
539 //          || c1 < 0)
540 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
541 //            || c2 < 0)
542 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
543 //            || c3 < 0)
544 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
545 //            || c4 < 0)) {
546 //          currentPosition = temp;
547 //          return false;
548 //        }
549 //
550 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
551 //        if (!Character.isDigit(currentCharacter)) {
552 //          currentPosition = temp;
553 //          return false;
554 //        }
555 //
556 //        //need the unicode buffer
557 //        if (withoutUnicodePtr == 0) {
558 //          //buffer all the entries that have been left aside....
559 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
560 //          System.arraycopy(
561 //            source,
562 //            startPosition,
563 //            withoutUnicodeBuffer,
564 //            1,
565 //            withoutUnicodePtr);
566 //        }
567 //        //fill the buffer with the char
568 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
569 //        return true;
570 //      } //-------------end unicode traitement--------------
571 //      else {
572         if (!Character.isDigit(currentCharacter)) {
573           currentPosition = temp;
574           return false;
575         }
576 //        if (withoutUnicodePtr != 0)
577 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
578         return true;
579 //      }
580     } catch (IndexOutOfBoundsException e) {
581       currentPosition = temp;
582       return false;
583     }
584   }
585   public final boolean getNextCharAsDigit(int radix) {
586     //BOOLEAN
587     //handle the case of unicode.
588     //when a unicode appears then we must use a buffer that holds char internal values
589     //At the end of this method currentCharacter holds the new visited char
590     //and currentPosition points right next after it
591     //Both previous lines are true if the currentCharacter is a digit base on radix
592     //On false, no side effect has occured.
593
594     //ALL getNextChar.... ARE OPTIMIZED COPIES 
595
596     int temp = currentPosition;
597     try {
598       currentCharacter = source[currentPosition++];
599 //      if (((currentCharacter = source[currentPosition++]) == '\\')
600 //        && (source[currentPosition] == 'u')) {
601 //        //-------------unicode traitement ------------
602 //        int c1, c2, c3, c4;
603 //        int unicodeSize = 6;
604 //        currentPosition++;
605 //        while (source[currentPosition] == 'u') {
606 //          currentPosition++;
607 //          unicodeSize++;
608 //        }
609 //
610 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
611 //          || c1 < 0)
612 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
613 //            || c2 < 0)
614 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
615 //            || c3 < 0)
616 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
617 //            || c4 < 0)) {
618 //          currentPosition = temp;
619 //          return false;
620 //        }
621 //
622 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
623 //        if (Character.digit(currentCharacter, radix) == -1) {
624 //          currentPosition = temp;
625 //          return false;
626 //        }
627 //
628 //        //need the unicode buffer
629 //        if (withoutUnicodePtr == 0) {
630 //          //buffer all the entries that have been left aside....
631 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
632 //          System.arraycopy(
633 //            source,
634 //            startPosition,
635 //            withoutUnicodeBuffer,
636 //            1,
637 //            withoutUnicodePtr);
638 //        }
639 //        //fill the buffer with the char
640 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
641 //        return true;
642 //      } //-------------end unicode traitement--------------
643 //      else {
644         if (Character.digit(currentCharacter, radix) == -1) {
645           currentPosition = temp;
646           return false;
647         }
648 //        if (withoutUnicodePtr != 0)
649 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
650         return true;
651 //      }
652     } catch (IndexOutOfBoundsException e) {
653       currentPosition = temp;
654       return false;
655     }
656   }
657   public boolean getNextCharAsJavaIdentifierPart() {
658     //BOOLEAN
659     //handle the case of unicode.
660     //when a unicode appears then we must use a buffer that holds char internal values
661     //At the end of this method currentCharacter holds the new visited char
662     //and currentPosition points right next after it
663     //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
664     //On false, no side effect has occured.
665
666     //ALL getNextChar.... ARE OPTIMIZED COPIES 
667
668     int temp = currentPosition;
669     try {
670       currentCharacter = source[currentPosition++];
671 //      if (((currentCharacter = source[currentPosition++]) == '\\')
672 //        && (source[currentPosition] == 'u')) {
673 //        //-------------unicode traitement ------------
674 //        int c1, c2, c3, c4;
675 //        int unicodeSize = 6;
676 //        currentPosition++;
677 //        while (source[currentPosition] == 'u') {
678 //          currentPosition++;
679 //          unicodeSize++;
680 //        }
681 //
682 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
683 //          || c1 < 0)
684 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
685 //            || c2 < 0)
686 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
687 //            || c3 < 0)
688 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
689 //            || c4 < 0)) {
690 //          currentPosition = temp;
691 //          return false;
692 //        }
693 //
694 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
695 //        if (!isPHPIdentifierPart(currentCharacter)) {
696 //          currentPosition = temp;
697 //          return false;
698 //        }
699 //
700 //        //need the unicode buffer
701 //        if (withoutUnicodePtr == 0) {
702 //          //buffer all the entries that have been left aside....
703 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
704 //          System.arraycopy(
705 //            source,
706 //            startPosition,
707 //            withoutUnicodeBuffer,
708 //            1,
709 //            withoutUnicodePtr);
710 //        }
711 //        //fill the buffer with the char
712 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
713 //        return true;
714 //      } //-------------end unicode traitement--------------
715 //      else {
716         if (!isPHPIdentifierPart(currentCharacter)) {
717           currentPosition = temp;
718           return false;
719         }
720
721 //        if (withoutUnicodePtr != 0)
722 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
723         return true;
724 //      }
725     } catch (IndexOutOfBoundsException e) {
726       currentPosition = temp;
727       return false;
728     }
729   }
730
731   public int getNextToken() throws InvalidInputException {
732     int htmlPosition = currentPosition;
733     try {
734       while (!phpMode) {
735         currentCharacter = source[currentPosition++];
736         if (currentCharacter == '<') {
737           if (getNextChar('?')) {
738             currentCharacter = source[currentPosition++];
739             if ((currentCharacter == ' ')
740               || Character.isWhitespace(currentCharacter)) {
741               // <?
742               startPosition = currentPosition;
743               phpMode = true;
744               if (tokenizeWhiteSpace) {
745                 // && (whiteStart != currentPosition - 1)) {
746                 // reposition scanner in case we are interested by spaces as tokens
747                 startPosition = htmlPosition;
748                 return TokenNameHTML;
749               }
750             } else {
751               boolean phpStart =
752                 (currentCharacter == 'P') || (currentCharacter == 'p');
753               if (phpStart) {
754                 int test = getNextChar('H', 'h');
755                 if (test >= 0) {
756                   test = getNextChar('P', 'p');
757                   if (test >= 0) {
758                     // <?PHP  <?php
759                     startPosition = currentPosition;
760                     phpMode = true;
761
762                     if (tokenizeWhiteSpace) {
763                       // && (whiteStart != currentPosition - 1)) {
764                       // reposition scanner in case we are interested by spaces as tokens
765                       startPosition = htmlPosition;
766                       return TokenNameHTML;
767                     }
768                   }
769                 }
770               }
771             }
772           }
773         }
774
775         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
776           if (recordLineSeparator) {
777             pushLineSeparator();
778           } else {
779             currentLine = null;
780           }
781         }
782       }
783     } //-----------------end switch while try--------------------
784     catch (IndexOutOfBoundsException e) {
785       if (tokenizeWhiteSpace) {
786         // && (whiteStart != currentPosition - 1)) {
787         // reposition scanner in case we are interested by spaces as tokens
788         startPosition = htmlPosition;
789       }
790       return TokenNameEOF;
791     }
792
793     if (phpMode) {
794       this.wasAcr = false;
795       if (diet) {
796         jumpOverMethodBody();
797         diet = false;
798         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
799       }
800       try {
801         while (true) { //loop for jumping over comments
802           withoutUnicodePtr = 0;
803           //start with a new token (even comment written with unicode )
804
805           // ---------Consume white space and handles startPosition---------
806           int whiteStart = currentPosition;
807           boolean isWhiteSpace;
808           do {
809             startPosition = currentPosition;
810             currentCharacter = source[currentPosition++];
811 //            if (((currentCharacter = source[currentPosition++]) == '\\')
812 //              && (source[currentPosition] == 'u')) {
813 //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
814 //            } else {
815               if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
816                 checkNonExternalizeString();
817                 if (recordLineSeparator) {
818                   pushLineSeparator();
819                 } else {
820                   currentLine = null;
821                 }
822               }
823               isWhiteSpace =
824                 (currentCharacter == ' ')
825                   || Character.isWhitespace(currentCharacter);
826 //            }
827           } while (isWhiteSpace);
828           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
829             // reposition scanner in case we are interested by spaces as tokens
830             currentPosition--;
831             startPosition = whiteStart;
832             return TokenNameWHITESPACE;
833           }
834           //little trick to get out in the middle of a source computation
835           if (currentPosition > eofPosition)
836             return TokenNameEOF;
837
838           // ---------Identify the next token-------------
839
840           switch (currentCharacter) {
841             case '(' :
842               return TokenNameLPAREN;
843             case ')' :
844               return TokenNameRPAREN;
845             case '{' :
846               return TokenNameLBRACE;
847             case '}' :
848               return TokenNameRBRACE;
849             case '[' :
850               return TokenNameLBRACKET;
851             case ']' :
852               return TokenNameRBRACKET;
853             case ';' :
854               return TokenNameSEMICOLON;
855             case ',' :
856               return TokenNameCOMMA;
857
858             case '.' :
859               if (getNextCharAsDigit())
860                 return scanNumber(true);
861               return TokenNameDOT;
862             case '+' :
863               {
864                 int test;
865                 if ((test = getNextChar('+', '=')) == 0)
866                   return TokenNamePLUS_PLUS;
867                 if (test > 0)
868                   return TokenNamePLUS_EQUAL;
869                 return TokenNamePLUS;
870               }
871             case '-' :
872               {
873                 int test;
874                 if ((test = getNextChar('-', '=')) == 0)
875                   return TokenNameMINUS_MINUS;
876                 if (test > 0)
877                   return TokenNameMINUS_EQUAL;
878                 if (getNextChar('>'))
879                   return TokenNameMINUS_GREATER;
880
881                 return TokenNameMINUS;
882               }
883             case '~' :
884               if (getNextChar('='))
885                 return TokenNameTWIDDLE_EQUAL;
886               return TokenNameTWIDDLE;
887             case '!' :
888               if (getNextChar('='))
889                 return TokenNameNOT_EQUAL;
890               return TokenNameNOT;
891             case '*' :
892               if (getNextChar('='))
893                 return TokenNameMULTIPLY_EQUAL;
894               return TokenNameMULTIPLY;
895             case '%' :
896               if (getNextChar('='))
897                 return TokenNameREMAINDER_EQUAL;
898               return TokenNameREMAINDER;
899             case '<' :
900               {
901                 int test;
902                 if ((test = getNextChar('=', '<')) == 0)
903                   return TokenNameLESS_EQUAL;
904                 if (test > 0) {
905                   if (getNextChar('='))
906                     return TokenNameLEFT_SHIFT_EQUAL;
907                   if (getNextChar('<')) {
908                     int heredocStart = currentPosition;
909                     int heredocLength = 0;
910                     currentCharacter = source[currentPosition++];
911                     if (isPHPIdentifierStart(currentCharacter)) {
912                       currentCharacter = source[currentPosition++];
913                     } else {
914                       return TokenNameERROR;
915                     }
916                     while (isPHPIdentifierPart(currentCharacter)) {
917                       currentCharacter = source[currentPosition++];
918                     }
919
920                     heredocLength = currentPosition - heredocStart - 1;
921
922                     // heredoc end-tag determination
923                     boolean endTag = true;
924                     char ch;
925                     do {
926                       ch = source[currentPosition++];
927                       if (ch == '\r' || ch == '\n') {
928                         if (recordLineSeparator) {
929                           pushLineSeparator();
930                         } else {
931                           currentLine = null;
932                         }
933                         for (int i = 0; i < heredocLength; i++) {
934                           if (source[currentPosition + i]
935                             != source[heredocStart + i]) {
936                             endTag = false;
937                             break;
938                           }
939                         }
940                         if (endTag) {
941                           currentPosition += heredocLength - 1;
942                           currentCharacter = source[currentPosition++];
943                           break; // do...while loop
944                         } else {
945                           endTag = true;
946                         }
947                       }
948
949                     } while (true);
950
951                     return TokenNameHEREDOC;
952                   }
953                   return TokenNameLEFT_SHIFT;
954                 }
955                 return TokenNameLESS;
956               }
957             case '>' :
958               {
959                 int test;
960                 if ((test = getNextChar('=', '>')) == 0)
961                   return TokenNameGREATER_EQUAL;
962                 if (test > 0) {
963                   if ((test = getNextChar('=', '>')) == 0)
964                     return TokenNameRIGHT_SHIFT_EQUAL;
965                   return TokenNameRIGHT_SHIFT;
966                 }
967                 return TokenNameGREATER;
968               }
969             case '=' :
970               if (getNextChar('='))
971                 return TokenNameEQUAL_EQUAL;
972               if (getNextChar('>'))
973                 return TokenNameEQUAL_GREATER;
974               return TokenNameEQUAL;
975             case '&' :
976               {
977                 int test;
978                 if ((test = getNextChar('&', '=')) == 0)
979                   return TokenNameAND_AND;
980                 if (test > 0)
981                   return TokenNameAND_EQUAL;
982                 return TokenNameAND;
983               }
984             case '|' :
985               {
986                 int test;
987                 if ((test = getNextChar('|', '=')) == 0)
988                   return TokenNameOR_OR;
989                 if (test > 0)
990                   return TokenNameOR_EQUAL;
991                 return TokenNameOR;
992               }
993             case '^' :
994               if (getNextChar('='))
995                 return TokenNameXOR_EQUAL;
996               return TokenNameXOR;
997             case '?' :
998               if (getNextChar('>')) {
999                 phpMode = false;
1000                 return TokenNameStopPHP;
1001               }
1002               return TokenNameQUESTION;
1003             case ':' :
1004               if (getNextChar(':'))
1005                 return TokenNameCOLON_COLON;
1006               return TokenNameCOLON;
1007             case '@' :
1008               return TokenNameAT;
1009               //                                        case '\'' :
1010               //                                                {
1011               //                                                        int test;
1012               //                                                        if ((test = getNextChar('\n', '\r')) == 0) {
1013               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1014               //                                                        }
1015               //                                                        if (test > 0) {
1016               //                                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1017               //                                                                for (int lookAhead = 0;
1018               //                                                                        lookAhead < 3;
1019               //                                                                        lookAhead++) {
1020               //                                                                        if (currentPosition + lookAhead
1021               //                                                                                == source.length)
1022               //                                                                                break;
1023               //                                                                        if (source[currentPosition + lookAhead]
1024               //                                                                                == '\n')
1025               //                                                                                break;
1026               //                                                                        if (source[currentPosition + lookAhead]
1027               //                                                                                == '\'') {
1028               //                                                                                currentPosition += lookAhead + 1;
1029               //                                                                                break;
1030               //                                                                        }
1031               //                                                                }
1032               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1033               //                                                        }
1034               //                                                }
1035               //                                                if (getNextChar('\'')) {
1036               //                                                        // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1037               //                                                        for (int lookAhead = 0;
1038               //                                                                lookAhead < 3;
1039               //                                                                lookAhead++) {
1040               //                                                                if (currentPosition + lookAhead
1041               //                                                                        == source.length)
1042               //                                                                        break;
1043               //                                                                if (source[currentPosition + lookAhead]
1044               //                                                                        == '\n')
1045               //                                                                        break;
1046               //                                                                if (source[currentPosition + lookAhead]
1047               //                                                                        == '\'') {
1048               //                                                                        currentPosition += lookAhead + 1;
1049               //                                                                        break;
1050               //                                                                }
1051               //                                                        }
1052               //                                                        throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1053               //                                                }
1054               //                                                if (getNextChar('\\'))
1055               //                                                        scanEscapeCharacter();
1056               //                                                else { // consume next character
1057               //                                                        unicodeAsBackSlash = false;
1058               //                                                        if (((currentCharacter = source[currentPosition++])
1059               //                                                                == '\\')
1060               //                                                                && (source[currentPosition] == 'u')) {
1061               //                                                                getNextUnicodeChar();
1062               //                                                        } else {
1063               //                                                                if (withoutUnicodePtr != 0) {
1064               //                                                                        withoutUnicodeBuffer[++withoutUnicodePtr] =
1065               //                                                                                currentCharacter;
1066               //                                                                }
1067               //                                                        }
1068               //                                                }
1069               //                                                //            if (getNextChar('\''))
1070               //                                                //              return TokenNameCharacterLiteral;
1071               //                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1072               //                                                for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1073               //                                                        if (currentPosition + lookAhead == source.length)
1074               //                                                                break;
1075               //                                                        if (source[currentPosition + lookAhead] == '\n')
1076               //                                                                break;
1077               //                                                        if (source[currentPosition + lookAhead] == '\'') {
1078               //                                                                currentPosition += lookAhead + 1;
1079               //                                                                break;
1080               //                                                        }
1081               //                                                }
1082               //                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1083             case '\'' :
1084               try {
1085                 // consume next character
1086                 unicodeAsBackSlash = false;
1087                 currentCharacter = source[currentPosition++];
1088 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1089 //                  && (source[currentPosition] == 'u')) {
1090 //                  getNextUnicodeChar();
1091 //                } else {
1092 //                  if (withoutUnicodePtr != 0) {
1093 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1094 //                      currentCharacter;
1095 //                  }
1096 //                }
1097
1098                 while (currentCharacter != '\'') {
1099
1100                   /**** in PHP \r and \n are valid in string literals ****/
1101                   //                  if ((currentCharacter == '\n')
1102                   //                    || (currentCharacter == '\r')) {
1103                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1104                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1105                   //                      if (currentPosition + lookAhead == source.length)
1106                   //                        break;
1107                   //                      if (source[currentPosition + lookAhead] == '\n')
1108                   //                        break;
1109                   //                      if (source[currentPosition + lookAhead] == '\"') {
1110                   //                        currentPosition += lookAhead + 1;
1111                   //                        break;
1112                   //                      }
1113                   //                    }
1114                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1115                   //                  }
1116                   if (currentCharacter == '\\') {
1117                     int escapeSize = currentPosition;
1118                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1119                     //the escape-scanning call below has a side effect on this value, and we need the previous value a few lines further down
1120                     scanSingleQuotedEscapeCharacter();
1121                     escapeSize = currentPosition - escapeSize;
1122                     if (withoutUnicodePtr == 0) {
1123                       //buffer all the entries that have been left aside....
1124                       withoutUnicodePtr =
1125                         currentPosition - escapeSize - 1 - startPosition;
1126                       System.arraycopy(
1127                         source,
1128                         startPosition,
1129                         withoutUnicodeBuffer,
1130                         1,
1131                         withoutUnicodePtr);
1132                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1133                         currentCharacter;
1134                     } else { //overwrite the / in the buffer
1135                       withoutUnicodeBuffer[withoutUnicodePtr] =
1136                         currentCharacter;
1137                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1138                         withoutUnicodePtr--;
1139                       }
1140                     }
1141                   }
1142                   // consume next character
1143                   unicodeAsBackSlash = false;
1144                   currentCharacter = source[currentPosition++];
1145 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1146 //                    && (source[currentPosition] == 'u')) {
1147 //                    getNextUnicodeChar();
1148 //                  } else {
1149                     if (withoutUnicodePtr != 0) {
1150                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1151                         currentCharacter;
1152                     }
1153 //                  }
1154
1155                 }
1156               } catch (IndexOutOfBoundsException e) {
1157                 throw new InvalidInputException(UNTERMINATED_STRING);
1158               } catch (InvalidInputException e) {
1159                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1160                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1161                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1162                     if (currentPosition + lookAhead == source.length)
1163                       break;
1164                     if (source[currentPosition + lookAhead] == '\n')
1165                       break;
1166                     if (source[currentPosition + lookAhead] == '\'') {
1167                       currentPosition += lookAhead + 1;
1168                       break;
1169                     }
1170                   }
1171
1172                 }
1173                 throw e; // rethrow
1174               }
1175               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1176                 if (currentLine == null) {
1177                   currentLine = new NLSLine();
1178                   lines.add(currentLine);
1179                 }
1180                 currentLine.add(
1181                   new StringLiteral(
1182                     getCurrentTokenSourceString(),
1183                     startPosition,
1184                     currentPosition - 1));
1185               }
1186               return TokenNameStringConstant;
1187             case '"' :
1188               try {
1189                 // consume next character
1190                 unicodeAsBackSlash = false;
1191                 currentCharacter = source[currentPosition++];
1192 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1193 //                  && (source[currentPosition] == 'u')) {
1194 //                  getNextUnicodeChar();
1195 //                } else {
1196 //                  if (withoutUnicodePtr != 0) {
1197 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1198 //                      currentCharacter;
1199 //                  }
1200 //                }
1201
1202                 while (currentCharacter != '"') {
1203
1204                   /**** in PHP \r and \n are valid in string literals ****/
1205                   //                  if ((currentCharacter == '\n')
1206                   //                    || (currentCharacter == '\r')) {
1207                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1208                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1209                   //                      if (currentPosition + lookAhead == source.length)
1210                   //                        break;
1211                   //                      if (source[currentPosition + lookAhead] == '\n')
1212                   //                        break;
1213                   //                      if (source[currentPosition + lookAhead] == '\"') {
1214                   //                        currentPosition += lookAhead + 1;
1215                   //                        break;
1216                   //                      }
1217                   //                    }
1218                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1219                   //                  }
1220                   if (currentCharacter == '\\') {
1221                     int escapeSize = currentPosition;
1222                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1223                     //the escape-scanning call below has a side effect on this value, and we need the previous value a few lines further down
1224                     scanDoubleQuotedEscapeCharacter();
1225                     escapeSize = currentPosition - escapeSize;
1226                     if (withoutUnicodePtr == 0) {
1227                       //buffer all the entries that have been left aside....
1228                       withoutUnicodePtr =
1229                         currentPosition - escapeSize - 1 - startPosition;
1230                       System.arraycopy(
1231                         source,
1232                         startPosition,
1233                         withoutUnicodeBuffer,
1234                         1,
1235                         withoutUnicodePtr);
1236                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1237                         currentCharacter;
1238                     } else { //overwrite the / in the buffer
1239                       withoutUnicodeBuffer[withoutUnicodePtr] =
1240                         currentCharacter;
1241                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1242                         withoutUnicodePtr--;
1243                       }
1244                     }
1245                   }
1246                   // consume next character
1247                   unicodeAsBackSlash = false;
1248                   currentCharacter = source[currentPosition++];
1249 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1250 //                    && (source[currentPosition] == 'u')) {
1251 //                    getNextUnicodeChar();
1252 //                  } else {
1253                     if (withoutUnicodePtr != 0) {
1254                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1255                         currentCharacter;
1256                     }
1257 //                  }
1258
1259                 }
1260               } catch (IndexOutOfBoundsException e) {
1261                 throw new InvalidInputException(UNTERMINATED_STRING);
1262               } catch (InvalidInputException e) {
1263                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1264                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1265                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1266                     if (currentPosition + lookAhead == source.length)
1267                       break;
1268                     if (source[currentPosition + lookAhead] == '\n')
1269                       break;
1270                     if (source[currentPosition + lookAhead] == '\"') {
1271                       currentPosition += lookAhead + 1;
1272                       break;
1273                     }
1274                   }
1275
1276                 }
1277                 throw e; // rethrow
1278               }
1279               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1280                 if (currentLine == null) {
1281                   currentLine = new NLSLine();
1282                   lines.add(currentLine);
1283                 }
1284                 currentLine.add(
1285                   new StringLiteral(
1286                     getCurrentTokenSourceString(),
1287                     startPosition,
1288                     currentPosition - 1));
1289               }
1290               return TokenNameStringLiteral;
1291             case '`' :
1292               try {
1293                 // consume next character
1294                 unicodeAsBackSlash = false;
1295                 currentCharacter = source[currentPosition++];
1296 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1297 //                  && (source[currentPosition] == 'u')) {
1298 //                  getNextUnicodeChar();
1299 //                } else {
1300 //                  if (withoutUnicodePtr != 0) {
1301 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1302 //                      currentCharacter;
1303 //                  }
1304 //                }
1305
1306                 while (currentCharacter != '`') {
1307
1308                   /**** in PHP \r and \n are valid in string literals ****/
1309                   //                if ((currentCharacter == '\n')
1310                   //                  || (currentCharacter == '\r')) {
1311                   //                  // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1312                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1313                   //                    if (currentPosition + lookAhead == source.length)
1314                   //                      break;
1315                   //                    if (source[currentPosition + lookAhead] == '\n')
1316                   //                      break;
1317                   //                    if (source[currentPosition + lookAhead] == '\"') {
1318                   //                      currentPosition += lookAhead + 1;
1319                   //                      break;
1320                   //                    }
1321                   //                  }
1322                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1323                   //                }
1324                   if (currentCharacter == '\\') {
1325                     int escapeSize = currentPosition;
1326                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1327                     //the escape-scanning call below has a side effect on this value, and we need the previous value a few lines further down
1328                     scanDoubleQuotedEscapeCharacter();
1329                     escapeSize = currentPosition - escapeSize;
1330                     if (withoutUnicodePtr == 0) {
1331                       //buffer all the entries that have been left aside....
1332                       withoutUnicodePtr =
1333                         currentPosition - escapeSize - 1 - startPosition;
1334                       System.arraycopy(
1335                         source,
1336                         startPosition,
1337                         withoutUnicodeBuffer,
1338                         1,
1339                         withoutUnicodePtr);
1340                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1341                         currentCharacter;
1342                     } else { //overwrite the / in the buffer
1343                       withoutUnicodeBuffer[withoutUnicodePtr] =
1344                         currentCharacter;
1345                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1346                         withoutUnicodePtr--;
1347                       }
1348                     }
1349                   }
1350                   // consume next character
1351                   unicodeAsBackSlash = false;
1352                   currentCharacter = source[currentPosition++];
1353 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1354 //                    && (source[currentPosition] == 'u')) {
1355 //                    getNextUnicodeChar();
1356 //                  } else {
1357                     if (withoutUnicodePtr != 0) {
1358                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1359                         currentCharacter;
1360                     }
1361 //                  }
1362
1363                 }
1364               } catch (IndexOutOfBoundsException e) {
1365                 throw new InvalidInputException(UNTERMINATED_STRING);
1366               } catch (InvalidInputException e) {
1367                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1368                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1369                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1370                     if (currentPosition + lookAhead == source.length)
1371                       break;
1372                     if (source[currentPosition + lookAhead] == '\n')
1373                       break;
1374                     if (source[currentPosition + lookAhead] == '`') {
1375                       currentPosition += lookAhead + 1;
1376                       break;
1377                     }
1378                   }
1379
1380                 }
1381                 throw e; // rethrow
1382               }
1383               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1384                 if (currentLine == null) {
1385                   currentLine = new NLSLine();
1386                   lines.add(currentLine);
1387                 }
1388                 currentLine.add(
1389                   new StringLiteral(
1390                     getCurrentTokenSourceString(),
1391                     startPosition,
1392                     currentPosition - 1));
1393               }
1394               return TokenNameStringInterpolated;
1395             case '#' :
1396             case '/' :
1397               {
1398                 int test;
1399                 if ((currentCharacter == '#')
1400                   || (test = getNextChar('/', '*')) == 0) {
1401                   //line comment 
1402                   int endPositionForLineComment = 0;
1403                   try { //get the next char 
1404                     currentCharacter = source[currentPosition++];
1405 //                    if (((currentCharacter = source[currentPosition++])
1406 //                      == '\\')
1407 //                      && (source[currentPosition] == 'u')) {
1408 //                      //-------------unicode traitement ------------
1409 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1410 //                      currentPosition++;
1411 //                      while (source[currentPosition] == 'u') {
1412 //                        currentPosition++;
1413 //                      }
1414 //                      if ((c1 =
1415 //                        Character.getNumericValue(source[currentPosition++]))
1416 //                        > 15
1417 //                        || c1 < 0
1418 //                        || (c2 =
1419 //                          Character.getNumericValue(source[currentPosition++]))
1420 //                          > 15
1421 //                        || c2 < 0
1422 //                        || (c3 =
1423 //                          Character.getNumericValue(source[currentPosition++]))
1424 //                          > 15
1425 //                        || c3 < 0
1426 //                        || (c4 =
1427 //                          Character.getNumericValue(source[currentPosition++]))
1428 //                          > 15
1429 //                        || c4 < 0) {
1430 //                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1431 //                      } else {
1432 //                        currentCharacter =
1433 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1434 //                      }
1435 //                    }
1436
1437                     //handle the \\u case manually into comment
1438 //                    if (currentCharacter == '\\') {
1439 //                      if (source[currentPosition] == '\\')
1440 //                        currentPosition++;
1441 //                    } //jump over the \\
1442                     boolean isUnicode = false;
1443                     while (currentCharacter != '\r'
1444                       && currentCharacter != '\n') {
1445                       if (currentCharacter == '?') {
1446                         if (getNextChar('>')) {
1447                           startPosition = currentPosition - 2;
1448                           phpMode = false;
1449                           return TokenNameStopPHP;
1450                         }
1451                       }
1452
1453                       //get the next char
1454                       isUnicode = false;
1455                       currentCharacter = source[currentPosition++];
1456 //                      if (((currentCharacter = source[currentPosition++])
1457 //                        == '\\')
1458 //                        && (source[currentPosition] == 'u')) {
1459 //                        isUnicode = true;
1460 //                        //-------------unicode traitement ------------
1461 //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1462 //                        currentPosition++;
1463 //                        while (source[currentPosition] == 'u') {
1464 //                          currentPosition++;
1465 //                        }
1466 //                        if ((c1 =
1467 //                          Character.getNumericValue(source[currentPosition++]))
1468 //                          > 15
1469 //                          || c1 < 0
1470 //                          || (c2 =
1471 //                            Character.getNumericValue(
1472 //                              source[currentPosition++]))
1473 //                            > 15
1474 //                          || c2 < 0
1475 //                          || (c3 =
1476 //                            Character.getNumericValue(
1477 //                              source[currentPosition++]))
1478 //                            > 15
1479 //                          || c3 < 0
1480 //                          || (c4 =
1481 //                            Character.getNumericValue(
1482 //                              source[currentPosition++]))
1483 //                            > 15
1484 //                          || c4 < 0) {
1485 //                          throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1486 //                        } else {
1487 //                          currentCharacter =
1488 //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1489 //                        }
1490 //                      }
1491                       //handle the \\u case manually into comment
1492 //                      if (currentCharacter == '\\') {
1493 //                        if (source[currentPosition] == '\\')
1494 //                          currentPosition++;
1495 //                      } //jump over the \\
1496                     }
1497                     if (isUnicode) {
1498                       endPositionForLineComment = currentPosition - 6;
1499                     } else {
1500                       endPositionForLineComment = currentPosition - 1;
1501                     }
1502                     recordComment(false);
1503                     if ((currentCharacter == '\r')
1504                       || (currentCharacter == '\n')) {
1505                       checkNonExternalizeString();
1506                       if (recordLineSeparator) {
1507                         if (isUnicode) {
1508                           pushUnicodeLineSeparator();
1509                         } else {
1510                           pushLineSeparator();
1511                         }
1512                       } else {
1513                         currentLine = null;
1514                       }
1515                     }
1516                     if (tokenizeComments) {
1517                       if (!isUnicode) {
1518                         currentPosition = endPositionForLineComment;
1519                         // reset one character behind
1520                       }
1521                       return TokenNameCOMMENT_LINE;
1522                     }
1523                   } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1524                     if (tokenizeComments) {
1525                       currentPosition--;
1526                       // reset one character behind
1527                       return TokenNameCOMMENT_LINE;
1528                     }
1529                   }
1530                   break;
1531                 }
1532                 if (test > 0) {
1533                   //traditional and annotation comment
1534                   boolean isJavadoc = false, star = false;
1535                   // consume next character
1536                   unicodeAsBackSlash = false;
1537                   currentCharacter = source[currentPosition++];
1538 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1539 //                    && (source[currentPosition] == 'u')) {
1540 //                    getNextUnicodeChar();
1541 //                  } else {
1542 //                    if (withoutUnicodePtr != 0) {
1543 //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1544 //                        currentCharacter;
1545 //                    }
1546 //                  }
1547
1548                   if (currentCharacter == '*') {
1549                     isJavadoc = true;
1550                     star = true;
1551                   }
1552                   if ((currentCharacter == '\r')
1553                     || (currentCharacter == '\n')) {
1554                     checkNonExternalizeString();
1555                     if (recordLineSeparator) {
1556                       pushLineSeparator();
1557                     } else {
1558                       currentLine = null;
1559                     }
1560                   }
1561                   try { //get the next char 
1562                     currentCharacter = source[currentPosition++];
1563 //                    if (((currentCharacter = source[currentPosition++])
1564 //                      == '\\')
1565 //                      && (source[currentPosition] == 'u')) {
1566 //                      //-------------unicode traitement ------------
1567 //                      getNextUnicodeChar();
1568 //                    }
1569                     //handle the \\u case manually into comment
1570 //                    if (currentCharacter == '\\') {
1571 //                      if (source[currentPosition] == '\\')
1572 //                        currentPosition++;
1573 //                      //jump over the \\
1574 //                    }
1575                     // empty comment is not a javadoc /**/
1576                     if (currentCharacter == '/') {
1577                       isJavadoc = false;
1578                     }
1579                     //loop until end of comment */
1580                     while ((currentCharacter != '/') || (!star)) {
1581                       if ((currentCharacter == '\r')
1582                         || (currentCharacter == '\n')) {
1583                         checkNonExternalizeString();
1584                         if (recordLineSeparator) {
1585                           pushLineSeparator();
1586                         } else {
1587                           currentLine = null;
1588                         }
1589                       }
1590                       star = currentCharacter == '*';
1591                       //get next char
1592                       currentCharacter = source[currentPosition++];
1593 //                      if (((currentCharacter = source[currentPosition++])
1594 //                        == '\\')
1595 //                        && (source[currentPosition] == 'u')) {
1596 //                        //-------------unicode traitement ------------
1597 //                        getNextUnicodeChar();
1598 //                      }
1599                       //handle the \\u case manually into comment
1600 //                      if (currentCharacter == '\\') {
1601 //                        if (source[currentPosition] == '\\')
1602 //                          currentPosition++;
1603 //                      } //jump over the \\
1604                     }
1605                     recordComment(isJavadoc);
1606                     if (tokenizeComments) {
1607                       if (isJavadoc)
1608                         return TokenNameCOMMENT_PHPDOC;
1609                       return TokenNameCOMMENT_BLOCK;
1610                     }
1611                   } catch (IndexOutOfBoundsException e) {
1612                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1613                   }
1614                   break;
1615                 }
1616                 if (getNextChar('='))
1617                   return TokenNameDIVIDE_EQUAL;
1618                 return TokenNameDIVIDE;
1619               }
1620             case '\u001a' :
1621               if (atEnd())
1622                 return TokenNameEOF;
1623               //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1624               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1625
1626             default :
1627               if (currentCharacter == '$') {
1628                 while ((currentCharacter = source[currentPosition++]) == '$') {
1629                 }
1630                 if (currentCharacter == '{')
1631                   return TokenNameDOLLAR_LBRACE;
1632                 if (isPHPIdentifierStart(currentCharacter))
1633                   return scanIdentifierOrKeyword(true);
1634                 return TokenNameERROR;
1635               }
1636               if (isPHPIdentifierStart(currentCharacter))
1637                 return scanIdentifierOrKeyword(false);
1638               if (Character.isDigit(currentCharacter))
1639                 return scanNumber(false);
1640               return TokenNameERROR;
1641           }
1642         }
1643       } //-----------------end switch while try--------------------
1644       catch (IndexOutOfBoundsException e) {
1645       }
1646     }
1647     return TokenNameEOF;
1648   }
1649
1650 //  public final void getNextUnicodeChar()
1651 //    throws IndexOutOfBoundsException, InvalidInputException {
1652 //    //VOID
1653 //    //handle the case of unicode.
1654 //    //when a unicode appears then we must use a buffer that holds char internal values
1655 //    //At the end of this method currentCharacter holds the new visited char
1656 //    //and currentPosition points right next after it
1657 //
1658 //    //ALL getNextChar.... ARE OPTIMIZED COPIES 
1659 //
1660 //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1661 //    currentPosition++;
1662 //    while (source[currentPosition] == 'u') {
1663 //      currentPosition++;
1664 //      unicodeSize++;
1665 //    }
1666 //
1667 //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1668 //      || c1 < 0
1669 //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1670 //      || c2 < 0
1671 //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1672 //      || c3 < 0
1673 //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1674 //      || c4 < 0) {
1675 //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1676 //    } else {
1677 //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1678 //      //need the unicode buffer
1679 //      if (withoutUnicodePtr == 0) {
1680 //        //buffer all the entries that have been left aside....
1681 //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1682 //        System.arraycopy(
1683 //          source,
1684 //          startPosition,
1685 //          withoutUnicodeBuffer,
1686 //          1,
1687 //          withoutUnicodePtr);
1688 //      }
1689 //      //fill the buffer with the char
1690 //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1691 //    }
1692 //    unicodeAsBackSlash = currentCharacter == '\\';
1693 //  }
1694   /* Tokenize a method body, assuming that curly brackets are properly balanced.
       *
       * Scans forward from currentPosition, counting '{' and '}' with a depth
       * that starts at 1, and returns once the depth drops to 0; currentPosition
       * is then just past that '}'. Double-quoted strings (up to the closing
       * quote or the end of the line), '//' line comments and block comments
       * are consumed, so braces inside them are not counted. After a single
       * quote only one character or escape sequence, plus an optional closing
       * quote, is consumed.
       *
       * The method also returns silently when the end of source is reached
       * (IndexOutOfBoundsException) or when an InvalidInputException reaches
       * the outer handler.
       *
       * Updates startPosition, currentPosition and currentCharacter, resets
       * wasAcr, and calls pushLineSeparator() for line breaks seen in
       * whitespace and comments when recordLineSeparator is set.
       *
       * NOTE(review): '#' is handled as a line-comment start in getNextToken
       * but has no case here - confirm whether that is intended.
1695    */
1696   public final void jumpOverMethodBody() {
1697
1698     this.wasAcr = false;
         // Brace depth. Starting at 1 means the scan stops at the first '}' that
         // is not matched by a '{' seen during this call (presumably the body's
         // opening brace was already consumed by the caller - TODO confirm).
1699     int found = 1;
1700     try {
1701       while (true) { //loop for jumping over comments
1702         // ---------Consume white space and handles startPosition---------
1703         boolean isWhiteSpace;
1704         do {
1705           startPosition = currentPosition;
1706           currentCharacter = source[currentPosition++];
1707 //          if (((currentCharacter = source[currentPosition++]) == '\\')
1708 //            && (source[currentPosition] == 'u')) {
1709 //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1710 //          } else {
1711             if (recordLineSeparator
1712               && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1713               pushLineSeparator();
1714             isWhiteSpace = Character.isWhitespace(currentCharacter);
1715 //          }
1716         } while (isWhiteSpace);
1717
1718         // -------consume token until } is found---------
1719         switch (currentCharacter) {
1720           case '{' :
1721             found++;
1722             break;
1723           case '}' :
1724             found--;
                 // depth back to zero: this '}' closes the body being skipped
1725             if (found == 0)
1726               return;
1727             break;
1728           case '\'' :
                 // Handled like a single character literal: one escape sequence
                 // or one plain character, then an optional closing quote.
                 // NOTE(review): a longer single-quoted PHP string is not skipped
                 // as a whole here - confirm this is sufficient.
1729             {
1730               boolean test;
1731               test = getNextChar('\\');
1732               if (test) {
1733                 try {
1734                   scanDoubleQuotedEscapeCharacter();
1735                 } catch (InvalidInputException ex) {
                       // an invalid escape is deliberately ignored while skipping
1736                 };
1737               } else {
1738 //                try { // consume next character
1739                   unicodeAsBackSlash = false;
1740                   currentCharacter = source[currentPosition++];
1741 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1742 //                    && (source[currentPosition] == 'u')) {
1743 //                    getNextUnicodeChar();
1744 //                  } else {
                         // mirror the character into withoutUnicodeBuffer only
                         // when that buffer is in use (pointer is non-zero)
1745                     if (withoutUnicodePtr != 0) {
1746                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1747                         currentCharacter;
1748                     }
1749 //                  }
1750 //                } catch (InvalidInputException ex) {
1751 //                };
1752               }
                   // result ignored; presumably this steps past a closing quote
                   // when it is the next character - TODO confirm helper contract
1753               getNextChar('\'');
1754               break;
1755             }
1756           case '"' :
                 // Consume a double-quoted string up to its closing quote, or up
                 // to the end of the current line, whichever comes first.
1757             try {
1758 //              try { // consume next character
1759                 unicodeAsBackSlash = false;
1760                 currentCharacter = source[currentPosition++];
1761 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1762 //                  && (source[currentPosition] == 'u')) {
1763 //                  getNextUnicodeChar();
1764 //                } else {
1765                   if (withoutUnicodePtr != 0) {
1766                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1767                       currentCharacter;
1768                   }
1769 //                }
1770 //              } catch (InvalidInputException ex) {
1771 //              };
1772               while (currentCharacter != '"') {
1773                 if (currentCharacter == '\r') {
                       // treat "\r\n" as one line break before giving up
1774                   if (source[currentPosition] == '\n')
1775                     currentPosition++;
1776                   break;
1777                   // the string cannot go further than the line
1778                 }
1779                 if (currentCharacter == '\n') {
1780                   break;
1781                   // the string cannot go further than the line
1782                 }
1783                 if (currentCharacter == '\\') {
1784                   try {
1785                     scanDoubleQuotedEscapeCharacter();
1786                   } catch (InvalidInputException ex) {
                         // an invalid escape is deliberately ignored while skipping
1787                   };
1788                 }
1789 //                try { // consume next character
1790                   unicodeAsBackSlash = false;
1791                   currentCharacter = source[currentPosition++];
1792 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1793 //                    && (source[currentPosition] == 'u')) {
1794 //                    getNextUnicodeChar();
1795 //                  } else {
1796                     if (withoutUnicodePtr != 0) {
1797                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1798                         currentCharacter;
1799                     }
1800 //                  }
1801 //                } catch (InvalidInputException ex) {
1802 //                };
1803               }
1804             } catch (IndexOutOfBoundsException e) {
                   // source ended inside the string: stop skipping
1805               return;
1806             }
1807             break;
1808           case '/' :
                 // getNextChar('/', '*'): a result of 0 is handled below as a
                 // line comment, a positive result as a block comment; any other
                 // result leaves the '/' as an ordinary character.
1809             {
1810               int test;
1811               if ((test = getNextChar('/', '*')) == 0) {
1812                 //line comment 
1813                 try {
1814                   //get the next char 
1815                   currentCharacter = source[currentPosition++];
1816 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1817 //                    && (source[currentPosition] == 'u')) {
1818 //                    //-------------unicode traitement ------------
1819 //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1820 //                    currentPosition++;
1821 //                    while (source[currentPosition] == 'u') {
1822 //                      currentPosition++;
1823 //                    }
1824 //                    if ((c1 =
1825 //                      Character.getNumericValue(source[currentPosition++]))
1826 //                      > 15
1827 //                      || c1 < 0
1828 //                      || (c2 =
1829 //                        Character.getNumericValue(source[currentPosition++]))
1830 //                        > 15
1831 //                      || c2 < 0
1832 //                      || (c3 =
1833 //                        Character.getNumericValue(source[currentPosition++]))
1834 //                        > 15
1835 //                      || c3 < 0
1836 //                      || (c4 =
1837 //                        Character.getNumericValue(source[currentPosition++]))
1838 //                        > 15
1839 //                      || c4 < 0) {
1840 //                      //error don't care of the value
1841 //                      currentCharacter = 'A';
1842 //                    } //something different from \n and \r
1843 //                    else {
1844 //                      currentCharacter =
1845 //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1846 //                    }
1847 //                  }
1848
                       // skip everything up to (and including) the line break
1849                   while (currentCharacter != '\r'
1850                     && currentCharacter != '\n') {
1851                     //get the next char 
1852                     currentCharacter = source[currentPosition++];
1853 //                    if (((currentCharacter = source[currentPosition++])
1854 //                      == '\\')
1855 //                      && (source[currentPosition] == 'u')) {
1856 //                      //-------------unicode traitement ------------
1857 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1858 //                      currentPosition++;
1859 //                      while (source[currentPosition] == 'u') {
1860 //                        currentPosition++;
1861 //                      }
1862 //                      if ((c1 =
1863 //                        Character.getNumericValue(source[currentPosition++]))
1864 //                        > 15
1865 //                        || c1 < 0
1866 //                        || (c2 =
1867 //                          Character.getNumericValue(source[currentPosition++]))
1868 //                          > 15
1869 //                        || c2 < 0
1870 //                        || (c3 =
1871 //                          Character.getNumericValue(source[currentPosition++]))
1872 //                          > 15
1873 //                        || c3 < 0
1874 //                        || (c4 =
1875 //                          Character.getNumericValue(source[currentPosition++]))
1876 //                          > 15
1877 //                        || c4 < 0) {
1878 //                        //error don't care of the value
1879 //                        currentCharacter = 'A';
1880 //                      } //something different from \n and \r
1881 //                      else {
1882 //                        currentCharacter =
1883 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1884 //                      }
1885 //                    }
1886                   }
1887                   if (recordLineSeparator
1888                     && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1889                     pushLineSeparator();
1890                 } catch (IndexOutOfBoundsException e) {
1891                 } //an eof will then be generated
1892                 break;
1893               }
1894               if (test > 0) {
1895                 //traditional and annotation comment
                     // star remembers whether the previous character was '*', so
                     // the loop below can recognise the closing "*/"
1896                 boolean star = false;
1897 //                try { // consume next character
1898                   unicodeAsBackSlash = false;
1899                   currentCharacter = source[currentPosition++];
1900 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1901 //                    && (source[currentPosition] == 'u')) {
1902 //                    getNextUnicodeChar();
1903 //                  } else {
1904                     if (withoutUnicodePtr != 0) {
1905                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1906                         currentCharacter;
1907                     }
1908 //                  };
1909 //                } catch (InvalidInputException ex) {
1910 //                };
1911                 if (currentCharacter == '*') {
1912                   star = true;
1913                 }
1914                 if (recordLineSeparator
1915                   && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1916                   pushLineSeparator();
1917                 try { //get the next char 
1918                   currentCharacter = source[currentPosition++];
1919 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1920 //                    && (source[currentPosition] == 'u')) {
1921 //                    //-------------unicode traitement ------------
1922 //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1923 //                    currentPosition++;
1924 //                    while (source[currentPosition] == 'u') {
1925 //                      currentPosition++;
1926 //                    }
1927 //                    if ((c1 =
1928 //                      Character.getNumericValue(source[currentPosition++]))
1929 //                      > 15
1930 //                      || c1 < 0
1931 //                      || (c2 =
1932 //                        Character.getNumericValue(source[currentPosition++]))
1933 //                        > 15
1934 //                      || c2 < 0
1935 //                      || (c3 =
1936 //                        Character.getNumericValue(source[currentPosition++]))
1937 //                        > 15
1938 //                      || c3 < 0
1939 //                      || (c4 =
1940 //                        Character.getNumericValue(source[currentPosition++]))
1941 //                        > 15
1942 //                      || c4 < 0) {
1943 //                      //error don't care of the value
1944 //                      currentCharacter = 'A';
1945 //                    } //something different from * and /
1946 //                    else {
1947 //                      currentCharacter =
1948 //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1949 //                    }
1950 //                  }
1951                   //loop until end of comment */ 
1952                   while ((currentCharacter != '/') || (!star)) {
1953                     if (recordLineSeparator
1954                       && ((currentCharacter == '\r')
1955                         || (currentCharacter == '\n')))
1956                       pushLineSeparator();
1957                     star = currentCharacter == '*';
1958                     //get next char
1959                     currentCharacter = source[currentPosition++];
1960 //                    if (((currentCharacter = source[currentPosition++])
1961 //                      == '\\')
1962 //                      && (source[currentPosition] == 'u')) {
1963 //                      //-------------unicode traitement ------------
1964 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1965 //                      currentPosition++;
1966 //                      while (source[currentPosition] == 'u') {
1967 //                        currentPosition++;
1968 //                      }
1969 //                      if ((c1 =
1970 //                        Character.getNumericValue(source[currentPosition++]))
1971 //                        > 15
1972 //                        || c1 < 0
1973 //                        || (c2 =
1974 //                          Character.getNumericValue(source[currentPosition++]))
1975 //                          > 15
1976 //                        || c2 < 0
1977 //                        || (c3 =
1978 //                          Character.getNumericValue(source[currentPosition++]))
1979 //                          > 15
1980 //                        || c3 < 0
1981 //                        || (c4 =
1982 //                          Character.getNumericValue(source[currentPosition++]))
1983 //                          > 15
1984 //                        || c4 < 0) {
1985 //                        //error don't care of the value
1986 //                        currentCharacter = 'A';
1987 //                      } //something different from * and /
1988 //                      else {
1989 //                        currentCharacter =
1990 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1991 //                      }
1992 //                    }
1993                   }
1994                 } catch (IndexOutOfBoundsException e) {
                       // source ended inside the block comment: stop skipping
1995                   return;
1996                 }
1997                 break;
1998               }
1999               break;
2000             }
2001
2002           default :
                 // Identifiers (including those introduced by '$') and numbers
                 // are consumed through the regular scanning helpers; their
                 // return values and any InvalidInputException are ignored.
2003             if (isPHPIdentifierStart(currentCharacter)
2004               || currentCharacter == '$') {
2005               try {
2006                 scanIdentifierOrKeyword((currentCharacter == '$'));
2007               } catch (InvalidInputException ex) {
2008               };
2009               break;
2010             }
2011             if (Character.isDigit(currentCharacter)) {
2012               try {
2013                 scanNumber(false);
2014               } catch (InvalidInputException ex) {
2015               };
2016               break;
2017             }
2018         }
2019       }
2020       //-----------------end switch while try--------------------
2021     } catch (IndexOutOfBoundsException e) {
           // reading past the end of source ends the scan without an error
2022     } catch (InvalidInputException e) {
           // invalid input likewise just ends the scan
2023     }
2024     return;
2025   }
2026 //  public final boolean jumpOverUnicodeWhiteSpace()
2027 //    throws InvalidInputException {
2028 //    //BOOLEAN
2029 //    //handle the case of unicode. Jump over the next whiteSpace
2030 //    //making startPosition pointing on the next available char
2031 //    //On false, the currentCharacter is filled up with a potential
2032 //    //correct char
2033 //
2034 //    try {
2035 //      this.wasAcr = false;
2036 //      int c1, c2, c3, c4;
2037 //      int unicodeSize = 6;
2038 //      currentPosition++;
2039 //      while (source[currentPosition] == 'u') {
2040 //        currentPosition++;
2041 //        unicodeSize++;
2042 //      }
2043 //
2044 //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2045 //        || c1 < 0)
2046 //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2047 //          || c2 < 0)
2048 //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2049 //          || c3 < 0)
2050 //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2051 //          || c4 < 0)) {
2052 //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2053 //      }
2054 //
2055 //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2056 //      if (recordLineSeparator
2057 //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2058 //        pushLineSeparator();
2059 //      if (Character.isWhitespace(currentCharacter))
2060 //        return true;
2061 //
2062 //      //buffer the new char which is not a white space
2063 //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2064 //      //withoutUnicodePtr == 1 is true here
2065 //      return false;
2066 //    } catch (IndexOutOfBoundsException e) {
2067 //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2068 //    }
2069 //  }
2070   public final int[] getLineEnds() {
2071     //return a bounded copy of this.lineEnds 
2072
2073     int[] copy;
2074     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2075     return copy;
2076   }
2077
2078   public char[] getSource() {
2079     return this.source;
2080   }
2081   final char[] optimizedCurrentTokenSource1() {
2082     //return always the same char[] build only once
2083
2084     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2085     char charOne = source[startPosition];
2086     switch (charOne) {
2087       case 'a' :
2088         return charArray_a;
2089       case 'b' :
2090         return charArray_b;
2091       case 'c' :
2092         return charArray_c;
2093       case 'd' :
2094         return charArray_d;
2095       case 'e' :
2096         return charArray_e;
2097       case 'f' :
2098         return charArray_f;
2099       case 'g' :
2100         return charArray_g;
2101       case 'h' :
2102         return charArray_h;
2103       case 'i' :
2104         return charArray_i;
2105       case 'j' :
2106         return charArray_j;
2107       case 'k' :
2108         return charArray_k;
2109       case 'l' :
2110         return charArray_l;
2111       case 'm' :
2112         return charArray_m;
2113       case 'n' :
2114         return charArray_n;
2115       case 'o' :
2116         return charArray_o;
2117       case 'p' :
2118         return charArray_p;
2119       case 'q' :
2120         return charArray_q;
2121       case 'r' :
2122         return charArray_r;
2123       case 's' :
2124         return charArray_s;
2125       case 't' :
2126         return charArray_t;
2127       case 'u' :
2128         return charArray_u;
2129       case 'v' :
2130         return charArray_v;
2131       case 'w' :
2132         return charArray_w;
2133       case 'x' :
2134         return charArray_x;
2135       case 'y' :
2136         return charArray_y;
2137       case 'z' :
2138         return charArray_z;
2139       default :
2140         return new char[] { charOne };
2141     }
2142   }
2143
2144   final char[] optimizedCurrentTokenSource2() {
2145     //try to return the same char[] build only once
2146
2147     char c0, c1;
2148     int hash =
2149       (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2150         % TableSize;
2151     char[][] table = charArray_length[0][hash];
2152     int i = newEntry2;
2153     while (++i < InternalTableSize) {
2154       char[] charArray = table[i];
2155       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2156         return charArray;
2157     }
2158     //---------other side---------
2159     i = -1;
2160     int max = newEntry2;
2161     while (++i <= max) {
2162       char[] charArray = table[i];
2163       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2164         return charArray;
2165     }
2166     //--------add the entry-------
2167     if (++max >= InternalTableSize)
2168       max = 0;
2169     char[] r;
2170     table[max] = (r = new char[] { c0, c1 });
2171     newEntry2 = max;
2172     return r;
2173   }
2174
2175   final char[] optimizedCurrentTokenSource3() {
2176     //try to return the same char[] build only once
2177
2178     char c0, c1, c2;
2179     int hash =
2180       (((c0 = source[startPosition]) << 12)
2181         + ((c1 = source[startPosition + 1]) << 6)
2182         + (c2 = source[startPosition + 2]))
2183         % TableSize;
2184     char[][] table = charArray_length[1][hash];
2185     int i = newEntry3;
2186     while (++i < InternalTableSize) {
2187       char[] charArray = table[i];
2188       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2189         return charArray;
2190     }
2191     //---------other side---------
2192     i = -1;
2193     int max = newEntry3;
2194     while (++i <= max) {
2195       char[] charArray = table[i];
2196       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2197         return charArray;
2198     }
2199     //--------add the entry-------
2200     if (++max >= InternalTableSize)
2201       max = 0;
2202     char[] r;
2203     table[max] = (r = new char[] { c0, c1, c2 });
2204     newEntry3 = max;
2205     return r;
2206   }
2207
2208   final char[] optimizedCurrentTokenSource4() {
2209     //try to return the same char[] build only once
2210
2211     char c0, c1, c2, c3;
2212     long hash =
2213       ((((long) (c0 = source[startPosition])) << 18)
2214         + ((c1 = source[startPosition + 1]) << 12)
2215         + ((c2 = source[startPosition + 2]) << 6)
2216         + (c3 = source[startPosition + 3]))
2217         % TableSize;
2218     char[][] table = charArray_length[2][(int) hash];
2219     int i = newEntry4;
2220     while (++i < InternalTableSize) {
2221       char[] charArray = table[i];
2222       if ((c0 == charArray[0])
2223         && (c1 == charArray[1])
2224         && (c2 == charArray[2])
2225         && (c3 == charArray[3]))
2226         return charArray;
2227     }
2228     //---------other side---------
2229     i = -1;
2230     int max = newEntry4;
2231     while (++i <= max) {
2232       char[] charArray = table[i];
2233       if ((c0 == charArray[0])
2234         && (c1 == charArray[1])
2235         && (c2 == charArray[2])
2236         && (c3 == charArray[3]))
2237         return charArray;
2238     }
2239     //--------add the entry-------
2240     if (++max >= InternalTableSize)
2241       max = 0;
2242     char[] r;
2243     table[max] = (r = new char[] { c0, c1, c2, c3 });
2244     newEntry4 = max;
2245     return r;
2246
2247   }
2248
2249   final char[] optimizedCurrentTokenSource5() {
2250     //try to return the same char[] build only once
2251
2252     char c0, c1, c2, c3, c4;
2253     long hash =
2254       ((((long) (c0 = source[startPosition])) << 24)
2255         + (((long) (c1 = source[startPosition + 1])) << 18)
2256         + ((c2 = source[startPosition + 2]) << 12)
2257         + ((c3 = source[startPosition + 3]) << 6)
2258         + (c4 = source[startPosition + 4]))
2259         % TableSize;
2260     char[][] table = charArray_length[3][(int) hash];
2261     int i = newEntry5;
2262     while (++i < InternalTableSize) {
2263       char[] charArray = table[i];
2264       if ((c0 == charArray[0])
2265         && (c1 == charArray[1])
2266         && (c2 == charArray[2])
2267         && (c3 == charArray[3])
2268         && (c4 == charArray[4]))
2269         return charArray;
2270     }
2271     //---------other side---------
2272     i = -1;
2273     int max = newEntry5;
2274     while (++i <= max) {
2275       char[] charArray = table[i];
2276       if ((c0 == charArray[0])
2277         && (c1 == charArray[1])
2278         && (c2 == charArray[2])
2279         && (c3 == charArray[3])
2280         && (c4 == charArray[4]))
2281         return charArray;
2282     }
2283     //--------add the entry-------
2284     if (++max >= InternalTableSize)
2285       max = 0;
2286     char[] r;
2287     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2288     newEntry5 = max;
2289     return r;
2290
2291   }
2292
2293   final char[] optimizedCurrentTokenSource6() {
2294     //try to return the same char[] build only once
2295
2296     char c0, c1, c2, c3, c4, c5;
2297     long hash =
2298       ((((long) (c0 = source[startPosition])) << 32)
2299         + (((long) (c1 = source[startPosition + 1])) << 24)
2300         + (((long) (c2 = source[startPosition + 2])) << 18)
2301         + ((c3 = source[startPosition + 3]) << 12)
2302         + ((c4 = source[startPosition + 4]) << 6)
2303         + (c5 = source[startPosition + 5]))
2304         % TableSize;
2305     char[][] table = charArray_length[4][(int) hash];
2306     int i = newEntry6;
2307     while (++i < InternalTableSize) {
2308       char[] charArray = table[i];
2309       if ((c0 == charArray[0])
2310         && (c1 == charArray[1])
2311         && (c2 == charArray[2])
2312         && (c3 == charArray[3])
2313         && (c4 == charArray[4])
2314         && (c5 == charArray[5]))
2315         return charArray;
2316     }
2317     //---------other side---------
2318     i = -1;
2319     int max = newEntry6;
2320     while (++i <= max) {
2321       char[] charArray = table[i];
2322       if ((c0 == charArray[0])
2323         && (c1 == charArray[1])
2324         && (c2 == charArray[2])
2325         && (c3 == charArray[3])
2326         && (c4 == charArray[4])
2327         && (c5 == charArray[5]))
2328         return charArray;
2329     }
2330     //--------add the entry-------
2331     if (++max >= InternalTableSize)
2332       max = 0;
2333     char[] r;
2334     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2335     newEntry6 = max;
2336     return r;
2337   }
2338
2339   public final void pushLineSeparator() throws InvalidInputException {
2340     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2341     final int INCREMENT = 250;
2342
2343     if (this.checkNonExternalizedStringLiterals) {
2344       // reinitialize the current line for non externalize strings purpose
2345       currentLine = null;
2346     }
2347     //currentCharacter is at position currentPosition-1
2348
2349     // cr 000D
2350     if (currentCharacter == '\r') {
2351       int separatorPos = currentPosition - 1;
2352       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2353         return;
2354       //System.out.println("CR-" + separatorPos);
2355       try {
2356         lineEnds[++linePtr] = separatorPos;
2357       } catch (IndexOutOfBoundsException e) {
2358         //linePtr value is correct
2359         int oldLength = lineEnds.length;
2360         int[] old = lineEnds;
2361         lineEnds = new int[oldLength + INCREMENT];
2362         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2363         lineEnds[linePtr] = separatorPos;
2364       }
2365       // look-ahead for merged cr+lf
2366       try {
2367         if (source[currentPosition] == '\n') {
2368           //System.out.println("look-ahead LF-" + currentPosition);                     
2369           lineEnds[linePtr] = currentPosition;
2370           currentPosition++;
2371           wasAcr = false;
2372         } else {
2373           wasAcr = true;
2374         }
2375       } catch (IndexOutOfBoundsException e) {
2376         wasAcr = true;
2377       }
2378     } else {
2379       // lf 000A
2380       if (currentCharacter == '\n') {
2381         //must merge eventual cr followed by lf
2382         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2383           //System.out.println("merge LF-" + (currentPosition - 1));                                                    
2384           lineEnds[linePtr] = currentPosition - 1;
2385         } else {
2386           int separatorPos = currentPosition - 1;
2387           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2388             return;
2389           // System.out.println("LF-" + separatorPos);                                                  
2390           try {
2391             lineEnds[++linePtr] = separatorPos;
2392           } catch (IndexOutOfBoundsException e) {
2393             //linePtr value is correct
2394             int oldLength = lineEnds.length;
2395             int[] old = lineEnds;
2396             lineEnds = new int[oldLength + INCREMENT];
2397             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2398             lineEnds[linePtr] = separatorPos;
2399           }
2400         }
2401         wasAcr = false;
2402       }
2403     }
2404   }
2405   public final void pushUnicodeLineSeparator() {
2406     // isUnicode means that the \r or \n has been read as a unicode character
2407
2408     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2409
2410     final int INCREMENT = 250;
2411     //currentCharacter is at position currentPosition-1
2412
2413     if (this.checkNonExternalizedStringLiterals) {
2414       // reinitialize the current line for non externalize strings purpose
2415       currentLine = null;
2416     }
2417
2418     // cr 000D
2419     if (currentCharacter == '\r') {
2420       int separatorPos = currentPosition - 6;
2421       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2422         return;
2423       //System.out.println("CR-" + separatorPos);
2424       try {
2425         lineEnds[++linePtr] = separatorPos;
2426       } catch (IndexOutOfBoundsException e) {
2427         //linePtr value is correct
2428         int oldLength = lineEnds.length;
2429         int[] old = lineEnds;
2430         lineEnds = new int[oldLength + INCREMENT];
2431         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2432         lineEnds[linePtr] = separatorPos;
2433       }
2434       // look-ahead for merged cr+lf
2435       if (source[currentPosition] == '\n') {
2436         //System.out.println("look-ahead LF-" + currentPosition);                       
2437         lineEnds[linePtr] = currentPosition;
2438         currentPosition++;
2439         wasAcr = false;
2440       } else {
2441         wasAcr = true;
2442       }
2443     } else {
2444       // lf 000A
2445       if (currentCharacter == '\n') {
2446         //must merge eventual cr followed by lf
2447         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2448           //System.out.println("merge LF-" + (currentPosition - 1));                                                    
2449           lineEnds[linePtr] = currentPosition - 6;
2450         } else {
2451           int separatorPos = currentPosition - 6;
2452           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2453             return;
2454           // System.out.println("LF-" + separatorPos);                                                  
2455           try {
2456             lineEnds[++linePtr] = separatorPos;
2457           } catch (IndexOutOfBoundsException e) {
2458             //linePtr value is correct
2459             int oldLength = lineEnds.length;
2460             int[] old = lineEnds;
2461             lineEnds = new int[oldLength + INCREMENT];
2462             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2463             lineEnds[linePtr] = separatorPos;
2464           }
2465         }
2466         wasAcr = false;
2467       }
2468     }
2469   }
2470   public final void recordComment(boolean isJavadoc) {
2471
2472     // a new annotation comment is recorded
2473     try {
2474       commentStops[++commentPtr] =
2475         isJavadoc ? currentPosition : -currentPosition;
2476     } catch (IndexOutOfBoundsException e) {
2477       int oldStackLength = commentStops.length;
2478       int[] oldStack = commentStops;
2479       commentStops = new int[oldStackLength + 30];
2480       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2481       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2482       //grows the positions buffers too
2483       int[] old = commentStarts;
2484       commentStarts = new int[oldStackLength + 30];
2485       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2486     }
2487
2488     //the buffer is of a correct size here
2489     commentStarts[commentPtr] = startPosition;
2490   }
2491   public void resetTo(int begin, int end) {
2492     //reset the scanner to a given position where it may rescan again
2493
2494     diet = false;
2495     initialPosition = startPosition = currentPosition = begin;
2496     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2497     commentPtr = -1; // reset comment stack
2498   }
2499
2500   public final void scanSingleQuotedEscapeCharacter()
2501     throws InvalidInputException {
2502     // the string with "\\u" is a legal string of two chars \ and u
2503     //thus we use a direct access to the source (for regular cases).
2504
2505 //    if (unicodeAsBackSlash) {
2506 //      // consume next character
2507 //      unicodeAsBackSlash = false;
2508 //      if (((currentCharacter = source[currentPosition++]) == '\\')
2509 //        && (source[currentPosition] == 'u')) {
2510 //        getNextUnicodeChar();
2511 //      } else {
2512 //        if (withoutUnicodePtr != 0) {
2513 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2514 //        }
2515 //      }
2516 //    } else
2517       currentCharacter = source[currentPosition++];
2518     switch (currentCharacter) {
2519       case '\'' :
2520         currentCharacter = '\'';
2521         break;
2522       case '\\' :
2523         currentCharacter = '\\';
2524         break;
2525       default :
2526         currentCharacter = '\\';
2527         currentPosition--;
2528     }
2529   }
2530
2531   public final void scanDoubleQuotedEscapeCharacter()
2532     throws InvalidInputException {
2533     // the string with "\\u" is a legal string of two chars \ and u
2534     //thus we use a direct access to the source (for regular cases).
2535
2536 //    if (unicodeAsBackSlash) {
2537 //      // consume next character
2538 //      unicodeAsBackSlash = false;
2539 //      if (((currentCharacter = source[currentPosition++]) == '\\')
2540 //        && (source[currentPosition] == 'u')) {
2541 //        getNextUnicodeChar();
2542 //      } else {
2543 //        if (withoutUnicodePtr != 0) {
2544 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2545 //        }
2546 //      }
2547 //    } else
2548       currentCharacter = source[currentPosition++];
2549     switch (currentCharacter) {
2550       //      case 'b' :
2551       //        currentCharacter = '\b';
2552       //        break;
2553       case 't' :
2554         currentCharacter = '\t';
2555         break;
2556       case 'n' :
2557         currentCharacter = '\n';
2558         break;
2559         //      case 'f' :
2560         //        currentCharacter = '\f';
2561         //        break;
2562       case 'r' :
2563         currentCharacter = '\r';
2564         break;
2565       case '\"' :
2566         currentCharacter = '\"';
2567         break;
2568       case '\'' :
2569         currentCharacter = '\'';
2570         break;
2571       case '\\' :
2572         currentCharacter = '\\';
2573         break;
2574       case '$' :
2575         currentCharacter = '$';
2576         break;
2577       default :
2578         // -----------octal escape--------------
2579         // OctalDigit
2580         // OctalDigit OctalDigit
2581         // ZeroToThree OctalDigit OctalDigit
2582
2583         int number = Character.getNumericValue(currentCharacter);
2584         if (number >= 0 && number <= 7) {
2585           boolean zeroToThreeNot = number > 3;
2586           if (Character
2587             .isDigit(currentCharacter = source[currentPosition++])) {
2588             int digit = Character.getNumericValue(currentCharacter);
2589             if (digit >= 0 && digit <= 7) {
2590               number = (number * 8) + digit;
2591               if (Character
2592                 .isDigit(currentCharacter = source[currentPosition++])) {
2593                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2594                   currentPosition--;
2595                 } else {
2596                   digit = Character.getNumericValue(currentCharacter);
2597                   if (digit >= 0 && digit <= 7) {
2598                     // has read \ZeroToThree OctalDigit OctalDigit
2599                     number = (number * 8) + digit;
2600                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2601                     currentPosition--;
2602                   }
2603                 }
2604               } else { // has read \OctalDigit NonDigit--> ignore last character
2605                 currentPosition--;
2606               }
2607             } else { // has read \OctalDigit NonOctalDigit--> ignore last character                                             
2608               currentPosition--;
2609             }
2610           } else { // has read \OctalDigit --> ignore last character
2611             currentPosition--;
2612           }
2613           if (number > 255)
2614             throw new InvalidInputException(INVALID_ESCAPE);
2615           currentCharacter = (char) number;
2616         }
2617         //else
2618         //     throw new InvalidInputException(INVALID_ESCAPE);
2619     }
2620   }
2621
2622   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2623   //    return scanIdentifierOrKeyword( false );
2624   //  }
2625
2626   public int scanIdentifierOrKeyword(boolean isVariable) 
2627     throws InvalidInputException {
2628     //test keywords
2629
2630     //first dispatch on the first char.
2631     //then the length. If there are several
2632     //keywors with the same length AND the same first char, then do another
2633     //disptach on the second char :-)...cool....but fast !
2634
2635     useAssertAsAnIndentifier = false;
2636
2637     while (getNextCharAsJavaIdentifierPart()) {
2638     };
2639
2640     if (isVariable) {
2641       return TokenNameVariable;
2642     }
2643     int index, length;
2644     char[] data;
2645     char firstLetter;
2646 //    if (withoutUnicodePtr == 0)
2647
2648       //quick test on length == 1 but not on length > 12 while most identifier
2649       //have a length which is <= 12...but there are lots of identifier with
2650       //only one char....
2651
2652 //      {
2653       if ((length = currentPosition - startPosition) == 1)
2654         return TokenNameIdentifier;
2655       //  data = source;
2656       data = new char[length];
2657       index = startPosition;
2658       for (int i = 0; i < length; i++) {
2659         data[i] = Character.toLowerCase(source[index + i]);
2660       }
2661       index = 0;
2662 //    } else {
2663 //      if ((length = withoutUnicodePtr) == 1)
2664 //        return TokenNameIdentifier;
2665 //      // data = withoutUnicodeBuffer;
2666 //      data = new char[withoutUnicodeBuffer.length];
2667 //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2668 //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2669 //      }
2670 //      index = 1;
2671 //    }
2672
2673     firstLetter = data[index];
2674     switch (firstLetter) {
2675
2676       case 'a' : // as and array
2677         switch (length) {
2678           case 2 : //as
2679             if ((data[++index] == 's')) {
2680               return TokenNameas;
2681             } else {
2682               return TokenNameIdentifier;
2683             }
2684           case 3 : //and
2685             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2686               return TokenNameAND;
2687             } else {
2688               return TokenNameIdentifier;
2689             }
2690             //          case 5 :
2691             //            if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2692             //              return TokenNamearray;
2693             //            else
2694             //              return TokenNameIdentifier;
2695           default :
2696             return TokenNameIdentifier;
2697         }
2698       case 'b' : //break
2699         switch (length) {
2700           case 5 :
2701             if ((data[++index] == 'r')
2702               && (data[++index] == 'e')
2703               && (data[++index] == 'a')
2704               && (data[++index] == 'k'))
2705               return TokenNamebreak;
2706             else
2707               return TokenNameIdentifier;
2708           default :
2709             return TokenNameIdentifier;
2710         }
2711
2712       case 'c' : //case class continue
2713         switch (length) {
2714           case 4 :
2715             if ((data[++index] == 'a')
2716               && (data[++index] == 's')
2717               && (data[++index] == 'e'))
2718               return TokenNamecase;
2719             else
2720               return TokenNameIdentifier;
2721           case 5 :
2722             if ((data[++index] == 'l')
2723               && (data[++index] == 'a')
2724               && (data[++index] == 's')
2725               && (data[++index] == 's'))
2726               return TokenNameclass;
2727             else
2728               return TokenNameIdentifier;
2729           case 8 :
2730             if ((data[++index] == 'o')
2731               && (data[++index] == 'n')
2732               && (data[++index] == 't')
2733               && (data[++index] == 'i')
2734               && (data[++index] == 'n')
2735               && (data[++index] == 'u')
2736               && (data[++index] == 'e'))
2737               return TokenNamecontinue;
2738             else
2739               return TokenNameIdentifier;
2740           default :
2741             return TokenNameIdentifier;
2742         }
2743
2744       case 'd' : //define default do 
2745         switch (length) {
2746           case 2 :
2747             if ((data[++index] == 'o'))
2748               return TokenNamedo;
2749             else
2750               return TokenNameIdentifier;
2751           case 6 :
2752             if ((data[++index] == 'e')
2753               && (data[++index] == 'f')
2754               && (data[++index] == 'i')
2755               && (data[++index] == 'n')
2756               && (data[++index] == 'e'))
2757               return TokenNamedefine;
2758             else
2759               return TokenNameIdentifier;
2760           case 7 :
2761             if ((data[++index] == 'e')
2762               && (data[++index] == 'f')
2763               && (data[++index] == 'a')
2764               && (data[++index] == 'u')
2765               && (data[++index] == 'l')
2766               && (data[++index] == 't'))
2767               return TokenNamedefault;
2768             else
2769               return TokenNameIdentifier;
2770           default :
2771             return TokenNameIdentifier;
2772         }
2773       case 'e' : //echo else elseif extends
2774         switch (length) {
2775           case 4 :
2776             if ((data[++index] == 'c')
2777               && (data[++index] == 'h')
2778               && (data[++index] == 'o'))
2779               return TokenNameecho;
2780             else if (
2781               (data[index] == 'l')
2782                 && (data[++index] == 's')
2783                 && (data[++index] == 'e'))
2784               return TokenNameelse;
2785             else
2786               return TokenNameIdentifier;
2787           case 5 : // endif
2788             if ((data[++index] == 'n')
2789               && (data[++index] == 'd')
2790               && (data[++index] == 'i')
2791               && (data[++index] == 'f'))
2792               return TokenNameendif;
2793             else
2794               return TokenNameIdentifier;
2795           case 6 : // endfor
2796             if ((data[++index] == 'n')
2797               && (data[++index] == 'd')
2798               && (data[++index] == 'f')
2799               && (data[++index] == 'o')
2800               && (data[++index] == 'r'))
2801               return TokenNameendfor;
2802             else if (
2803               (data[index] == 'l')
2804                 && (data[++index] == 's')
2805                 && (data[++index] == 'e')
2806                 && (data[++index] == 'i')
2807                 && (data[++index] == 'f'))
2808               return TokenNameelseif;
2809             else
2810               return TokenNameIdentifier;
2811           case 7 :
2812             if ((data[++index] == 'x')
2813               && (data[++index] == 't')
2814               && (data[++index] == 'e')
2815               && (data[++index] == 'n')
2816               && (data[++index] == 'd')
2817               && (data[++index] == 's'))
2818               return TokenNameextends;
2819             else
2820               return TokenNameIdentifier;
2821           case 8 : // endwhile
2822             if ((data[++index] == 'n')
2823               && (data[++index] == 'd')
2824               && (data[++index] == 'w')
2825               && (data[++index] == 'h')
2826               && (data[++index] == 'i')
2827               && (data[++index] == 'l')
2828               && (data[++index] == 'e'))
2829               return TokenNameendwhile;
2830             else
2831               return TokenNameIdentifier;
2832           case 9 : // endswitch
2833             if ((data[++index] == 'n')
2834               && (data[++index] == 'd')
2835               && (data[++index] == 's')
2836               && (data[++index] == 'w')
2837               && (data[++index] == 'i')
2838               && (data[++index] == 't')
2839               && (data[++index] == 'c')
2840               && (data[++index] == 'h'))
2841               return TokenNameendswitch;
2842             else
2843               return TokenNameIdentifier;
2844           case 10 : // endforeach
2845             if ((data[++index] == 'n')
2846               && (data[++index] == 'd')
2847               && (data[++index] == 'f')
2848               && (data[++index] == 'o')
2849               && (data[++index] == 'r')
2850               && (data[++index] == 'e')
2851               && (data[++index] == 'a')
2852               && (data[++index] == 'c')
2853               && (data[++index] == 'h'))
2854               return TokenNameendforeach;
2855             else
2856               return TokenNameIdentifier;
2857
2858           default :
2859             return TokenNameIdentifier;
2860         }
2861
2862       case 'f' : //for false function
2863         switch (length) {
2864           case 3 :
2865             if ((data[++index] == 'o') && (data[++index] == 'r'))
2866               return TokenNamefor;
2867             else
2868               return TokenNameIdentifier;
2869           case 5 :
2870             if ((data[++index] == 'a')
2871               && (data[++index] == 'l')
2872               && (data[++index] == 's')
2873               && (data[++index] == 'e'))
2874               return TokenNamefalse;
2875             else
2876               return TokenNameIdentifier;
2877           case 7 : // function
2878             if ((data[++index] == 'o')
2879               && (data[++index] == 'r')
2880               && (data[++index] == 'e')
2881               && (data[++index] == 'a')
2882               && (data[++index] == 'c')
2883               && (data[++index] == 'h'))
2884               return TokenNameforeach;
2885             else
2886               return TokenNameIdentifier;
2887           case 8 : // function
2888             if ((data[++index] == 'u')
2889               && (data[++index] == 'n')
2890               && (data[++index] == 'c')
2891               && (data[++index] == 't')
2892               && (data[++index] == 'i')
2893               && (data[++index] == 'o')
2894               && (data[++index] == 'n'))
2895               return TokenNamefunction;
2896             else
2897               return TokenNameIdentifier;
2898           default :
2899             return TokenNameIdentifier;
2900         }
2901       case 'g' : //global
2902         if (length == 6) {
2903           if ((data[++index] == 'l')
2904             && (data[++index] == 'o')
2905             && (data[++index] == 'b')
2906             && (data[++index] == 'a')
2907             && (data[++index] == 'l')) {
2908             return TokenNameglobal;
2909           }
2910         }
2911         return TokenNameIdentifier;
2912
2913       case 'i' : //if int 
2914         switch (length) {
2915           case 2 :
2916             if (data[++index] == 'f')
2917               return TokenNameif;
2918             else
2919               return TokenNameIdentifier;
2920             //          case 3 :
2921             //            if ((data[++index] == 'n') && (data[++index] == 't'))
2922             //              return TokenNameint;
2923             //            else
2924             //              return TokenNameIdentifier;
2925           case 7 :
2926             if ((data[++index] == 'n')
2927               && (data[++index] == 'c')
2928               && (data[++index] == 'l')
2929               && (data[++index] == 'u')
2930               && (data[++index] == 'd')
2931               && (data[++index] == 'e'))
2932               return TokenNameinclude;
2933             else
2934               return TokenNameIdentifier;
2935           case 12 :
2936             if ((data[++index] == 'n')
2937               && (data[++index] == 'c')
2938               && (data[++index] == 'l')
2939               && (data[++index] == 'u')
2940               && (data[++index] == 'd')
2941               && (data[++index] == 'e')
2942               && (data[++index] == '_')
2943               && (data[++index] == 'o')
2944               && (data[++index] == 'n')
2945               && (data[++index] == 'c')
2946               && (data[++index] == 'e'))
2947               return TokenNameinclude_once;
2948             else
2949               return TokenNameIdentifier;
2950           default :
2951             return TokenNameIdentifier;
2952         }
2953
2954       case 'l' : //list
2955         if (length == 4) {
2956           if ((data[++index] == 'i')
2957             && (data[++index] == 's')
2958             && (data[++index] == 't')) {
2959             return TokenNamelist;
2960           }
2961         }
2962         return TokenNameIdentifier;
2963
2964       case 'n' : // new null
2965         switch (length) {
2966           case 3 :
2967             if ((data[++index] == 'e') && (data[++index] == 'w'))
2968               return TokenNamenew;
2969             else
2970               return TokenNameIdentifier;
2971           case 4 :
2972             if ((data[++index] == 'u')
2973               && (data[++index] == 'l')
2974               && (data[++index] == 'l'))
2975               return TokenNamenull;
2976             else
2977               return TokenNameIdentifier;
2978
2979           default :
2980             return TokenNameIdentifier;
2981         }
2982       case 'o' : // or old_function
2983         if (length == 2) {
2984           if (data[++index] == 'r') {
2985             return TokenNameOR;
2986           }
2987         }
2988         //        if (length == 12) {
2989         //          if ((data[++index] == 'l')
2990         //            && (data[++index] == 'd')
2991         //            && (data[++index] == '_')
2992         //            && (data[++index] == 'f')
2993         //            && (data[++index] == 'u')
2994         //            && (data[++index] == 'n')
2995         //            && (data[++index] == 'c')
2996         //            && (data[++index] == 't')
2997         //            && (data[++index] == 'i')
2998         //            && (data[++index] == 'o')
2999         //            && (data[++index] == 'n')) {
3000         //            return TokenNameold_function;
3001         //          }
3002         //        }
3003         return TokenNameIdentifier;
3004
3005       case 'p' : // print
3006         if (length == 5) {
3007           if ((data[++index] == 'r')
3008             && (data[++index] == 'i')
3009             && (data[++index] == 'n')
3010             && (data[++index] == 't')) {
3011             return TokenNameprint;
3012           }
3013         }
3014         return TokenNameIdentifier;
3015       case 'r' : //return require require_once
3016         if (length == 6) {
3017           if ((data[++index] == 'e')
3018             && (data[++index] == 't')
3019             && (data[++index] == 'u')
3020             && (data[++index] == 'r')
3021             && (data[++index] == 'n')) {
3022             return TokenNamereturn;
3023           }
3024         } else if (length == 7) {
3025           if ((data[++index] == 'e')
3026             && (data[++index] == 'q')
3027             && (data[++index] == 'u')
3028             && (data[++index] == 'i')
3029             && (data[++index] == 'r')
3030             && (data[++index] == 'e')) {
3031             return TokenNamerequire;
3032           }
3033         } else if (length == 12) {
3034           if ((data[++index] == 'e')
3035             && (data[++index] == 'q')
3036             && (data[++index] == 'u')
3037             && (data[++index] == 'i')
3038             && (data[++index] == 'r')
3039             && (data[++index] == 'e')
3040             && (data[++index] == '_')
3041             && (data[++index] == 'o')
3042             && (data[++index] == 'n')
3043             && (data[++index] == 'c')
3044             && (data[++index] == 'e')) {
3045             return TokenNamerequire_once;
3046           }
3047         } else
3048           return TokenNameIdentifier;
3049
3050       case 's' : //static switch 
3051         switch (length) {
3052           case 6 :
3053             if (data[++index] == 't')
3054               if ((data[++index] == 'a')
3055                 && (data[++index] == 't')
3056                 && (data[++index] == 'i')
3057                 && (data[++index] == 'c')) {
3058                 return TokenNamestatic;
3059               } else
3060                 return TokenNameIdentifier;
3061             else if (
3062               (data[index] == 'w')
3063                 && (data[++index] == 'i')
3064                 && (data[++index] == 't')
3065                 && (data[++index] == 'c')
3066                 && (data[++index] == 'h'))
3067               return TokenNameswitch;
3068             else
3069               return TokenNameIdentifier;
3070           default :
3071             return TokenNameIdentifier;
3072         }
3073
3074       case 't' : // true
3075         switch (length) {
3076
3077           case 4 :
3078             if ((data[++index] == 'r')
3079               && (data[++index] == 'u')
3080               && (data[++index] == 'e'))
3081               return TokenNametrue;
3082             else
3083               return TokenNameIdentifier;
3084             //            if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
3085             //              return TokenNamethis;
3086
3087           default :
3088             return TokenNameIdentifier;
3089         }
3090
3091       case 'v' : //var 
3092         switch (length) {
3093           case 3 :
3094             if ((data[++index] == 'a') && (data[++index] == 'r'))
3095               return TokenNamevar;
3096             else
3097               return TokenNameIdentifier;
3098
3099           default :
3100             return TokenNameIdentifier;
3101         }
3102
3103       case 'w' : //while 
3104         switch (length) {
3105           case 5 :
3106             if ((data[++index] == 'h')
3107               && (data[++index] == 'i')
3108               && (data[++index] == 'l')
3109               && (data[++index] == 'e'))
3110               return TokenNamewhile;
3111             else
3112               return TokenNameIdentifier;
3113             //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
3114             //return TokenNamewidefp ;
3115             //else
3116             //return TokenNameIdentifier;
3117           default :
3118             return TokenNameIdentifier;
3119         }
3120
3121       case 'x' : //xor
3122         switch (length) {
3123           case 3 :
3124             if ((data[++index] == 'o') && (data[++index] == 'r'))
3125               return TokenNameXOR;
3126             else
3127               return TokenNameIdentifier;
3128
3129           default :
3130             return TokenNameIdentifier;
3131         }
3132       default :
3133         return TokenNameIdentifier;
3134     }
3135   }
  /**
   * Scans the remainder of a numeric literal and classifies it.
   *
   * On entry currentCharacter is the first digit of the number; when
   * dotPrefix is true that digit was preceded by a '.' (so the literal is
   * already known to be floating and no further '.' is accepted).
   *
   * Recognised forms, as implemented below:
   *  - "0x"/"0X" followed by at least one hex digit: integer literal
   *  - "0" followed by more digits: integer literal, unless a 'd'/'D' suffix
   *    or a '.' (optionally with an exponent) turns it into a double literal
   *  - digits with an optional '.' fraction, optional exponent and optional
   *    'd'/'D' suffix
   *
   * The long ('l'/'L') and float ('f'/'F') suffix checks and the
   * backslash-u unicode-escape handling that used to live in this method are
   * disabled; characters are read straight from the source array.
   *
   * NOTE(review): the accepted 'd'/'D' suffix looks like a leftover from the
   * Java scanner; PHP has no such suffix - confirm whether it is intended.
   * NOTE(review): the direct source[currentPosition++] reads are not bounds
   * checked; presumably the caller copes with an IndexOutOfBoundsException at
   * end of input - confirm.
   *
   * @param dotPrefix true when the number started with a '.'
   * @return TokenNameIntegerLiteral or TokenNameDoubleLiteral
   * @throws InvalidInputException with INVALID_HEXA when "0x" is not followed
   *         by a hex digit, or INVALID_FLOAT when an exponent has no digit
   */
  public int scanNumber(boolean dotPrefix) throws InvalidInputException {

    // a leading '.' already makes the literal a floating point number
    boolean floating = dotPrefix;
    if ((!dotPrefix) && (currentCharacter == '0')) {
      // a non-negative result is treated as "one of the two characters matched"
      if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
        // at least one hex digit must follow the "0x" prefix:
        // consume the next character directly (no unicode-escape decoding)
        unicodeAsBackSlash = false;
        currentCharacter = source[currentPosition++];
        if (Character.digit(currentCharacter, 16) == -1)
          throw new InvalidInputException(INVALID_HEXA);
        // swallow the remaining hex digits
        while (getNextCharAsDigit(16)) {
        };
        // no long-literal suffix is recognised: hex numbers are always integers
        return TokenNameIntegerLiteral;
      }

      // no 'x' or 'X' followed the leading '0'
      // potential octal - but something like 000099.0 is also possible, so the
      // digits are consumed first and the literal is classified afterwards
      if (getNextCharAsDigit()) {
        //-------------potential octal-----------------
        while (getNextCharAsDigit()) {
        };

        // (long and float suffixes are not recognised)

        if (getNextChar('d', 'D') >= 0) {
          return TokenNameDoubleLiteral;
        } else { // distinguish between an octal integer and a float
          if (getNextChar('.')) { // a '.' follows: this is a floating point literal
            // fraction digits (may be empty)
            while (getNextCharAsDigit()) {
            };
            if (getNextChar('e', 'E') >= 0) {
              // exponent: consume the next character directly
              unicodeAsBackSlash = false;
              currentCharacter = source[currentPosition++];

              if ((currentCharacter == '-') || (currentCharacter == '+')) {
                // optional exponent sign: consume the character after it
                unicodeAsBackSlash = false;
                currentCharacter = source[currentPosition++];
              }
              // the exponent needs at least one digit
              if (!Character.isDigit(currentCharacter))
                throw new InvalidInputException(INVALID_FLOAT);
              while (getNextCharAsDigit()) {
              };
            }
            // (float suffix is not recognised)
            getNextChar('d', 'D'); // jump over a potential d or D suffix
            return TokenNameDoubleLiteral;
          } else {
            return TokenNameIntegerLiteral;
          }
        }
      } else {
        /* a lone '0' (e.g. "0" or "0.5"): carry on with the generic path below */
      }
    }

    // integer part (or, with dotPrefix, the fraction digits)
    while (getNextCharAsDigit()) {
    };

    // (long suffix is not recognised)

    if ((!dotPrefix) && (getNextChar('.'))) { // decimal part that can be empty
      while (getNextCharAsDigit()) {
      };
      floating = true;
    }

    // once floating is true both the exponent and the suffix are optional

    if (getNextChar('e', 'E') >= 0) {
      floating = true;
      // exponent: consume the next character directly
      unicodeAsBackSlash = false;
      currentCharacter = source[currentPosition++];

      if ((currentCharacter == '-')
        || (currentCharacter == '+')) { // optional sign: consume the character after it
        unicodeAsBackSlash = false;
        currentCharacter = source[currentPosition++];
      }
      // the exponent needs at least one digit
      if (!Character.isDigit(currentCharacter))
        throw new InvalidInputException(INVALID_FLOAT);
      while (getNextCharAsDigit()) {
      };
    }

    // an explicit d/D suffix forces a double literal
    if (getNextChar('d', 'D') >= 0)
      return TokenNameDoubleLiteral;
    // (float suffix is not recognised)

    return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
  }
3290   /**
3291    * Search the line number corresponding to a specific position
3292    *
3293    */
3294   public final int getLineNumber(int position) {
3295
3296     if (lineEnds == null)
3297       return 1;
3298     int length = linePtr + 1;
3299     if (length == 0)
3300       return 1;
3301     int g = 0, d = length - 1;
3302     int m = 0;
3303     while (g <= d) {
3304       m = (g + d) / 2;
3305       if (position < lineEnds[m]) {
3306         d = m - 1;
3307       } else if (position > lineEnds[m]) {
3308         g = m + 1;
3309       } else {
3310         return m + 1;
3311       }
3312     }
3313     if (position < lineEnds[m]) {
3314       return m + 1;
3315     }
3316     return m + 2;
3317   }
3318
3319   public void setPHPMode(boolean mode) {
3320     phpMode = mode;
3321   }
3322
3323   public final void setSource(char[] source) {
3324     //the source-buffer is set to sourceString
3325
3326     if (source == null) {
3327       this.source = new char[0];
3328     } else {
3329       this.source = source;
3330     }
3331     startPosition = -1;
3332     initialPosition = currentPosition = 0;
3333     containsAssertKeyword = false;
3334     withoutUnicodeBuffer = new char[this.source.length];
3335
3336   }
3337
3338   public String toString() {
3339     if (startPosition == source.length)
3340       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3341     if (currentPosition > source.length)
3342       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3343
3344     char front[] = new char[startPosition];
3345     System.arraycopy(source, 0, front, 0, startPosition);
3346
3347     int middleLength = (currentPosition - 1) - startPosition + 1;
3348     char middle[];
3349     if (middleLength > -1) {
3350       middle = new char[middleLength];
3351       System.arraycopy(source, startPosition, middle, 0, middleLength);
3352     } else {
3353       middle = new char[0];
3354     }
3355
3356     char end[] = new char[source.length - (currentPosition - 1)];
3357     System.arraycopy(
3358       source,
3359       (currentPosition - 1) + 1,
3360       end,
3361       0,
3362       source.length - (currentPosition - 1) - 1);
3363
3364     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3365     + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3366     + new String(end);
3367   }
3368   public final String toStringAction(int act) {
3369     switch (act) {
3370       case TokenNameERROR :
3371         return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3372       case TokenNameStopPHP :
3373         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3374       case TokenNameIdentifier :
3375         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3376       case TokenNameVariable :
3377         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3378       case TokenNameas :
3379         return "as"; //$NON-NLS-1$
3380       case TokenNamebreak :
3381         return "break"; //$NON-NLS-1$
3382       case TokenNamecase :
3383         return "case"; //$NON-NLS-1$
3384       case TokenNameclass :
3385         return "class"; //$NON-NLS-1$
3386       case TokenNamecontinue :
3387         return "continue"; //$NON-NLS-1$
3388       case TokenNamedefault :
3389         return "default"; //$NON-NLS-1$
3390       case TokenNamedefine :
3391         return "define"; //$NON-NLS-1$
3392       case TokenNamedo :
3393         return "do"; //$NON-NLS-1$
3394       case TokenNameecho :
3395         return "echo"; //$NON-NLS-1$
3396       case TokenNameelse :
3397         return "else"; //$NON-NLS-1$
3398       case TokenNameelseif :
3399         return "elseif"; //$NON-NLS-1$
3400       case TokenNameendfor :
3401         return "endfor"; //$NON-NLS-1$
3402       case TokenNameendforeach :
3403         return "endforeach"; //$NON-NLS-1$
3404       case TokenNameendif :
3405         return "endif"; //$NON-NLS-1$
3406       case TokenNameendswitch :
3407         return "endswitch"; //$NON-NLS-1$
3408       case TokenNameendwhile :
3409         return "endwhile"; //$NON-NLS-1$
3410       case TokenNameextends :
3411         return "extends"; //$NON-NLS-1$
3412       case TokenNamefalse :
3413         return "false"; //$NON-NLS-1$
3414       case TokenNamefor :
3415         return "for"; //$NON-NLS-1$
3416       case TokenNameforeach :
3417         return "foreach"; //$NON-NLS-1$
3418       case TokenNamefunction :
3419         return "function"; //$NON-NLS-1$
3420       case TokenNameglobal :
3421         return "global"; //$NON-NLS-1$
3422       case TokenNameif :
3423         return "if"; //$NON-NLS-1$
3424       case TokenNameinclude :
3425         return "include"; //$NON-NLS-1$
3426       case TokenNameinclude_once :
3427         return "include_once"; //$NON-NLS-1$
3428       case TokenNamelist :
3429         return "list"; //$NON-NLS-1$
3430       case TokenNamenew :
3431         return "new"; //$NON-NLS-1$
3432       case TokenNamenull :
3433         return "null"; //$NON-NLS-1$
3434       case TokenNameprint :
3435         return "print"; //$NON-NLS-1$
3436       case TokenNamerequire :
3437         return "require"; //$NON-NLS-1$
3438       case TokenNamerequire_once :
3439         return "require_once"; //$NON-NLS-1$
3440       case TokenNamereturn :
3441         return "return"; //$NON-NLS-1$
3442       case TokenNamestatic :
3443         return "static"; //$NON-NLS-1$
3444       case TokenNameswitch :
3445         return "switch"; //$NON-NLS-1$
3446       case TokenNametrue :
3447         return "true"; //$NON-NLS-1$
3448       case TokenNamevar :
3449         return "var"; //$NON-NLS-1$
3450       case TokenNamewhile :
3451         return "while"; //$NON-NLS-1$
3452       case TokenNameIntegerLiteral :
3453         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3454       case TokenNameDoubleLiteral :
3455         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3456       case TokenNameStringLiteral :
3457         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3458       case TokenNameStringConstant :
3459         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3460       case TokenNameStringInterpolated :
3461         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3462       case TokenNameHEREDOC :
3463         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3464
3465       case TokenNamePLUS_PLUS :
3466         return "++"; //$NON-NLS-1$
3467       case TokenNameMINUS_MINUS :
3468         return "--"; //$NON-NLS-1$
3469       case TokenNameEQUAL_EQUAL :
3470         return "=="; //$NON-NLS-1$
3471       case TokenNameEQUAL_GREATER :
3472         return "=>"; //$NON-NLS-1$
3473       case TokenNameLESS_EQUAL :
3474         return "<="; //$NON-NLS-1$
3475       case TokenNameGREATER_EQUAL :
3476         return ">="; //$NON-NLS-1$
3477       case TokenNameNOT_EQUAL :
3478         return "!="; //$NON-NLS-1$
3479       case TokenNameLEFT_SHIFT :
3480         return "<<"; //$NON-NLS-1$
3481       case TokenNameRIGHT_SHIFT :
3482         return ">>"; //$NON-NLS-1$
3483       case TokenNamePLUS_EQUAL :
3484         return "+="; //$NON-NLS-1$
3485       case TokenNameMINUS_EQUAL :
3486         return "-="; //$NON-NLS-1$
3487       case TokenNameMULTIPLY_EQUAL :
3488         return "*="; //$NON-NLS-1$
3489       case TokenNameDIVIDE_EQUAL :
3490         return "/="; //$NON-NLS-1$
3491       case TokenNameAND_EQUAL :
3492         return "&="; //$NON-NLS-1$
3493       case TokenNameOR_EQUAL :
3494         return "|="; //$NON-NLS-1$
3495       case TokenNameXOR_EQUAL :
3496         return "^="; //$NON-NLS-1$
3497       case TokenNameREMAINDER_EQUAL :
3498         return "%="; //$NON-NLS-1$
3499       case TokenNameLEFT_SHIFT_EQUAL :
3500         return "<<="; //$NON-NLS-1$
3501       case TokenNameRIGHT_SHIFT_EQUAL :
3502         return ">>="; //$NON-NLS-1$
3503       case TokenNameOR_OR :
3504         return "||"; //$NON-NLS-1$
3505       case TokenNameAND_AND :
3506         return "&&"; //$NON-NLS-1$
3507       case TokenNamePLUS :
3508         return "+"; //$NON-NLS-1$
3509       case TokenNameMINUS :
3510         return "-"; //$NON-NLS-1$
3511       case TokenNameMINUS_GREATER :
3512         return "->";
3513       case TokenNameNOT :
3514         return "!"; //$NON-NLS-1$
3515       case TokenNameREMAINDER :
3516         return "%"; //$NON-NLS-1$
3517       case TokenNameXOR :
3518         return "^"; //$NON-NLS-1$
3519       case TokenNameAND :
3520         return "&"; //$NON-NLS-1$
3521       case TokenNameMULTIPLY :
3522         return "*"; //$NON-NLS-1$
3523       case TokenNameOR :
3524         return "|"; //$NON-NLS-1$
3525       case TokenNameTWIDDLE :
3526         return "~"; //$NON-NLS-1$
3527       case TokenNameTWIDDLE_EQUAL :
3528         return "~="; //$NON-NLS-1$
3529       case TokenNameDIVIDE :
3530         return "/"; //$NON-NLS-1$
3531       case TokenNameGREATER :
3532         return ">"; //$NON-NLS-1$
3533       case TokenNameLESS :
3534         return "<"; //$NON-NLS-1$
3535       case TokenNameLPAREN :
3536         return "("; //$NON-NLS-1$
3537       case TokenNameRPAREN :
3538         return ")"; //$NON-NLS-1$
3539       case TokenNameLBRACE :
3540         return "{"; //$NON-NLS-1$
3541       case TokenNameRBRACE :
3542         return "}"; //$NON-NLS-1$
3543       case TokenNameLBRACKET :
3544         return "["; //$NON-NLS-1$
3545       case TokenNameRBRACKET :
3546         return "]"; //$NON-NLS-1$
3547       case TokenNameSEMICOLON :
3548         return ";"; //$NON-NLS-1$
3549       case TokenNameQUESTION :
3550         return "?"; //$NON-NLS-1$
3551       case TokenNameCOLON :
3552         return ":"; //$NON-NLS-1$
3553       case TokenNameCOMMA :
3554         return ","; //$NON-NLS-1$
3555       case TokenNameDOT :
3556         return "."; //$NON-NLS-1$
3557       case TokenNameEQUAL :
3558         return "="; //$NON-NLS-1$
3559       case TokenNameAT :
3560         return "@";
3561       case TokenNameDOLLAR_LBRACE :
3562         return "${";
3563       case TokenNameEOF :
3564         return "EOF"; //$NON-NLS-1$
3565       default :
3566         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3567     }
3568   }
3569
3570   public Scanner(
3571     boolean tokenizeComments,
3572     boolean tokenizeWhiteSpace,
3573     boolean checkNonExternalizedStringLiterals) {
3574     this(
3575       tokenizeComments,
3576       tokenizeWhiteSpace,
3577       checkNonExternalizedStringLiterals,
3578       false);
3579   }
3580
3581   public Scanner(
3582     boolean tokenizeComments,
3583     boolean tokenizeWhiteSpace,
3584     boolean checkNonExternalizedStringLiterals,
3585     boolean assertMode) {
3586     this.eofPosition = Integer.MAX_VALUE;
3587     this.tokenizeComments = tokenizeComments;
3588     this.tokenizeWhiteSpace = tokenizeWhiteSpace;
3589     this.checkNonExternalizedStringLiterals =
3590       checkNonExternalizedStringLiterals;
3591     this.assertMode = assertMode;
3592   }
3593
3594   private void checkNonExternalizeString() throws InvalidInputException {
3595     if (currentLine == null)
3596       return;
3597     parseTags(currentLine);
3598   }
3599
3600   private void parseTags(NLSLine line) throws InvalidInputException {
3601     String s = new String(getCurrentTokenSource());
3602     int pos = s.indexOf(TAG_PREFIX);
3603     int lineLength = line.size();
3604     while (pos != -1) {
3605       int start = pos + TAG_PREFIX_LENGTH;
3606       int end = s.indexOf(TAG_POSTFIX, start);
3607       String index = s.substring(start, end);
3608       int i = 0;
3609       try {
3610         i = Integer.parseInt(index) - 1;
3611         // Tags are one based not zero based.
3612       } catch (NumberFormatException e) {
3613         i = -1; // we don't want to consider this as a valid NLS tag
3614       }
3615       if (line.exists(i)) {
3616         line.set(i, null);
3617       }
3618       pos = s.indexOf(TAG_PREFIX, start);
3619     }
3620
3621     this.nonNLSStrings = new StringLiteral[lineLength];
3622     int nonNLSCounter = 0;
3623     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3624       StringLiteral literal = (StringLiteral) iterator.next();
3625       if (literal != null) {
3626         this.nonNLSStrings[nonNLSCounter++] = literal;
3627       }
3628     }
3629     if (nonNLSCounter == 0) {
3630       this.nonNLSStrings = null;
3631       currentLine = null;
3632       return;
3633     }
3634     this.wasNonExternalizedStringLiteral = true;
3635     if (nonNLSCounter != lineLength) {
3636       System.arraycopy(
3637         this.nonNLSStrings,
3638         0,
3639         (this.nonNLSStrings = new StringLiteral[nonNLSCounter]),
3640         0,
3641         nonNLSCounter);
3642     }
3643     currentLine = null;
3644   }
3645 }