Deleted unicode handling from scanner
[phpeclipse.git] / net.sourceforge.phpeclipse / src / net / sourceforge / phpdt / internal / compiler / parser / Scanner.java
1 /*******************************************************************************
2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
3  * All rights reserved. This program and the accompanying materials 
4  * are made available under the terms of the Common Public License v0.5 
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/cpl-v05.html
7  * 
8  * Contributors:
9  *     IBM Corporation - initial API and implementation
10  ******************************************************************************/
11 package net.sourceforge.phpdt.internal.compiler.parser;
12
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
16
17 import net.sourceforge.phpdt.core.compiler.*;
18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
19
20 public class Scanner implements IScanner, ITerminalSymbols {
21
22   /* APIs are
23    - getNextToken() which return the current type of the token
24      (this value is not memorized by the scanner)
25    - getCurrentTokenSource() which provides with the token "REAL" source
26      (aka all unicode have been transformed into a correct char)
27    - sourceStart gives the position into the stream
28    - currentPosition-1 gives the sourceEnd position into the stream 
29   */
30
31   // 1.4 feature 
32   private boolean assertMode;
33   public boolean useAssertAsAnIndentifier = false;
34   //flag indicating if processed source contains occurrences of keyword assert 
35   public boolean containsAssertKeyword = false;
36
37   public boolean recordLineSeparator;
38   public boolean phpMode = false;
39
40   public char currentCharacter;
41   public int startPosition;
42   public int currentPosition;
43   public int initialPosition, eofPosition;
44   // after this position EOF tokens are generated instead of real tokens from the source
45
46   public boolean tokenizeComments;
47   public boolean tokenizeWhiteSpace;
48
49   //source should be viewed as a window (aka a part)
50   //of a entire very large stream
51   public char source[];
52
53   //unicode support
54   public char[] withoutUnicodeBuffer;
55   public int withoutUnicodePtr;
56   //when == 0 ==> no unicode in the current token
57   public boolean unicodeAsBackSlash = false;
58
59   public boolean scanningFloatLiteral = false;
60
61   //support for /** comments
62   //public char[][] comments = new char[10][];
63   public int[] commentStops = new int[10];
64   public int[] commentStarts = new int[10];
65   public int commentPtr = -1; // no comment test with commentPtr value -1
66
67   //diet parsing support - jump over some method body when requested
68   public boolean diet = false;
69
70   //support for the  poor-line-debuggers ....
71   //remember the position of the cr/lf
72   public int[] lineEnds = new int[250];
73   public int linePtr = -1;
74   public boolean wasAcr = false;
75
76   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
77
78   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
79   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
80   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
81   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
82   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
83   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
84   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
85
86   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
87   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
88   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
89   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
90
91   //----------------optimized identifier management------------------
92   static final char[] charArray_a = new char[] { 'a' },
93     charArray_b = new char[] { 'b' },
94     charArray_c = new char[] { 'c' },
95     charArray_d = new char[] { 'd' },
96     charArray_e = new char[] { 'e' },
97     charArray_f = new char[] { 'f' },
98     charArray_g = new char[] { 'g' },
99     charArray_h = new char[] { 'h' },
100     charArray_i = new char[] { 'i' },
101     charArray_j = new char[] { 'j' },
102     charArray_k = new char[] { 'k' },
103     charArray_l = new char[] { 'l' },
104     charArray_m = new char[] { 'm' },
105     charArray_n = new char[] { 'n' },
106     charArray_o = new char[] { 'o' },
107     charArray_p = new char[] { 'p' },
108     charArray_q = new char[] { 'q' },
109     charArray_r = new char[] { 'r' },
110     charArray_s = new char[] { 's' },
111     charArray_t = new char[] { 't' },
112     charArray_u = new char[] { 'u' },
113     charArray_v = new char[] { 'v' },
114     charArray_w = new char[] { 'w' },
115     charArray_x = new char[] { 'x' },
116     charArray_y = new char[] { 'y' },
117     charArray_z = new char[] { 'z' };
118
119   static final char[] initCharArray =
120     new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
121   static final int TableSize = 30, InternalTableSize = 6;
122   //30*6 = 180 entries
123   public static final int OptimizedLength = 6;
124   public /*static*/
125   final char[][][][] charArray_length =
126     new char[OptimizedLength][TableSize][InternalTableSize][];
127   // support for detecting non-externalized string literals
128   int currentLineNr = -1;
129   int previousLineNr = -1;
130   NLSLine currentLine = null;
131   List lines = new ArrayList();
132   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
133   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
134   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
135   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
136   public StringLiteral[] nonNLSStrings = null;
137   public boolean checkNonExternalizedStringLiterals = true;
138   public boolean wasNonExternalizedStringLiteral = false;
139
140   /*static*/ {
141     for (int i = 0; i < 6; i++) {
142       for (int j = 0; j < TableSize; j++) {
143         for (int k = 0; k < InternalTableSize; k++) {
144           charArray_length[i][j][k] = initCharArray;
145         }
146       }
147     }
148   }
149   static int newEntry2 = 0,
150     newEntry3 = 0,
151     newEntry4 = 0,
152     newEntry5 = 0,
153     newEntry6 = 0;
154
155   public static final int RoundBracket = 0;
156   public static final int SquareBracket = 1;
157   public static final int CurlyBracket = 2;
158   public static final int BracketKinds = 3;
159
160   public static final boolean DEBUG = false;
161   public Scanner() {
162     this(false, false);
163   }
164   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
165     this(tokenizeComments, tokenizeWhiteSpace, false);
166   }
167
168   /**
169    * Determines if the specified character is
170    * permissible as the first character in a PHP identifier
171    */
172   public static boolean isPHPIdentifierStart(char ch) {
173     return Character.isLetter(ch)
174       || (ch == '_')
175       || (0x7F <= ch && ch <= 0xFF);
176   }
177
178   /**
179    * Determines if the specified character may be part of a PHP identifier as
180    * other than the first character
181    */
182   public static boolean isPHPIdentifierPart(char ch) {
183     return Character.isLetterOrDigit(ch)
184       || (ch == '_')
185       || (0x7F <= ch && ch <= 0xFF);
186   }
187
188   public final boolean atEnd() {
189     // This code is not relevant if source is 
190     // Only a part of the real stream input
191
192     return source.length == currentPosition;
193   }
194   public char[] getCurrentIdentifierSource() {
195     //return the token REAL source (aka unicodes are precomputed)
196
197     char[] result;
198 //    if (withoutUnicodePtr != 0)
199 //      //0 is used as a fast test flag so the real first char is in position 1
200 //      System.arraycopy(
201 //        withoutUnicodeBuffer,
202 //        1,
203 //        result = new char[withoutUnicodePtr],
204 //        0,
205 //        withoutUnicodePtr);
206 //    else {
207       int length = currentPosition - startPosition;
208       switch (length) { // see OptimizedLength
209         case 1 :
210           return optimizedCurrentTokenSource1();
211         case 2 :
212           return optimizedCurrentTokenSource2();
213         case 3 :
214           return optimizedCurrentTokenSource3();
215         case 4 :
216           return optimizedCurrentTokenSource4();
217         case 5 :
218           return optimizedCurrentTokenSource5();
219         case 6 :
220           return optimizedCurrentTokenSource6();
221       }
222       //no optimization
223       System.arraycopy(
224         source,
225         startPosition,
226         result = new char[length],
227         0,
228         length);
229  //   }
230     return result;
231   }
232   public int getCurrentTokenEndPosition() {
233     return this.currentPosition - 1;
234   }
235   public final char[] getCurrentTokenSource() {
236     // Return the token REAL source (aka unicodes are precomputed)
237
238     char[] result;
239 //    if (withoutUnicodePtr != 0)
240 //      // 0 is used as a fast test flag so the real first char is in position 1
241 //      System.arraycopy(
242 //        withoutUnicodeBuffer,
243 //        1,
244 //        result = new char[withoutUnicodePtr],
245 //        0,
246 //        withoutUnicodePtr);
247 //    else {
248       int length;
249       System.arraycopy(
250         source,
251         startPosition,
252         result = new char[length = currentPosition - startPosition],
253         0,
254         length);
255 //    }
256     return result;
257   }
258
259   public final char[] getCurrentTokenSource(int startPos) {
260     // Return the token REAL source (aka unicodes are precomputed)
261
262     char[] result;
263 //    if (withoutUnicodePtr != 0)
264 //      // 0 is used as a fast test flag so the real first char is in position 1
265 //      System.arraycopy(
266 //        withoutUnicodeBuffer,
267 //        1,
268 //        result = new char[withoutUnicodePtr],
269 //        0,
270 //        withoutUnicodePtr);
271 //    else {
272       int length;
273       System.arraycopy(
274         source,
275         startPos,
276         result = new char[length = currentPosition - startPos],
277         0,
278         length);
279   //  }
280     return result;
281   }
282
283   public final char[] getCurrentTokenSourceString() {
284     //return the token REAL source (aka unicodes are precomputed).
285     //REMOVE the two " that are at the beginning and the end.
286
287     char[] result;
288     if (withoutUnicodePtr != 0)
289       //0 is used as a fast test flag so the real first char is in position 1
290       System.arraycopy(withoutUnicodeBuffer, 2,
291       //2 is 1 (real start) + 1 (to jump over the ")
292       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
293     else {
294       int length;
295       System.arraycopy(
296         source,
297         startPosition + 1,
298         result = new char[length = currentPosition - startPosition - 2],
299         0,
300         length);
301     }
302     return result;
303   }
304   public int getCurrentTokenStartPosition() {
305     return this.startPosition;
306   }
307   /*
308    * Search the source position corresponding to the end of a given line number
309    *
310    * Line numbers are 1-based, and relative to the scanner initialPosition. 
311    * Character positions are 0-based.
312    *
313    * In case the given line number is inconsistent, answers -1.
314    */
315   public final int getLineEnd(int lineNumber) {
316
317     if (lineEnds == null)
318       return -1;
319     if (lineNumber >= lineEnds.length)
320       return -1;
321     if (lineNumber <= 0)
322       return -1;
323
324     if (lineNumber == lineEnds.length - 1)
325       return eofPosition;
326     return lineEnds[lineNumber - 1];
327     // next line start one character behind the lineEnd of the previous line
328   }
329   /**
330    * Search the source position corresponding to the beginning of a given line number
331    *
332    * Line numbers are 1-based, and relative to the scanner initialPosition. 
333    * Character positions are 0-based.
334    *
335    * e.g.       getLineStart(1) --> 0   i.e. first line starts at character 0.
336    *
337    * In case the given line number is inconsistent, answers -1.
338    */
339   public final int getLineStart(int lineNumber) {
340
341     if (lineEnds == null)
342       return -1;
343     if (lineNumber >= lineEnds.length)
344       return -1;
345     if (lineNumber <= 0)
346       return -1;
347
348     if (lineNumber == 1)
349       return initialPosition;
350     return lineEnds[lineNumber - 2] + 1;
351     // next line start one character behind the lineEnd of the previous line
352   }
353   public final boolean getNextChar(char testedChar) {
354     //BOOLEAN
355     //handle the case of unicode.
356     //when a unicode appears then we must use a buffer that holds char internal values
357     //At the end of this method currentCharacter holds the new visited char
358     //and currentPosition points right next after it
359     //Both previous lines are true if the currentCharacter is == to the testedChar
360     //On false, no side effect has occured.
361
362     //ALL getNextChar.... ARE OPTIMIZED COPIES 
363
364     int temp = currentPosition;
365     try {
366       currentCharacter = source[currentPosition++];
367 //      if (((currentCharacter = source[currentPosition++]) == '\\')
368 //        && (source[currentPosition] == 'u')) {
369 //        //-------------unicode traitement ------------
370 //        int c1, c2, c3, c4;
371 //        int unicodeSize = 6;
372 //        currentPosition++;
373 //        while (source[currentPosition] == 'u') {
374 //          currentPosition++;
375 //          unicodeSize++;
376 //        }
377 //
378 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
379 //          || c1 < 0)
380 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
381 //            || c2 < 0)
382 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
383 //            || c3 < 0)
384 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
385 //            || c4 < 0)) {
386 //          currentPosition = temp;
387 //          return false;
388 //        }
389 //
390 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
391 //        if (currentCharacter != testedChar) {
392 //          currentPosition = temp;
393 //          return false;
394 //        }
395 //        unicodeAsBackSlash = currentCharacter == '\\';
396 //
397 //        //need the unicode buffer
398 //        if (withoutUnicodePtr == 0) {
399 //          //buffer all the entries that have been left aside....
400 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
401 //          System.arraycopy(
402 //            source,
403 //            startPosition,
404 //            withoutUnicodeBuffer,
405 //            1,
406 //            withoutUnicodePtr);
407 //        }
408 //        //fill the buffer with the char
409 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
410 //        return true;
411 //
412 //      } //-------------end unicode traitement--------------
413 //      else {
414         if (currentCharacter != testedChar) {
415           currentPosition = temp;
416           return false;
417         }
418         unicodeAsBackSlash = false;
419 //        if (withoutUnicodePtr != 0)
420 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
421         return true;
422 //      }
423     } catch (IndexOutOfBoundsException e) {
424       unicodeAsBackSlash = false;
425       currentPosition = temp;
426       return false;
427     }
428   }
429   public final int getNextChar(char testedChar1, char testedChar2) {
430     //INT 0 : testChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
431     //test can be done with (x==0) for the first and (x>0) for the second
432     //handle the case of unicode.
433     //when a unicode appears then we must use a buffer that holds char internal values
434     //At the end of this method currentCharacter holds the new visited char
435     //and currentPosition points right next after it
436     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
437     //On false, no side effect has occured.
438
439     //ALL getNextChar.... ARE OPTIMIZED COPIES 
440
441     int temp = currentPosition;
442     try {
443       int result;
444       currentCharacter = source[currentPosition++];
445 //      if (((currentCharacter = source[currentPosition++]) == '\\')
446 //        && (source[currentPosition] == 'u')) {
447 //        //-------------unicode traitement ------------
448 //        int c1, c2, c3, c4;
449 //        int unicodeSize = 6;
450 //        currentPosition++;
451 //        while (source[currentPosition] == 'u') {
452 //          currentPosition++;
453 //          unicodeSize++;
454 //        }
455 //
456 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
457 //          || c1 < 0)
458 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
459 //            || c2 < 0)
460 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
461 //            || c3 < 0)
462 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
463 //            || c4 < 0)) {
464 //          currentPosition = temp;
465 //          return 2;
466 //        }
467 //
468 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
469 //        if (currentCharacter == testedChar1)
470 //          result = 0;
471 //        else if (currentCharacter == testedChar2)
472 //          result = 1;
473 //        else {
474 //          currentPosition = temp;
475 //          return -1;
476 //        }
477 //
478 //        //need the unicode buffer
479 //        if (withoutUnicodePtr == 0) {
480 //          //buffer all the entries that have been left aside....
481 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
482 //          System.arraycopy(
483 //            source,
484 //            startPosition,
485 //            withoutUnicodeBuffer,
486 //            1,
487 //            withoutUnicodePtr);
488 //        }
489 //        //fill the buffer with the char
490 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
491 //        return result;
492 //      } //-------------end unicode traitement--------------
493 //      else {
494         if (currentCharacter == testedChar1)
495           result = 0;
496         else if (currentCharacter == testedChar2)
497           result = 1;
498         else {
499           currentPosition = temp;
500           return -1;
501         }
502
503 //        if (withoutUnicodePtr != 0)
504 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
505         return result;
506  //     }
507     } catch (IndexOutOfBoundsException e) {
508       currentPosition = temp;
509       return -1;
510     }
511   }
512   public final boolean getNextCharAsDigit() {
513     //BOOLEAN
514     //handle the case of unicode.
515     //when a unicode appears then we must use a buffer that holds char internal values
516     //At the end of this method currentCharacter holds the new visited char
517     //and currentPosition points right next after it
518     //Both previous lines are true if the currentCharacter is a digit
519     //On false, no side effect has occured.
520
521     //ALL getNextChar.... ARE OPTIMIZED COPIES 
522
523     int temp = currentPosition;
524     try {
525       currentCharacter = source[currentPosition++];
526 //      if (((currentCharacter = source[currentPosition++]) == '\\')
527 //        && (source[currentPosition] == 'u')) {
528 //        //-------------unicode traitement ------------
529 //        int c1, c2, c3, c4;
530 //        int unicodeSize = 6;
531 //        currentPosition++;
532 //        while (source[currentPosition] == 'u') {
533 //          currentPosition++;
534 //          unicodeSize++;
535 //        }
536 //
537 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
538 //          || c1 < 0)
539 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
540 //            || c2 < 0)
541 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
542 //            || c3 < 0)
543 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
544 //            || c4 < 0)) {
545 //          currentPosition = temp;
546 //          return false;
547 //        }
548 //
549 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
550 //        if (!Character.isDigit(currentCharacter)) {
551 //          currentPosition = temp;
552 //          return false;
553 //        }
554 //
555 //        //need the unicode buffer
556 //        if (withoutUnicodePtr == 0) {
557 //          //buffer all the entries that have been left aside....
558 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
559 //          System.arraycopy(
560 //            source,
561 //            startPosition,
562 //            withoutUnicodeBuffer,
563 //            1,
564 //            withoutUnicodePtr);
565 //        }
566 //        //fill the buffer with the char
567 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
568 //        return true;
569 //      } //-------------end unicode traitement--------------
570 //      else {
571         if (!Character.isDigit(currentCharacter)) {
572           currentPosition = temp;
573           return false;
574         }
575 //        if (withoutUnicodePtr != 0)
576 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
577         return true;
578 //      }
579     } catch (IndexOutOfBoundsException e) {
580       currentPosition = temp;
581       return false;
582     }
583   }
584   public final boolean getNextCharAsDigit(int radix) {
585     //BOOLEAN
586     //handle the case of unicode.
587     //when a unicode appears then we must use a buffer that holds char internal values
588     //At the end of this method currentCharacter holds the new visited char
589     //and currentPosition points right next after it
590     //Both previous lines are true if the currentCharacter is a digit base on radix
591     //On false, no side effect has occured.
592
593     //ALL getNextChar.... ARE OPTIMIZED COPIES 
594
595     int temp = currentPosition;
596     try {
597       currentCharacter = source[currentPosition++];
598 //      if (((currentCharacter = source[currentPosition++]) == '\\')
599 //        && (source[currentPosition] == 'u')) {
600 //        //-------------unicode traitement ------------
601 //        int c1, c2, c3, c4;
602 //        int unicodeSize = 6;
603 //        currentPosition++;
604 //        while (source[currentPosition] == 'u') {
605 //          currentPosition++;
606 //          unicodeSize++;
607 //        }
608 //
609 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
610 //          || c1 < 0)
611 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
612 //            || c2 < 0)
613 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
614 //            || c3 < 0)
615 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
616 //            || c4 < 0)) {
617 //          currentPosition = temp;
618 //          return false;
619 //        }
620 //
621 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
622 //        if (Character.digit(currentCharacter, radix) == -1) {
623 //          currentPosition = temp;
624 //          return false;
625 //        }
626 //
627 //        //need the unicode buffer
628 //        if (withoutUnicodePtr == 0) {
629 //          //buffer all the entries that have been left aside....
630 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
631 //          System.arraycopy(
632 //            source,
633 //            startPosition,
634 //            withoutUnicodeBuffer,
635 //            1,
636 //            withoutUnicodePtr);
637 //        }
638 //        //fill the buffer with the char
639 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
640 //        return true;
641 //      } //-------------end unicode traitement--------------
642 //      else {
643         if (Character.digit(currentCharacter, radix) == -1) {
644           currentPosition = temp;
645           return false;
646         }
647 //        if (withoutUnicodePtr != 0)
648 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
649         return true;
650 //      }
651     } catch (IndexOutOfBoundsException e) {
652       currentPosition = temp;
653       return false;
654     }
655   }
656   public boolean getNextCharAsJavaIdentifierPart() {
657     //BOOLEAN
658     //handle the case of unicode.
659     //when a unicode appears then we must use a buffer that holds char internal values
660     //At the end of this method currentCharacter holds the new visited char
661     //and currentPosition points right next after it
662     //Both previous lines are true if the currentCharacter is a JavaIdentifierPart
663     //On false, no side effect has occured.
664
665     //ALL getNextChar.... ARE OPTIMIZED COPIES 
666
667     int temp = currentPosition;
668     try {
669       currentCharacter = source[currentPosition++];
670 //      if (((currentCharacter = source[currentPosition++]) == '\\')
671 //        && (source[currentPosition] == 'u')) {
672 //        //-------------unicode traitement ------------
673 //        int c1, c2, c3, c4;
674 //        int unicodeSize = 6;
675 //        currentPosition++;
676 //        while (source[currentPosition] == 'u') {
677 //          currentPosition++;
678 //          unicodeSize++;
679 //        }
680 //
681 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
682 //          || c1 < 0)
683 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
684 //            || c2 < 0)
685 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
686 //            || c3 < 0)
687 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
688 //            || c4 < 0)) {
689 //          currentPosition = temp;
690 //          return false;
691 //        }
692 //
693 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
694 //        if (!isPHPIdentifierPart(currentCharacter)) {
695 //          currentPosition = temp;
696 //          return false;
697 //        }
698 //
699 //        //need the unicode buffer
700 //        if (withoutUnicodePtr == 0) {
701 //          //buffer all the entries that have been left aside....
702 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
703 //          System.arraycopy(
704 //            source,
705 //            startPosition,
706 //            withoutUnicodeBuffer,
707 //            1,
708 //            withoutUnicodePtr);
709 //        }
710 //        //fill the buffer with the char
711 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
712 //        return true;
713 //      } //-------------end unicode traitement--------------
714 //      else {
715         if (!isPHPIdentifierPart(currentCharacter)) {
716           currentPosition = temp;
717           return false;
718         }
719
720 //        if (withoutUnicodePtr != 0)
721 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
722         return true;
723 //      }
724     } catch (IndexOutOfBoundsException e) {
725       currentPosition = temp;
726       return false;
727     }
728   }
729
730   public int getNextToken() throws InvalidInputException {
731     int htmlPosition = currentPosition;
732     try {
733       while (!phpMode) {
734         currentCharacter = source[currentPosition++];
735         if (currentCharacter == '<') {
736           if (getNextChar('?')) {
737             currentCharacter = source[currentPosition++];
738             if ((currentCharacter == ' ')
739               || Character.isWhitespace(currentCharacter)) {
740               // <?
741               startPosition = currentPosition;
742               phpMode = true;
743               if (tokenizeWhiteSpace) {
744                 // && (whiteStart != currentPosition - 1)) {
745                 // reposition scanner in case we are interested by spaces as tokens
746                 startPosition = htmlPosition;
747                 return TokenNameHTML;
748               }
749             } else {
750               boolean phpStart =
751                 (currentCharacter == 'P') || (currentCharacter == 'p');
752               if (phpStart) {
753                 int test = getNextChar('H', 'h');
754                 if (test >= 0) {
755                   test = getNextChar('P', 'p');
756                   if (test >= 0) {
757                     // <?PHP  <?php
758                     startPosition = currentPosition;
759                     phpMode = true;
760
761                     if (tokenizeWhiteSpace) {
762                       // && (whiteStart != currentPosition - 1)) {
763                       // reposition scanner in case we are interested by spaces as tokens
764                       startPosition = htmlPosition;
765                       return TokenNameHTML;
766                     }
767                   }
768                 }
769               }
770             }
771           }
772         }
773
774         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
775           if (recordLineSeparator) {
776             pushLineSeparator();
777           } else {
778             currentLine = null;
779           }
780         }
781       }
782     } //-----------------end switch while try--------------------
783     catch (IndexOutOfBoundsException e) {
784       if (tokenizeWhiteSpace) {
785         // && (whiteStart != currentPosition - 1)) {
786         // reposition scanner in case we are interested by spaces as tokens
787         startPosition = htmlPosition;
788       }
789       return TokenNameEOF;
790     }
791
792     if (phpMode) {
793       this.wasAcr = false;
794       if (diet) {
795         jumpOverMethodBody();
796         diet = false;
797         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
798       }
799       try {
800         while (true) { //loop for jumping over comments
801           withoutUnicodePtr = 0;
802           //start with a new token (even comment written with unicode )
803
804           // ---------Consume white space and handles startPosition---------
805           int whiteStart = currentPosition;
806           boolean isWhiteSpace;
807           do {
808             startPosition = currentPosition;
809             currentCharacter = source[currentPosition++];
810 //            if (((currentCharacter = source[currentPosition++]) == '\\')
811 //              && (source[currentPosition] == 'u')) {
812 //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
813 //            } else {
814               if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
815                 checkNonExternalizeString();
816                 if (recordLineSeparator) {
817                   pushLineSeparator();
818                 } else {
819                   currentLine = null;
820                 }
821               }
822               isWhiteSpace =
823                 (currentCharacter == ' ')
824                   || Character.isWhitespace(currentCharacter);
825 //            }
826           } while (isWhiteSpace);
827           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
828             // reposition scanner in case we are interested by spaces as tokens
829             currentPosition--;
830             startPosition = whiteStart;
831             return TokenNameWHITESPACE;
832           }
833           //little trick to get out in the middle of a source computation
834           if (currentPosition > eofPosition)
835             return TokenNameEOF;
836
837           // ---------Identify the next token-------------
838
839           switch (currentCharacter) {
840             case '(' :
841               return TokenNameLPAREN;
842             case ')' :
843               return TokenNameRPAREN;
844             case '{' :
845               return TokenNameLBRACE;
846             case '}' :
847               return TokenNameRBRACE;
848             case '[' :
849               return TokenNameLBRACKET;
850             case ']' :
851               return TokenNameRBRACKET;
852             case ';' :
853               return TokenNameSEMICOLON;
854             case ',' :
855               return TokenNameCOMMA;
856
857             case '.' :
858               if (getNextCharAsDigit())
859                 return scanNumber(true);
860               return TokenNameDOT;
861             case '+' :
862               {
863                 int test;
864                 if ((test = getNextChar('+', '=')) == 0)
865                   return TokenNamePLUS_PLUS;
866                 if (test > 0)
867                   return TokenNamePLUS_EQUAL;
868                 return TokenNamePLUS;
869               }
870             case '-' :
871               {
872                 int test;
873                 if ((test = getNextChar('-', '=')) == 0)
874                   return TokenNameMINUS_MINUS;
875                 if (test > 0)
876                   return TokenNameMINUS_EQUAL;
877                 if (getNextChar('>'))
878                   return TokenNameMINUS_GREATER;
879
880                 return TokenNameMINUS;
881               }
882             case '~' :
883               if (getNextChar('='))
884                 return TokenNameTWIDDLE_EQUAL;
885               return TokenNameTWIDDLE;
886             case '!' :
887               if (getNextChar('='))
888                 return TokenNameNOT_EQUAL;
889               return TokenNameNOT;
890             case '*' :
891               if (getNextChar('='))
892                 return TokenNameMULTIPLY_EQUAL;
893               return TokenNameMULTIPLY;
894             case '%' :
895               if (getNextChar('='))
896                 return TokenNameREMAINDER_EQUAL;
897               return TokenNameREMAINDER;
898             case '<' :
899               {
900                 int test;
901                 if ((test = getNextChar('=', '<')) == 0)
902                   return TokenNameLESS_EQUAL;
903                 if (test > 0) {
904                   if (getNextChar('='))
905                     return TokenNameLEFT_SHIFT_EQUAL;
906                   if (getNextChar('<')) {
907                     int heredocStart = currentPosition;
908                     int heredocLength = 0;
909                     currentCharacter = source[currentPosition++];
910                     if (isPHPIdentifierStart(currentCharacter)) {
911                       currentCharacter = source[currentPosition++];
912                     } else {
913                       return TokenNameERROR;
914                     }
915                     while (isPHPIdentifierPart(currentCharacter)) {
916                       currentCharacter = source[currentPosition++];
917                     }
918
919                     heredocLength = currentPosition - heredocStart - 1;
920
921                     // heredoc end-tag determination
922                     boolean endTag = true;
923                     char ch;
924                     do {
925                       ch = source[currentPosition++];
926                       if (ch == '\r' || ch == '\n') {
927                         if (recordLineSeparator) {
928                           pushLineSeparator();
929                         } else {
930                           currentLine = null;
931                         }
932                         for (int i = 0; i < heredocLength; i++) {
933                           if (source[currentPosition + i]
934                             != source[heredocStart + i]) {
935                             endTag = false;
936                             break;
937                           }
938                         }
939                         if (endTag) {
940                           currentPosition += heredocLength - 1;
941                           currentCharacter = source[currentPosition++];
942                           break; // do...while loop
943                         } else {
944                           endTag = true;
945                         }
946                       }
947
948                     } while (true);
949
950                     return TokenNameHEREDOC;
951                   }
952                   return TokenNameLEFT_SHIFT;
953                 }
954                 return TokenNameLESS;
955               }
956             case '>' :
957               {
958                 int test;
959                 if ((test = getNextChar('=', '>')) == 0)
960                   return TokenNameGREATER_EQUAL;
961                 if (test > 0) {
962                   if ((test = getNextChar('=', '>')) == 0)
963                     return TokenNameRIGHT_SHIFT_EQUAL;
964                   return TokenNameRIGHT_SHIFT;
965                 }
966                 return TokenNameGREATER;
967               }
968             case '=' :
969               if (getNextChar('='))
970                 return TokenNameEQUAL_EQUAL;
971               if (getNextChar('>'))
972                 return TokenNameEQUAL_GREATER;
973               return TokenNameEQUAL;
974             case '&' :
975               {
976                 int test;
977                 if ((test = getNextChar('&', '=')) == 0)
978                   return TokenNameAND_AND;
979                 if (test > 0)
980                   return TokenNameAND_EQUAL;
981                 return TokenNameAND;
982               }
983             case '|' :
984               {
985                 int test;
986                 if ((test = getNextChar('|', '=')) == 0)
987                   return TokenNameOR_OR;
988                 if (test > 0)
989                   return TokenNameOR_EQUAL;
990                 return TokenNameOR;
991               }
992             case '^' :
993               if (getNextChar('='))
994                 return TokenNameXOR_EQUAL;
995               return TokenNameXOR;
996             case '?' :
997               if (getNextChar('>')) {
998                 phpMode = false;
999                 return TokenNameStopPHP;
1000               }
1001               return TokenNameQUESTION;
1002             case ':' :
1003               if (getNextChar(':'))
1004                 return TokenNameCOLON_COLON;
1005               return TokenNameCOLON;
1006             case '@' :
1007               return TokenNameAT;
1008               //                                        case '\'' :
1009               //                                                {
1010               //                                                        int test;
1011               //                                                        if ((test = getNextChar('\n', '\r')) == 0) {
1012               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1013               //                                                        }
1014               //                                                        if (test > 0) {
1015               //                                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1016               //                                                                for (int lookAhead = 0;
1017               //                                                                        lookAhead < 3;
1018               //                                                                        lookAhead++) {
1019               //                                                                        if (currentPosition + lookAhead
1020               //                                                                                == source.length)
1021               //                                                                                break;
1022               //                                                                        if (source[currentPosition + lookAhead]
1023               //                                                                                == '\n')
1024               //                                                                                break;
1025               //                                                                        if (source[currentPosition + lookAhead]
1026               //                                                                                == '\'') {
1027               //                                                                                currentPosition += lookAhead + 1;
1028               //                                                                                break;
1029               //                                                                        }
1030               //                                                                }
1031               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1032               //                                                        }
1033               //                                                }
1034               //                                                if (getNextChar('\'')) {
1035               //                                                        // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1036               //                                                        for (int lookAhead = 0;
1037               //                                                                lookAhead < 3;
1038               //                                                                lookAhead++) {
1039               //                                                                if (currentPosition + lookAhead
1040               //                                                                        == source.length)
1041               //                                                                        break;
1042               //                                                                if (source[currentPosition + lookAhead]
1043               //                                                                        == '\n')
1044               //                                                                        break;
1045               //                                                                if (source[currentPosition + lookAhead]
1046               //                                                                        == '\'') {
1047               //                                                                        currentPosition += lookAhead + 1;
1048               //                                                                        break;
1049               //                                                                }
1050               //                                                        }
1051               //                                                        throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1052               //                                                }
1053               //                                                if (getNextChar('\\'))
1054               //                                                        scanEscapeCharacter();
1055               //                                                else { // consume next character
1056               //                                                        unicodeAsBackSlash = false;
1057               //                                                        if (((currentCharacter = source[currentPosition++])
1058               //                                                                == '\\')
1059               //                                                                && (source[currentPosition] == 'u')) {
1060               //                                                                getNextUnicodeChar();
1061               //                                                        } else {
1062               //                                                                if (withoutUnicodePtr != 0) {
1063               //                                                                        withoutUnicodeBuffer[++withoutUnicodePtr] =
1064               //                                                                                currentCharacter;
1065               //                                                                }
1066               //                                                        }
1067               //                                                }
1068               //                                                //            if (getNextChar('\''))
1069               //                                                //              return TokenNameCharacterLiteral;
1070               //                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1071               //                                                for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
1072               //                                                        if (currentPosition + lookAhead == source.length)
1073               //                                                                break;
1074               //                                                        if (source[currentPosition + lookAhead] == '\n')
1075               //                                                                break;
1076               //                                                        if (source[currentPosition + lookAhead] == '\'') {
1077               //                                                                currentPosition += lookAhead + 1;
1078               //                                                                break;
1079               //                                                        }
1080               //                                                }
1081               //                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
1082             case '\'' :
1083               try {
1084                 // consume next character
1085                 unicodeAsBackSlash = false;
1086                 currentCharacter = source[currentPosition++];
1087 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1088 //                  && (source[currentPosition] == 'u')) {
1089 //                  getNextUnicodeChar();
1090 //                } else {
1091 //                  if (withoutUnicodePtr != 0) {
1092 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1093 //                      currentCharacter;
1094 //                  }
1095 //                }
1096
1097                 while (currentCharacter != '\'') {
1098
1099                   /**** in PHP \r and \n are valid in string literals ****/
1100                   //                  if ((currentCharacter == '\n')
1101                   //                    || (currentCharacter == '\r')) {
1102                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1103                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1104                   //                      if (currentPosition + lookAhead == source.length)
1105                   //                        break;
1106                   //                      if (source[currentPosition + lookAhead] == '\n')
1107                   //                        break;
1108                   //                      if (source[currentPosition + lookAhead] == '\"') {
1109                   //                        currentPosition += lookAhead + 1;
1110                   //                        break;
1111                   //                      }
1112                   //                    }
1113                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1114                   //                  }
1115                   if (currentCharacter == '\\') {
1116                     int escapeSize = currentPosition;
1117                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1118                     //scanEscapeCharacter has a side effect on this value, and we need the previous value a few lines below
1119                     scanSingleQuotedEscapeCharacter();
1120                     escapeSize = currentPosition - escapeSize;
1121                     if (withoutUnicodePtr == 0) {
1122                       //buffer all the entries that have been left aside....
1123                       withoutUnicodePtr =
1124                         currentPosition - escapeSize - 1 - startPosition;
1125                       System.arraycopy(
1126                         source,
1127                         startPosition,
1128                         withoutUnicodeBuffer,
1129                         1,
1130                         withoutUnicodePtr);
1131                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1132                         currentCharacter;
1133                     } else { //overwrite the / in the buffer
1134                       withoutUnicodeBuffer[withoutUnicodePtr] =
1135                         currentCharacter;
1136                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1137                         withoutUnicodePtr--;
1138                       }
1139                     }
1140                   }
1141                   // consume next character
1142                   unicodeAsBackSlash = false;
1143                   currentCharacter = source[currentPosition++];
1144 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1145 //                    && (source[currentPosition] == 'u')) {
1146 //                    getNextUnicodeChar();
1147 //                  } else {
1148                     if (withoutUnicodePtr != 0) {
1149                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1150                         currentCharacter;
1151                     }
1152 //                  }
1153
1154                 }
1155               } catch (IndexOutOfBoundsException e) {
1156                 throw new InvalidInputException(UNTERMINATED_STRING);
1157               } catch (InvalidInputException e) {
1158                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1159                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1160                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1161                     if (currentPosition + lookAhead == source.length)
1162                       break;
1163                     if (source[currentPosition + lookAhead] == '\n')
1164                       break;
1165                     if (source[currentPosition + lookAhead] == '\'') {
1166                       currentPosition += lookAhead + 1;
1167                       break;
1168                     }
1169                   }
1170
1171                 }
1172                 throw e; // rethrow
1173               }
1174               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1175                 if (currentLine == null) {
1176                   currentLine = new NLSLine();
1177                   lines.add(currentLine);
1178                 }
1179                 currentLine.add(
1180                   new StringLiteral(
1181                     getCurrentTokenSourceString(),
1182                     startPosition,
1183                     currentPosition - 1));
1184               }
1185               return TokenNameStringConstant;
1186             case '"' :
1187               try {
1188                 // consume next character
1189                 unicodeAsBackSlash = false;
1190                 currentCharacter = source[currentPosition++];
1191 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1192 //                  && (source[currentPosition] == 'u')) {
1193 //                  getNextUnicodeChar();
1194 //                } else {
1195 //                  if (withoutUnicodePtr != 0) {
1196 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1197 //                      currentCharacter;
1198 //                  }
1199 //                }
1200
1201                 while (currentCharacter != '"') {
1202
1203                   /**** in PHP \r and \n are valid in string literals ****/
1204                   //                  if ((currentCharacter == '\n')
1205                   //                    || (currentCharacter == '\r')) {
1206                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1207                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1208                   //                      if (currentPosition + lookAhead == source.length)
1209                   //                        break;
1210                   //                      if (source[currentPosition + lookAhead] == '\n')
1211                   //                        break;
1212                   //                      if (source[currentPosition + lookAhead] == '\"') {
1213                   //                        currentPosition += lookAhead + 1;
1214                   //                        break;
1215                   //                      }
1216                   //                    }
1217                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1218                   //                  }
1219                   if (currentCharacter == '\\') {
1220                     int escapeSize = currentPosition;
1221                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1222                     //scanEscapeCharacter has a side effect on this value, and we need the previous value a few lines below
1223                     scanDoubleQuotedEscapeCharacter();
1224                     escapeSize = currentPosition - escapeSize;
1225                     if (withoutUnicodePtr == 0) {
1226                       //buffer all the entries that have been left aside....
1227                       withoutUnicodePtr =
1228                         currentPosition - escapeSize - 1 - startPosition;
1229                       System.arraycopy(
1230                         source,
1231                         startPosition,
1232                         withoutUnicodeBuffer,
1233                         1,
1234                         withoutUnicodePtr);
1235                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1236                         currentCharacter;
1237                     } else { //overwrite the / in the buffer
1238                       withoutUnicodeBuffer[withoutUnicodePtr] =
1239                         currentCharacter;
1240                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1241                         withoutUnicodePtr--;
1242                       }
1243                     }
1244                   }
1245                   // consume next character
1246                   unicodeAsBackSlash = false;
1247                   currentCharacter = source[currentPosition++];
1248 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1249 //                    && (source[currentPosition] == 'u')) {
1250 //                    getNextUnicodeChar();
1251 //                  } else {
1252                     if (withoutUnicodePtr != 0) {
1253                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1254                         currentCharacter;
1255                     }
1256 //                  }
1257
1258                 }
1259               } catch (IndexOutOfBoundsException e) {
1260                 throw new InvalidInputException(UNTERMINATED_STRING);
1261               } catch (InvalidInputException e) {
1262                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1263                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1264                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1265                     if (currentPosition + lookAhead == source.length)
1266                       break;
1267                     if (source[currentPosition + lookAhead] == '\n')
1268                       break;
1269                     if (source[currentPosition + lookAhead] == '\"') {
1270                       currentPosition += lookAhead + 1;
1271                       break;
1272                     }
1273                   }
1274
1275                 }
1276                 throw e; // rethrow
1277               }
1278               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1279                 if (currentLine == null) {
1280                   currentLine = new NLSLine();
1281                   lines.add(currentLine);
1282                 }
1283                 currentLine.add(
1284                   new StringLiteral(
1285                     getCurrentTokenSourceString(),
1286                     startPosition,
1287                     currentPosition - 1));
1288               }
1289               return TokenNameStringLiteral;
1290             case '`' :
1291               try {
1292                 // consume next character
1293                 unicodeAsBackSlash = false;
1294                 currentCharacter = source[currentPosition++];
1295 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1296 //                  && (source[currentPosition] == 'u')) {
1297 //                  getNextUnicodeChar();
1298 //                } else {
1299 //                  if (withoutUnicodePtr != 0) {
1300 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
1301 //                      currentCharacter;
1302 //                  }
1303 //                }
1304
1305                 while (currentCharacter != '`') {
1306
1307                   /**** in PHP \r and \n are valid in string literals ****/
1308                   //                if ((currentCharacter == '\n')
1309                   //                  || (currentCharacter == '\r')) {
1310                   //                  // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1311                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1312                   //                    if (currentPosition + lookAhead == source.length)
1313                   //                      break;
1314                   //                    if (source[currentPosition + lookAhead] == '\n')
1315                   //                      break;
1316                   //                    if (source[currentPosition + lookAhead] == '\"') {
1317                   //                      currentPosition += lookAhead + 1;
1318                   //                      break;
1319                   //                    }
1320                   //                  }
1321                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
1322                   //                }
1323                   if (currentCharacter == '\\') {
1324                     int escapeSize = currentPosition;
1325                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
1326                     //scanEscapeCharacter has a side effect on this value, and we need the previous value a few lines below
1327                     scanDoubleQuotedEscapeCharacter();
1328                     escapeSize = currentPosition - escapeSize;
1329                     if (withoutUnicodePtr == 0) {
1330                       //buffer all the entries that have been left aside....
1331                       withoutUnicodePtr =
1332                         currentPosition - escapeSize - 1 - startPosition;
1333                       System.arraycopy(
1334                         source,
1335                         startPosition,
1336                         withoutUnicodeBuffer,
1337                         1,
1338                         withoutUnicodePtr);
1339                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1340                         currentCharacter;
1341                     } else { //overwrite the / in the buffer
1342                       withoutUnicodeBuffer[withoutUnicodePtr] =
1343                         currentCharacter;
1344                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
1345                         withoutUnicodePtr--;
1346                       }
1347                     }
1348                   }
1349                   // consume next character
1350                   unicodeAsBackSlash = false;
1351                   currentCharacter = source[currentPosition++];
1352 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1353 //                    && (source[currentPosition] == 'u')) {
1354 //                    getNextUnicodeChar();
1355 //                  } else {
1356                     if (withoutUnicodePtr != 0) {
1357                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1358                         currentCharacter;
1359                     }
1360 //                  }
1361
1362                 }
1363               } catch (IndexOutOfBoundsException e) {
1364                 throw new InvalidInputException(UNTERMINATED_STRING);
1365               } catch (InvalidInputException e) {
1366                 if (e.getMessage().equals(INVALID_ESCAPE)) {
1367                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
1368                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
1369                     if (currentPosition + lookAhead == source.length)
1370                       break;
1371                     if (source[currentPosition + lookAhead] == '\n')
1372                       break;
1373                     if (source[currentPosition + lookAhead] == '`') {
1374                       currentPosition += lookAhead + 1;
1375                       break;
1376                     }
1377                   }
1378
1379                 }
1380                 throw e; // rethrow
1381               }
1382               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
1383                 if (currentLine == null) {
1384                   currentLine = new NLSLine();
1385                   lines.add(currentLine);
1386                 }
1387                 currentLine.add(
1388                   new StringLiteral(
1389                     getCurrentTokenSourceString(),
1390                     startPosition,
1391                     currentPosition - 1));
1392               }
1393               return TokenNameStringInterpolated;
1394             case '#' :
1395             case '/' :
1396               {
1397                 int test;
1398                 if ((currentCharacter == '#')
1399                   || (test = getNextChar('/', '*')) == 0) {
1400                   //line comment 
1401                   int endPositionForLineComment = 0;
1402                   try { //get the next char 
1403                     currentCharacter = source[currentPosition++];
1404 //                    if (((currentCharacter = source[currentPosition++])
1405 //                      == '\\')
1406 //                      && (source[currentPosition] == 'u')) {
1407 //                      //-------------unicode traitement ------------
1408 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1409 //                      currentPosition++;
1410 //                      while (source[currentPosition] == 'u') {
1411 //                        currentPosition++;
1412 //                      }
1413 //                      if ((c1 =
1414 //                        Character.getNumericValue(source[currentPosition++]))
1415 //                        > 15
1416 //                        || c1 < 0
1417 //                        || (c2 =
1418 //                          Character.getNumericValue(source[currentPosition++]))
1419 //                          > 15
1420 //                        || c2 < 0
1421 //                        || (c3 =
1422 //                          Character.getNumericValue(source[currentPosition++]))
1423 //                          > 15
1424 //                        || c3 < 0
1425 //                        || (c4 =
1426 //                          Character.getNumericValue(source[currentPosition++]))
1427 //                          > 15
1428 //                        || c4 < 0) {
1429 //                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1430 //                      } else {
1431 //                        currentCharacter =
1432 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1433 //                      }
1434 //                    }
1435
1436                     //handle the \\u case manually into comment
1437 //                    if (currentCharacter == '\\') {
1438 //                      if (source[currentPosition] == '\\')
1439 //                        currentPosition++;
1440 //                    } //jump over the \\
1441                     boolean isUnicode = false;
1442                     while (currentCharacter != '\r'
1443                       && currentCharacter != '\n') {
1444                       if (currentCharacter == '?') {
1445                         if (getNextChar('>')) {
1446                           startPosition = currentPosition - 2;
1447                           phpMode = false;
1448                           return TokenNameStopPHP;
1449                         }
1450                       }
1451
1452                       //get the next char
1453                       isUnicode = false;
1454                       currentCharacter = source[currentPosition++];
1455 //                      if (((currentCharacter = source[currentPosition++])
1456 //                        == '\\')
1457 //                        && (source[currentPosition] == 'u')) {
1458 //                        isUnicode = true;
1459 //                        //-------------unicode traitement ------------
1460 //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1461 //                        currentPosition++;
1462 //                        while (source[currentPosition] == 'u') {
1463 //                          currentPosition++;
1464 //                        }
1465 //                        if ((c1 =
1466 //                          Character.getNumericValue(source[currentPosition++]))
1467 //                          > 15
1468 //                          || c1 < 0
1469 //                          || (c2 =
1470 //                            Character.getNumericValue(
1471 //                              source[currentPosition++]))
1472 //                            > 15
1473 //                          || c2 < 0
1474 //                          || (c3 =
1475 //                            Character.getNumericValue(
1476 //                              source[currentPosition++]))
1477 //                            > 15
1478 //                          || c3 < 0
1479 //                          || (c4 =
1480 //                            Character.getNumericValue(
1481 //                              source[currentPosition++]))
1482 //                            > 15
1483 //                          || c4 < 0) {
1484 //                          throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1485 //                        } else {
1486 //                          currentCharacter =
1487 //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1488 //                        }
1489 //                      }
1490                       //handle the \\u case manually into comment
1491 //                      if (currentCharacter == '\\') {
1492 //                        if (source[currentPosition] == '\\')
1493 //                          currentPosition++;
1494 //                      } //jump over the \\
1495                     }
1496                     if (isUnicode) {
1497                       endPositionForLineComment = currentPosition - 6;
1498                     } else {
1499                       endPositionForLineComment = currentPosition - 1;
1500                     }
1501                     recordComment(false);
1502                     if ((currentCharacter == '\r')
1503                       || (currentCharacter == '\n')) {
1504                       checkNonExternalizeString();
1505                       if (recordLineSeparator) {
1506                         if (isUnicode) {
1507                           pushUnicodeLineSeparator();
1508                         } else {
1509                           pushLineSeparator();
1510                         }
1511                       } else {
1512                         currentLine = null;
1513                       }
1514                     }
1515                     if (tokenizeComments) {
1516                       if (!isUnicode) {
1517                         currentPosition = endPositionForLineComment;
1518                         // reset one character behind
1519                       }
1520                       return TokenNameCOMMENT_LINE;
1521                     }
1522                   } catch (IndexOutOfBoundsException e) { //an eof will them be generated
1523                     if (tokenizeComments) {
1524                       currentPosition--;
1525                       // reset one character behind
1526                       return TokenNameCOMMENT_LINE;
1527                     }
1528                   }
1529                   break;
1530                 }
1531                 if (test > 0) {
1532                   //traditional and annotation comment
1533                   boolean isJavadoc = false, star = false;
1534                   // consume next character
1535                   unicodeAsBackSlash = false;
1536                   currentCharacter = source[currentPosition++];
1537 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1538 //                    && (source[currentPosition] == 'u')) {
1539 //                    getNextUnicodeChar();
1540 //                  } else {
1541 //                    if (withoutUnicodePtr != 0) {
1542 //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
1543 //                        currentCharacter;
1544 //                    }
1545 //                  }
1546
1547                   if (currentCharacter == '*') {
1548                     isJavadoc = true;
1549                     star = true;
1550                   }
1551                   if ((currentCharacter == '\r')
1552                     || (currentCharacter == '\n')) {
1553                     checkNonExternalizeString();
1554                     if (recordLineSeparator) {
1555                       pushLineSeparator();
1556                     } else {
1557                       currentLine = null;
1558                     }
1559                   }
1560                   try { //get the next char 
1561                     currentCharacter = source[currentPosition++];
1562 //                    if (((currentCharacter = source[currentPosition++])
1563 //                      == '\\')
1564 //                      && (source[currentPosition] == 'u')) {
1565 //                      //-------------unicode traitement ------------
1566 //                      getNextUnicodeChar();
1567 //                    }
1568                     //handle the \\u case manually into comment
1569 //                    if (currentCharacter == '\\') {
1570 //                      if (source[currentPosition] == '\\')
1571 //                        currentPosition++;
1572 //                      //jump over the \\
1573 //                    }
1574                     // empty comment is not a javadoc /**/
1575                     if (currentCharacter == '/') {
1576                       isJavadoc = false;
1577                     }
1578                     //loop until end of comment */
1579                     while ((currentCharacter != '/') || (!star)) {
1580                       if ((currentCharacter == '\r')
1581                         || (currentCharacter == '\n')) {
1582                         checkNonExternalizeString();
1583                         if (recordLineSeparator) {
1584                           pushLineSeparator();
1585                         } else {
1586                           currentLine = null;
1587                         }
1588                       }
1589                       star = currentCharacter == '*';
1590                       //get next char
1591                       currentCharacter = source[currentPosition++];
1592 //                      if (((currentCharacter = source[currentPosition++])
1593 //                        == '\\')
1594 //                        && (source[currentPosition] == 'u')) {
1595 //                        //-------------unicode traitement ------------
1596 //                        getNextUnicodeChar();
1597 //                      }
1598                       //handle the \\u case manually into comment
1599 //                      if (currentCharacter == '\\') {
1600 //                        if (source[currentPosition] == '\\')
1601 //                          currentPosition++;
1602 //                      } //jump over the \\
1603                     }
1604                     recordComment(isJavadoc);
1605                     if (tokenizeComments) {
1606                       if (isJavadoc)
1607                         return TokenNameCOMMENT_PHPDOC;
1608                       return TokenNameCOMMENT_BLOCK;
1609                     }
1610                   } catch (IndexOutOfBoundsException e) {
1611                     throw new InvalidInputException(UNTERMINATED_COMMENT);
1612                   }
1613                   break;
1614                 }
1615                 if (getNextChar('='))
1616                   return TokenNameDIVIDE_EQUAL;
1617                 return TokenNameDIVIDE;
1618               }
1619             case '\u001a' :
1620               if (atEnd())
1621                 return TokenNameEOF;
1622               //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
1623               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
1624
1625             default :
1626               if (currentCharacter == '$') {
1627                 while ((currentCharacter = source[currentPosition++]) == '$') {
1628                 }
1629                 if (currentCharacter == '{')
1630                   return TokenNameDOLLAR_LBRACE;
1631                 if (isPHPIdentifierStart(currentCharacter))
1632                   return scanIdentifierOrKeyword(true);
1633                 return TokenNameERROR;
1634               }
1635               if (isPHPIdentifierStart(currentCharacter))
1636                 return scanIdentifierOrKeyword(false);
1637               if (Character.isDigit(currentCharacter))
1638                 return scanNumber(false);
1639               return TokenNameERROR;
1640           }
1641         }
1642       } //-----------------end switch while try--------------------
1643       catch (IndexOutOfBoundsException e) {
1644       }
1645     }
1646     return TokenNameEOF;
1647   }
1648
1649 //  public final void getNextUnicodeChar()
1650 //    throws IndexOutOfBoundsException, InvalidInputException {
1651 //    //VOID
1652 //    //handle the case of unicode.
1653 //    //when a unicode appears then we must use a buffer that holds char internal values
1654 //    //At the end of this method currentCharacter holds the new visited char
1655 //    //and currentPosition points right next after it
1656 //
1657 //    //ALL getNextChar.... ARE OPTIMIZED COPIES 
1658 //
1659 //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
1660 //    currentPosition++;
1661 //    while (source[currentPosition] == 'u') {
1662 //      currentPosition++;
1663 //      unicodeSize++;
1664 //    }
1665 //
1666 //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
1667 //      || c1 < 0
1668 //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
1669 //      || c2 < 0
1670 //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
1671 //      || c3 < 0
1672 //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
1673 //      || c4 < 0) {
1674 //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
1675 //    } else {
1676 //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1677 //      //need the unicode buffer
1678 //      if (withoutUnicodePtr == 0) {
1679 //        //buffer all the entries that have been left aside....
1680 //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
1681 //        System.arraycopy(
1682 //          source,
1683 //          startPosition,
1684 //          withoutUnicodeBuffer,
1685 //          1,
1686 //          withoutUnicodePtr);
1687 //      }
1688 //      //fill the buffer with the char
1689 //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
1690 //    }
1691 //    unicodeAsBackSlash = currentCharacter == '\\';
1692 //  }
1693   /* Tokenize a method body, assuming that curly brackets are properly balanced.
        *
        * Advances currentPosition through the source while keeping a count of open
        * braces that starts at 1, and returns right after the '}' that brings the
        * count back to 0 (presumably the body's opening '{' has already been
        * consumed by the caller -- TODO confirm).
        *
        * While skipping, it:
        *  - clears wasAcr on entry and sets startPosition before every character
        *    examined by the whitespace loop;
        *  - calls pushLineSeparator() for '\r' / '\n' when recordLineSeparator is set;
        *  - steps over one single-quoted character, double-quoted strings (up to the
        *    closing quote or the end of the line), '//' line comments and
        *    slash-star block comments, so braces inside them are not counted;
        *  - lets scanIdentifierOrKeyword() / scanNumber() consume identifiers,
        *    '$' names and numbers, ignoring any InvalidInputException they raise.
        *
        * Reaching the end of source (IndexOutOfBoundsException) simply ends the
        * method; nothing is reported to the caller.
        *
        * The commented-out fragments are the remains of the removed
        * backslash-u (unicode escape) handling.
        *
        * NOTE(review): there is no case for '#' here, although getNextToken() treats
        * '#' as a line-comment start, and the single-quote case consumes only one
        * character. Braces inside '#' comments or inside longer single-quoted PHP
        * strings therefore look like they would be counted -- confirm whether that
        * is intended.
1694    */
1695   public final void jumpOverMethodBody() {
1696
1697     this.wasAcr = false;
1698     int found = 1; // number of '{' still waiting for their '}'
1699     try {
1700       while (true) { //loop for jumping over comments
1701         // ---------Consume white space and handles startPosition---------
1702         boolean isWhiteSpace;
1703         do {
1704           startPosition = currentPosition;
1705           currentCharacter = source[currentPosition++];
1706 //          if (((currentCharacter = source[currentPosition++]) == '\\')
1707 //            && (source[currentPosition] == 'u')) {
1708 //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
1709 //          } else {
1710             if (recordLineSeparator
1711               && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1712               pushLineSeparator();
1713             isWhiteSpace = Character.isWhitespace(currentCharacter);
1714 //          }
1715         } while (isWhiteSpace);
1716
1717         // -------consume token until } is found---------
1718         switch (currentCharacter) {
1719           case '{' :
1720             found++;
1721             break;
1722           case '}' :
1723             found--;
1724             if (found == 0) // this '}' balances the initial count: the body is finished
1725               return;
1726             break;
1727           case '\'' : // single quote: one (possibly escaped) character, then an optional closing quote
1728             {
1729               boolean test;
1730               test = getNextChar('\\');
1731               if (test) {
1732                 try {
1733                   scanDoubleQuotedEscapeCharacter();
1734                 } catch (InvalidInputException ex) { // a bad escape is ignored while skipping
1735                 };
1736               } else {
1737 //                try { // consume next character
1738                   unicodeAsBackSlash = false;
1739                   currentCharacter = source[currentPosition++];
1740 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1741 //                    && (source[currentPosition] == 'u')) {
1742 //                    getNextUnicodeChar();
1743 //                  } else {
1744                     if (withoutUnicodePtr != 0) { // buffer already in use: append the character to it
1745                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1746                         currentCharacter;
1747                     }
1748 //                  }
1749 //                } catch (InvalidInputException ex) {
1750 //                };
1751               }
1752               getNextChar('\''); // result ignored; presumably consumes the closing quote when present
1753               break;
1754             }
1755           case '"' : // double-quoted string: skip to the closing quote or the end of the line
1756             try {
1757 //              try { // consume next character
1758                 unicodeAsBackSlash = false;
1759                 currentCharacter = source[currentPosition++];
1760 //                if (((currentCharacter = source[currentPosition++]) == '\\')
1761 //                  && (source[currentPosition] == 'u')) {
1762 //                  getNextUnicodeChar();
1763 //                } else {
1764                   if (withoutUnicodePtr != 0) {
1765                     withoutUnicodeBuffer[++withoutUnicodePtr] =
1766                       currentCharacter;
1767                   }
1768 //                }
1769 //              } catch (InvalidInputException ex) {
1770 //              };
1771               while (currentCharacter != '"') {
1772                 if (currentCharacter == '\r') {
1773                   if (source[currentPosition] == '\n') // treat "\r\n" as a single line end
1774                     currentPosition++;
1775                   break;
1776                   // the string cannot go further than the line
1777                 }
1778                 if (currentCharacter == '\n') {
1779                   break;
1780                   // the string cannot go further than the line
1781                 }
1782                 if (currentCharacter == '\\') {
1783                   try {
1784                     scanDoubleQuotedEscapeCharacter();
1785                   } catch (InvalidInputException ex) { // a bad escape is ignored while skipping
1786                   };
1787                 }
1788 //                try { // consume next character
1789                   unicodeAsBackSlash = false;
1790                   currentCharacter = source[currentPosition++];
1791 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1792 //                    && (source[currentPosition] == 'u')) {
1793 //                    getNextUnicodeChar();
1794 //                  } else {
1795                     if (withoutUnicodePtr != 0) {
1796                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1797                         currentCharacter;
1798                     }
1799 //                  }
1800 //                } catch (InvalidInputException ex) {
1801 //                };
1802               }
1803             } catch (IndexOutOfBoundsException e) {
1804               return; // source ended inside the string: give up
1805             }
1806             break;
1807           case '/' : // '//' line comment, '/* */' block comment, or a lone '/'
1808             {
1809               int test;
1810               if ((test = getNextChar('/', '*')) == 0) {
1811                 //line comment 
1812                 try {
1813                   //get the next char 
1814                   currentCharacter = source[currentPosition++];
1815 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1816 //                    && (source[currentPosition] == 'u')) {
1817 //                    //-------------unicode handling ------------
1818 //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1819 //                    currentPosition++;
1820 //                    while (source[currentPosition] == 'u') {
1821 //                      currentPosition++;
1822 //                    }
1823 //                    if ((c1 =
1824 //                      Character.getNumericValue(source[currentPosition++]))
1825 //                      > 15
1826 //                      || c1 < 0
1827 //                      || (c2 =
1828 //                        Character.getNumericValue(source[currentPosition++]))
1829 //                        > 15
1830 //                      || c2 < 0
1831 //                      || (c3 =
1832 //                        Character.getNumericValue(source[currentPosition++]))
1833 //                        > 15
1834 //                      || c3 < 0
1835 //                      || (c4 =
1836 //                        Character.getNumericValue(source[currentPosition++]))
1837 //                        > 15
1838 //                      || c4 < 0) {
1839 //                      //error don't care of the value
1840 //                      currentCharacter = 'A';
1841 //                    } //something different from \n and \r
1842 //                    else {
1843 //                      currentCharacter =
1844 //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1845 //                    }
1846 //                  }
1847
1848                   while (currentCharacter != '\r'
1849                     && currentCharacter != '\n') {
1850                     //get the next char 
1851                     currentCharacter = source[currentPosition++];
1852 //                    if (((currentCharacter = source[currentPosition++])
1853 //                      == '\\')
1854 //                      && (source[currentPosition] == 'u')) {
1855 //                      //-------------unicode handling ------------
1856 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1857 //                      currentPosition++;
1858 //                      while (source[currentPosition] == 'u') {
1859 //                        currentPosition++;
1860 //                      }
1861 //                      if ((c1 =
1862 //                        Character.getNumericValue(source[currentPosition++]))
1863 //                        > 15
1864 //                        || c1 < 0
1865 //                        || (c2 =
1866 //                          Character.getNumericValue(source[currentPosition++]))
1867 //                          > 15
1868 //                        || c2 < 0
1869 //                        || (c3 =
1870 //                          Character.getNumericValue(source[currentPosition++]))
1871 //                          > 15
1872 //                        || c3 < 0
1873 //                        || (c4 =
1874 //                          Character.getNumericValue(source[currentPosition++]))
1875 //                          > 15
1876 //                        || c4 < 0) {
1877 //                        //error don't care of the value
1878 //                        currentCharacter = 'A';
1879 //                      } //something different from \n and \r
1880 //                      else {
1881 //                        currentCharacter =
1882 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1883 //                      }
1884 //                    }
1885                   }
1886                   if (recordLineSeparator
1887                     && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1888                     pushLineSeparator();
1889                 } catch (IndexOutOfBoundsException e) {
1890                 } //an eof will then be generated
1891                 break;
1892               }
1893               if (test > 0) {
1894                 //traditional and annotation comment
1895                 boolean star = false; // true when the previous character was '*'
1896 //                try { // consume next character
1897                   unicodeAsBackSlash = false;
1898                   currentCharacter = source[currentPosition++];
1899 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1900 //                    && (source[currentPosition] == 'u')) {
1901 //                    getNextUnicodeChar();
1902 //                  } else {
1903                     if (withoutUnicodePtr != 0) {
1904                       withoutUnicodeBuffer[++withoutUnicodePtr] =
1905                         currentCharacter;
1906                     }
1907 //                  };
1908 //                } catch (InvalidInputException ex) {
1909 //                };
1910                 if (currentCharacter == '*') {
1911                   star = true;
1912                 }
1913                 if (recordLineSeparator
1914                   && ((currentCharacter == '\r') || (currentCharacter == '\n')))
1915                   pushLineSeparator();
1916                 try { //get the next char 
1917                   currentCharacter = source[currentPosition++];
1918 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
1919 //                    && (source[currentPosition] == 'u')) {
1920 //                    //-------------unicode handling ------------
1921 //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1922 //                    currentPosition++;
1923 //                    while (source[currentPosition] == 'u') {
1924 //                      currentPosition++;
1925 //                    }
1926 //                    if ((c1 =
1927 //                      Character.getNumericValue(source[currentPosition++]))
1928 //                      > 15
1929 //                      || c1 < 0
1930 //                      || (c2 =
1931 //                        Character.getNumericValue(source[currentPosition++]))
1932 //                        > 15
1933 //                      || c2 < 0
1934 //                      || (c3 =
1935 //                        Character.getNumericValue(source[currentPosition++]))
1936 //                        > 15
1937 //                      || c3 < 0
1938 //                      || (c4 =
1939 //                        Character.getNumericValue(source[currentPosition++]))
1940 //                        > 15
1941 //                      || c4 < 0) {
1942 //                      //error don't care of the value
1943 //                      currentCharacter = 'A';
1944 //                    } //something different from * and /
1945 //                    else {
1946 //                      currentCharacter =
1947 //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1948 //                    }
1949 //                  }
1950                   //loop until end of comment */ 
1951                   while ((currentCharacter != '/') || (!star)) {
1952                     if (recordLineSeparator
1953                       && ((currentCharacter == '\r')
1954                         || (currentCharacter == '\n')))
1955                       pushLineSeparator();
1956                     star = currentCharacter == '*';
1957                     //get next char
1958                     currentCharacter = source[currentPosition++];
1959 //                    if (((currentCharacter = source[currentPosition++])
1960 //                      == '\\')
1961 //                      && (source[currentPosition] == 'u')) {
1962 //                      //-------------unicode handling ------------
1963 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
1964 //                      currentPosition++;
1965 //                      while (source[currentPosition] == 'u') {
1966 //                        currentPosition++;
1967 //                      }
1968 //                      if ((c1 =
1969 //                        Character.getNumericValue(source[currentPosition++]))
1970 //                        > 15
1971 //                        || c1 < 0
1972 //                        || (c2 =
1973 //                          Character.getNumericValue(source[currentPosition++]))
1974 //                          > 15
1975 //                        || c2 < 0
1976 //                        || (c3 =
1977 //                          Character.getNumericValue(source[currentPosition++]))
1978 //                          > 15
1979 //                        || c3 < 0
1980 //                        || (c4 =
1981 //                          Character.getNumericValue(source[currentPosition++]))
1982 //                          > 15
1983 //                        || c4 < 0) {
1984 //                        //error don't care of the value
1985 //                        currentCharacter = 'A';
1986 //                      } //something different from * and /
1987 //                      else {
1988 //                        currentCharacter =
1989 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
1990 //                      }
1991 //                    }
1992                   }
1993                 } catch (IndexOutOfBoundsException e) {
1994                   return; // unterminated block comment: source exhausted
1995                 }
1996                 break;
1997               }
1998               break; // a lone '/' (not a comment start) needs no further handling
1999             }
2000
2001           default : // identifiers, '$' names and numbers are consumed by the regular scan helpers
2002             if (isPHPIdentifierStart(currentCharacter)
2003               || currentCharacter == '$') {
2004               try {
2005                 scanIdentifierOrKeyword((currentCharacter == '$'));
2006               } catch (InvalidInputException ex) { // ignored: only the brace count matters here
2007               };
2008               break;
2009             }
2010             if (Character.isDigit(currentCharacter)) {
2011               try {
2012                 scanNumber(false);
2013               } catch (InvalidInputException ex) { // ignored: only the brace count matters here
2014               };
2015               break;
2016             }
2017         }
2018       }
2019       //-----------------end switch while try--------------------
2020     } catch (IndexOutOfBoundsException e) { // end of source reached before the braces balanced: stop silently
2021     } catch (InvalidInputException e) { // invalid input likewise just ends the skip
2022     }
2023     return;
2024   }
2025 //  public final boolean jumpOverUnicodeWhiteSpace()
2026 //    throws InvalidInputException {
2027 //    //BOOLEAN
2028 //    //handle the case of unicode. Jump over the next whiteSpace
2029 //    //making startPosition pointing on the next available char
2030 //    //On false, the currentCharacter is filled up with a potential
2031 //    //correct char
2032 //
2033 //    try {
2034 //      this.wasAcr = false;
2035 //      int c1, c2, c3, c4;
2036 //      int unicodeSize = 6;
2037 //      currentPosition++;
2038 //      while (source[currentPosition] == 'u') {
2039 //        currentPosition++;
2040 //        unicodeSize++;
2041 //      }
2042 //
2043 //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
2044 //        || c1 < 0)
2045 //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
2046 //          || c2 < 0)
2047 //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
2048 //          || c3 < 0)
2049 //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
2050 //          || c4 < 0)) {
2051 //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2052 //      }
2053 //
2054 //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
2055 //      if (recordLineSeparator
2056 //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
2057 //        pushLineSeparator();
2058 //      if (Character.isWhitespace(currentCharacter))
2059 //        return true;
2060 //
2061 //      //buffer the new char which is not a white space
2062 //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2063 //      //withoutUnicodePtr == 1 is true here
2064 //      return false;
2065 //    } catch (IndexOutOfBoundsException e) {
2066 //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
2067 //    }
2068 //  }
2069   public final int[] getLineEnds() {
2070     //return a bounded copy of this.lineEnds 
2071
2072     int[] copy;
2073     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
2074     return copy;
2075   }
2076
2077   public char[] getSource() {
2078     return this.source;
2079   }
2080   final char[] optimizedCurrentTokenSource1() {
2081     //return always the same char[] build only once
2082
2083     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
2084     char charOne = source[startPosition];
2085     switch (charOne) {
2086       case 'a' :
2087         return charArray_a;
2088       case 'b' :
2089         return charArray_b;
2090       case 'c' :
2091         return charArray_c;
2092       case 'd' :
2093         return charArray_d;
2094       case 'e' :
2095         return charArray_e;
2096       case 'f' :
2097         return charArray_f;
2098       case 'g' :
2099         return charArray_g;
2100       case 'h' :
2101         return charArray_h;
2102       case 'i' :
2103         return charArray_i;
2104       case 'j' :
2105         return charArray_j;
2106       case 'k' :
2107         return charArray_k;
2108       case 'l' :
2109         return charArray_l;
2110       case 'm' :
2111         return charArray_m;
2112       case 'n' :
2113         return charArray_n;
2114       case 'o' :
2115         return charArray_o;
2116       case 'p' :
2117         return charArray_p;
2118       case 'q' :
2119         return charArray_q;
2120       case 'r' :
2121         return charArray_r;
2122       case 's' :
2123         return charArray_s;
2124       case 't' :
2125         return charArray_t;
2126       case 'u' :
2127         return charArray_u;
2128       case 'v' :
2129         return charArray_v;
2130       case 'w' :
2131         return charArray_w;
2132       case 'x' :
2133         return charArray_x;
2134       case 'y' :
2135         return charArray_y;
2136       case 'z' :
2137         return charArray_z;
2138       default :
2139         return new char[] { charOne };
2140     }
2141   }
2142
2143   final char[] optimizedCurrentTokenSource2() {
2144     //try to return the same char[] build only once
2145
2146     char c0, c1;
2147     int hash =
2148       (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
2149         % TableSize;
2150     char[][] table = charArray_length[0][hash];
2151     int i = newEntry2;
2152     while (++i < InternalTableSize) {
2153       char[] charArray = table[i];
2154       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2155         return charArray;
2156     }
2157     //---------other side---------
2158     i = -1;
2159     int max = newEntry2;
2160     while (++i <= max) {
2161       char[] charArray = table[i];
2162       if ((c0 == charArray[0]) && (c1 == charArray[1]))
2163         return charArray;
2164     }
2165     //--------add the entry-------
2166     if (++max >= InternalTableSize)
2167       max = 0;
2168     char[] r;
2169     table[max] = (r = new char[] { c0, c1 });
2170     newEntry2 = max;
2171     return r;
2172   }
2173
2174   final char[] optimizedCurrentTokenSource3() {
2175     //try to return the same char[] build only once
2176
2177     char c0, c1, c2;
2178     int hash =
2179       (((c0 = source[startPosition]) << 12)
2180         + ((c1 = source[startPosition + 1]) << 6)
2181         + (c2 = source[startPosition + 2]))
2182         % TableSize;
2183     char[][] table = charArray_length[1][hash];
2184     int i = newEntry3;
2185     while (++i < InternalTableSize) {
2186       char[] charArray = table[i];
2187       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2188         return charArray;
2189     }
2190     //---------other side---------
2191     i = -1;
2192     int max = newEntry3;
2193     while (++i <= max) {
2194       char[] charArray = table[i];
2195       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
2196         return charArray;
2197     }
2198     //--------add the entry-------
2199     if (++max >= InternalTableSize)
2200       max = 0;
2201     char[] r;
2202     table[max] = (r = new char[] { c0, c1, c2 });
2203     newEntry3 = max;
2204     return r;
2205   }
2206
2207   final char[] optimizedCurrentTokenSource4() {
2208     //try to return the same char[] build only once
2209
2210     char c0, c1, c2, c3;
2211     long hash =
2212       ((((long) (c0 = source[startPosition])) << 18)
2213         + ((c1 = source[startPosition + 1]) << 12)
2214         + ((c2 = source[startPosition + 2]) << 6)
2215         + (c3 = source[startPosition + 3]))
2216         % TableSize;
2217     char[][] table = charArray_length[2][(int) hash];
2218     int i = newEntry4;
2219     while (++i < InternalTableSize) {
2220       char[] charArray = table[i];
2221       if ((c0 == charArray[0])
2222         && (c1 == charArray[1])
2223         && (c2 == charArray[2])
2224         && (c3 == charArray[3]))
2225         return charArray;
2226     }
2227     //---------other side---------
2228     i = -1;
2229     int max = newEntry4;
2230     while (++i <= max) {
2231       char[] charArray = table[i];
2232       if ((c0 == charArray[0])
2233         && (c1 == charArray[1])
2234         && (c2 == charArray[2])
2235         && (c3 == charArray[3]))
2236         return charArray;
2237     }
2238     //--------add the entry-------
2239     if (++max >= InternalTableSize)
2240       max = 0;
2241     char[] r;
2242     table[max] = (r = new char[] { c0, c1, c2, c3 });
2243     newEntry4 = max;
2244     return r;
2245
2246   }
2247
2248   final char[] optimizedCurrentTokenSource5() {
2249     //try to return the same char[] build only once
2250
2251     char c0, c1, c2, c3, c4;
2252     long hash =
2253       ((((long) (c0 = source[startPosition])) << 24)
2254         + (((long) (c1 = source[startPosition + 1])) << 18)
2255         + ((c2 = source[startPosition + 2]) << 12)
2256         + ((c3 = source[startPosition + 3]) << 6)
2257         + (c4 = source[startPosition + 4]))
2258         % TableSize;
2259     char[][] table = charArray_length[3][(int) hash];
2260     int i = newEntry5;
2261     while (++i < InternalTableSize) {
2262       char[] charArray = table[i];
2263       if ((c0 == charArray[0])
2264         && (c1 == charArray[1])
2265         && (c2 == charArray[2])
2266         && (c3 == charArray[3])
2267         && (c4 == charArray[4]))
2268         return charArray;
2269     }
2270     //---------other side---------
2271     i = -1;
2272     int max = newEntry5;
2273     while (++i <= max) {
2274       char[] charArray = table[i];
2275       if ((c0 == charArray[0])
2276         && (c1 == charArray[1])
2277         && (c2 == charArray[2])
2278         && (c3 == charArray[3])
2279         && (c4 == charArray[4]))
2280         return charArray;
2281     }
2282     //--------add the entry-------
2283     if (++max >= InternalTableSize)
2284       max = 0;
2285     char[] r;
2286     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
2287     newEntry5 = max;
2288     return r;
2289
2290   }
2291
2292   final char[] optimizedCurrentTokenSource6() {
2293     //try to return the same char[] build only once
2294
2295     char c0, c1, c2, c3, c4, c5;
2296     long hash =
2297       ((((long) (c0 = source[startPosition])) << 32)
2298         + (((long) (c1 = source[startPosition + 1])) << 24)
2299         + (((long) (c2 = source[startPosition + 2])) << 18)
2300         + ((c3 = source[startPosition + 3]) << 12)
2301         + ((c4 = source[startPosition + 4]) << 6)
2302         + (c5 = source[startPosition + 5]))
2303         % TableSize;
2304     char[][] table = charArray_length[4][(int) hash];
2305     int i = newEntry6;
2306     while (++i < InternalTableSize) {
2307       char[] charArray = table[i];
2308       if ((c0 == charArray[0])
2309         && (c1 == charArray[1])
2310         && (c2 == charArray[2])
2311         && (c3 == charArray[3])
2312         && (c4 == charArray[4])
2313         && (c5 == charArray[5]))
2314         return charArray;
2315     }
2316     //---------other side---------
2317     i = -1;
2318     int max = newEntry6;
2319     while (++i <= max) {
2320       char[] charArray = table[i];
2321       if ((c0 == charArray[0])
2322         && (c1 == charArray[1])
2323         && (c2 == charArray[2])
2324         && (c3 == charArray[3])
2325         && (c4 == charArray[4])
2326         && (c5 == charArray[5]))
2327         return charArray;
2328     }
2329     //--------add the entry-------
2330     if (++max >= InternalTableSize)
2331       max = 0;
2332     char[] r;
2333     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
2334     newEntry6 = max;
2335     return r;
2336   }
2337
2338   public final void pushLineSeparator() throws InvalidInputException {
2339     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2340     final int INCREMENT = 250;
2341
2342     if (this.checkNonExternalizedStringLiterals) {
2343       // reinitialize the current line for non externalize strings purpose
2344       currentLine = null;
2345     }
2346     //currentCharacter is at position currentPosition-1
2347
2348     // cr 000D
2349     if (currentCharacter == '\r') {
2350       int separatorPos = currentPosition - 1;
2351       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2352         return;
2353       //System.out.println("CR-" + separatorPos);
2354       try {
2355         lineEnds[++linePtr] = separatorPos;
2356       } catch (IndexOutOfBoundsException e) {
2357         //linePtr value is correct
2358         int oldLength = lineEnds.length;
2359         int[] old = lineEnds;
2360         lineEnds = new int[oldLength + INCREMENT];
2361         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2362         lineEnds[linePtr] = separatorPos;
2363       }
2364       // look-ahead for merged cr+lf
2365       try {
2366         if (source[currentPosition] == '\n') {
2367           //System.out.println("look-ahead LF-" + currentPosition);                     
2368           lineEnds[linePtr] = currentPosition;
2369           currentPosition++;
2370           wasAcr = false;
2371         } else {
2372           wasAcr = true;
2373         }
2374       } catch (IndexOutOfBoundsException e) {
2375         wasAcr = true;
2376       }
2377     } else {
2378       // lf 000A
2379       if (currentCharacter == '\n') {
2380         //must merge eventual cr followed by lf
2381         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
2382           //System.out.println("merge LF-" + (currentPosition - 1));                                                    
2383           lineEnds[linePtr] = currentPosition - 1;
2384         } else {
2385           int separatorPos = currentPosition - 1;
2386           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2387             return;
2388           // System.out.println("LF-" + separatorPos);                                                  
2389           try {
2390             lineEnds[++linePtr] = separatorPos;
2391           } catch (IndexOutOfBoundsException e) {
2392             //linePtr value is correct
2393             int oldLength = lineEnds.length;
2394             int[] old = lineEnds;
2395             lineEnds = new int[oldLength + INCREMENT];
2396             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2397             lineEnds[linePtr] = separatorPos;
2398           }
2399         }
2400         wasAcr = false;
2401       }
2402     }
2403   }
2404   public final void pushUnicodeLineSeparator() {
2405     // isUnicode means that the \r or \n has been read as a unicode character
2406
2407     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
2408
2409     final int INCREMENT = 250;
2410     //currentCharacter is at position currentPosition-1
2411
2412     if (this.checkNonExternalizedStringLiterals) {
2413       // reinitialize the current line for non externalize strings purpose
2414       currentLine = null;
2415     }
2416
2417     // cr 000D
2418     if (currentCharacter == '\r') {
2419       int separatorPos = currentPosition - 6;
2420       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2421         return;
2422       //System.out.println("CR-" + separatorPos);
2423       try {
2424         lineEnds[++linePtr] = separatorPos;
2425       } catch (IndexOutOfBoundsException e) {
2426         //linePtr value is correct
2427         int oldLength = lineEnds.length;
2428         int[] old = lineEnds;
2429         lineEnds = new int[oldLength + INCREMENT];
2430         System.arraycopy(old, 0, lineEnds, 0, oldLength);
2431         lineEnds[linePtr] = separatorPos;
2432       }
2433       // look-ahead for merged cr+lf
2434       if (source[currentPosition] == '\n') {
2435         //System.out.println("look-ahead LF-" + currentPosition);                       
2436         lineEnds[linePtr] = currentPosition;
2437         currentPosition++;
2438         wasAcr = false;
2439       } else {
2440         wasAcr = true;
2441       }
2442     } else {
2443       // lf 000A
2444       if (currentCharacter == '\n') {
2445         //must merge eventual cr followed by lf
2446         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
2447           //System.out.println("merge LF-" + (currentPosition - 1));                                                    
2448           lineEnds[linePtr] = currentPosition - 6;
2449         } else {
2450           int separatorPos = currentPosition - 6;
2451           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
2452             return;
2453           // System.out.println("LF-" + separatorPos);                                                  
2454           try {
2455             lineEnds[++linePtr] = separatorPos;
2456           } catch (IndexOutOfBoundsException e) {
2457             //linePtr value is correct
2458             int oldLength = lineEnds.length;
2459             int[] old = lineEnds;
2460             lineEnds = new int[oldLength + INCREMENT];
2461             System.arraycopy(old, 0, lineEnds, 0, oldLength);
2462             lineEnds[linePtr] = separatorPos;
2463           }
2464         }
2465         wasAcr = false;
2466       }
2467     }
2468   }
2469   public final void recordComment(boolean isJavadoc) {
2470
2471     // a new annotation comment is recorded
2472     try {
2473       commentStops[++commentPtr] =
2474         isJavadoc ? currentPosition : -currentPosition;
2475     } catch (IndexOutOfBoundsException e) {
2476       int oldStackLength = commentStops.length;
2477       int[] oldStack = commentStops;
2478       commentStops = new int[oldStackLength + 30];
2479       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
2480       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
2481       //grows the positions buffers too
2482       int[] old = commentStarts;
2483       commentStarts = new int[oldStackLength + 30];
2484       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
2485     }
2486
2487     //the buffer is of a correct size here
2488     commentStarts[commentPtr] = startPosition;
2489   }
2490   public void resetTo(int begin, int end) {
2491     //reset the scanner to a given position where it may rescan again
2492
2493     diet = false;
2494     initialPosition = startPosition = currentPosition = begin;
2495     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
2496     commentPtr = -1; // reset comment stack
2497   }
2498
2499   public final void scanSingleQuotedEscapeCharacter()
2500     throws InvalidInputException {
2501     // the string with "\\u" is a legal string of two chars \ and u
2502     //thus we use a direct access to the source (for regular cases).
2503
2504 //    if (unicodeAsBackSlash) {
2505 //      // consume next character
2506 //      unicodeAsBackSlash = false;
2507 //      if (((currentCharacter = source[currentPosition++]) == '\\')
2508 //        && (source[currentPosition] == 'u')) {
2509 //        getNextUnicodeChar();
2510 //      } else {
2511 //        if (withoutUnicodePtr != 0) {
2512 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2513 //        }
2514 //      }
2515 //    } else
2516       currentCharacter = source[currentPosition++];
2517     switch (currentCharacter) {
2518       case '\'' :
2519         currentCharacter = '\'';
2520         break;
2521       case '\\' :
2522         currentCharacter = '\\';
2523         break;
2524       default :
2525         currentCharacter = '\\';
2526         currentPosition--;
2527     }
2528   }
2529
2530   public final void scanDoubleQuotedEscapeCharacter()
2531     throws InvalidInputException {
2532     // the string with "\\u" is a legal string of two chars \ and u
2533     //thus we use a direct access to the source (for regular cases).
2534
2535 //    if (unicodeAsBackSlash) {
2536 //      // consume next character
2537 //      unicodeAsBackSlash = false;
2538 //      if (((currentCharacter = source[currentPosition++]) == '\\')
2539 //        && (source[currentPosition] == 'u')) {
2540 //        getNextUnicodeChar();
2541 //      } else {
2542 //        if (withoutUnicodePtr != 0) {
2543 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
2544 //        }
2545 //      }
2546 //    } else
2547       currentCharacter = source[currentPosition++];
2548     switch (currentCharacter) {
2549       //      case 'b' :
2550       //        currentCharacter = '\b';
2551       //        break;
2552       case 't' :
2553         currentCharacter = '\t';
2554         break;
2555       case 'n' :
2556         currentCharacter = '\n';
2557         break;
2558         //      case 'f' :
2559         //        currentCharacter = '\f';
2560         //        break;
2561       case 'r' :
2562         currentCharacter = '\r';
2563         break;
2564       case '\"' :
2565         currentCharacter = '\"';
2566         break;
2567       case '\'' :
2568         currentCharacter = '\'';
2569         break;
2570       case '\\' :
2571         currentCharacter = '\\';
2572         break;
2573       case '$' :
2574         currentCharacter = '$';
2575         break;
2576       default :
2577         // -----------octal escape--------------
2578         // OctalDigit
2579         // OctalDigit OctalDigit
2580         // ZeroToThree OctalDigit OctalDigit
2581
2582         int number = Character.getNumericValue(currentCharacter);
2583         if (number >= 0 && number <= 7) {
2584           boolean zeroToThreeNot = number > 3;
2585           if (Character
2586             .isDigit(currentCharacter = source[currentPosition++])) {
2587             int digit = Character.getNumericValue(currentCharacter);
2588             if (digit >= 0 && digit <= 7) {
2589               number = (number * 8) + digit;
2590               if (Character
2591                 .isDigit(currentCharacter = source[currentPosition++])) {
2592                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
2593                   currentPosition--;
2594                 } else {
2595                   digit = Character.getNumericValue(currentCharacter);
2596                   if (digit >= 0 && digit <= 7) {
2597                     // has read \ZeroToThree OctalDigit OctalDigit
2598                     number = (number * 8) + digit;
2599                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
2600                     currentPosition--;
2601                   }
2602                 }
2603               } else { // has read \OctalDigit NonDigit--> ignore last character
2604                 currentPosition--;
2605               }
2606             } else { // has read \OctalDigit NonOctalDigit--> ignore last character                                             
2607               currentPosition--;
2608             }
2609           } else { // has read \OctalDigit --> ignore last character
2610             currentPosition--;
2611           }
2612           if (number > 255)
2613             throw new InvalidInputException(INVALID_ESCAPE);
2614           currentCharacter = (char) number;
2615         }
2616         //else
2617         //     throw new InvalidInputException(INVALID_ESCAPE);
2618     }
2619   }
2620
2621   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
2622   //    return scanIdentifierOrKeyword( false );
2623   //  }
2624
2625   public int scanIdentifierOrKeyword(boolean isVariable) 
2626     throws InvalidInputException {
2627     //test keywords
2628
2629     //first dispatch on the first char.
2630     //then the length. If there are several
2631     //keywors with the same length AND the same first char, then do another
2632     //disptach on the second char :-)...cool....but fast !
2633
2634     useAssertAsAnIndentifier = false;
2635
2636     while (getNextCharAsJavaIdentifierPart()) {
2637     };
2638
2639     if (isVariable) {
2640       return TokenNameVariable;
2641     }
2642     int index, length;
2643     char[] data;
2644     char firstLetter;
2645 //    if (withoutUnicodePtr == 0)
2646
2647       //quick test on length == 1 but not on length > 12 while most identifier
2648       //have a length which is <= 12...but there are lots of identifier with
2649       //only one char....
2650
2651 //      {
2652       if ((length = currentPosition - startPosition) == 1)
2653         return TokenNameIdentifier;
2654       //  data = source;
2655       data = new char[length];
2656       index = startPosition;
2657       for (int i = 0; i < length; i++) {
2658         data[i] = Character.toLowerCase(source[index + i]);
2659       }
2660       index = 0;
2661 //    } else {
2662 //      if ((length = withoutUnicodePtr) == 1)
2663 //        return TokenNameIdentifier;
2664 //      // data = withoutUnicodeBuffer;
2665 //      data = new char[withoutUnicodeBuffer.length];
2666 //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
2667 //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
2668 //      }
2669 //      index = 1;
2670 //    }
2671
2672     firstLetter = data[index];
2673     switch (firstLetter) {
2674
2675       case 'a' : // as and array
2676         switch (length) {
2677           case 2 : //as
2678             if ((data[++index] == 's')) {
2679               return TokenNameas;
2680             } else {
2681               return TokenNameIdentifier;
2682             }
2683           case 3 : //and
2684             if ((data[++index] == 'n') && (data[++index] == 'd')) {
2685               return TokenNameAND;
2686             } else {
2687               return TokenNameIdentifier;
2688             }
2689             //          case 5 :
2690             //            if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
2691             //              return TokenNamearray;
2692             //            else
2693             //              return TokenNameIdentifier;
2694           default :
2695             return TokenNameIdentifier;
2696         }
2697       case 'b' : //break
2698         switch (length) {
2699           case 5 :
2700             if ((data[++index] == 'r')
2701               && (data[++index] == 'e')
2702               && (data[++index] == 'a')
2703               && (data[++index] == 'k'))
2704               return TokenNamebreak;
2705             else
2706               return TokenNameIdentifier;
2707           default :
2708             return TokenNameIdentifier;
2709         }
2710
2711       case 'c' : //case class continue
2712         switch (length) {
2713           case 4 :
2714             if ((data[++index] == 'a')
2715               && (data[++index] == 's')
2716               && (data[++index] == 'e'))
2717               return TokenNamecase;
2718             else
2719               return TokenNameIdentifier;
2720           case 5 :
2721             if ((data[++index] == 'l')
2722               && (data[++index] == 'a')
2723               && (data[++index] == 's')
2724               && (data[++index] == 's'))
2725               return TokenNameclass;
2726             else
2727               return TokenNameIdentifier;
2728           case 8 :
2729             if ((data[++index] == 'o')
2730               && (data[++index] == 'n')
2731               && (data[++index] == 't')
2732               && (data[++index] == 'i')
2733               && (data[++index] == 'n')
2734               && (data[++index] == 'u')
2735               && (data[++index] == 'e'))
2736               return TokenNamecontinue;
2737             else
2738               return TokenNameIdentifier;
2739           default :
2740             return TokenNameIdentifier;
2741         }
2742
2743       case 'd' : //define default do 
2744         switch (length) {
2745           case 2 :
2746             if ((data[++index] == 'o'))
2747               return TokenNamedo;
2748             else
2749               return TokenNameIdentifier;
2750           case 6 :
2751             if ((data[++index] == 'e')
2752               && (data[++index] == 'f')
2753               && (data[++index] == 'i')
2754               && (data[++index] == 'n')
2755               && (data[++index] == 'e'))
2756               return TokenNamedefine;
2757             else
2758               return TokenNameIdentifier;
2759           case 7 :
2760             if ((data[++index] == 'e')
2761               && (data[++index] == 'f')
2762               && (data[++index] == 'a')
2763               && (data[++index] == 'u')
2764               && (data[++index] == 'l')
2765               && (data[++index] == 't'))
2766               return TokenNamedefault;
2767             else
2768               return TokenNameIdentifier;
2769           default :
2770             return TokenNameIdentifier;
2771         }
2772       case 'e' : //echo else elseif extends
2773         switch (length) {
2774           case 4 :
2775             if ((data[++index] == 'c')
2776               && (data[++index] == 'h')
2777               && (data[++index] == 'o'))
2778               return TokenNameecho;
2779             else if (
2780               (data[index] == 'l')
2781                 && (data[++index] == 's')
2782                 && (data[++index] == 'e'))
2783               return TokenNameelse;
2784             else
2785               return TokenNameIdentifier;
2786           case 5 : // endif
2787             if ((data[++index] == 'n')
2788               && (data[++index] == 'd')
2789               && (data[++index] == 'i')
2790               && (data[++index] == 'f'))
2791               return TokenNameendif;
2792             else
2793               return TokenNameIdentifier;
2794           case 6 : // endfor
2795             if ((data[++index] == 'n')
2796               && (data[++index] == 'd')
2797               && (data[++index] == 'f')
2798               && (data[++index] == 'o')
2799               && (data[++index] == 'r'))
2800               return TokenNameendfor;
2801             else if (
2802               (data[index] == 'l')
2803                 && (data[++index] == 's')
2804                 && (data[++index] == 'e')
2805                 && (data[++index] == 'i')
2806                 && (data[++index] == 'f'))
2807               return TokenNameelseif;
2808             else
2809               return TokenNameIdentifier;
2810           case 7 :
2811             if ((data[++index] == 'x')
2812               && (data[++index] == 't')
2813               && (data[++index] == 'e')
2814               && (data[++index] == 'n')
2815               && (data[++index] == 'd')
2816               && (data[++index] == 's'))
2817               return TokenNameextends;
2818             else
2819               return TokenNameIdentifier;
2820           case 8 : // endwhile
2821             if ((data[++index] == 'n')
2822               && (data[++index] == 'd')
2823               && (data[++index] == 'w')
2824               && (data[++index] == 'h')
2825               && (data[++index] == 'i')
2826               && (data[++index] == 'l')
2827               && (data[++index] == 'e'))
2828               return TokenNameendwhile;
2829             else
2830               return TokenNameIdentifier;
2831           case 9 : // endswitch
2832             if ((data[++index] == 'n')
2833               && (data[++index] == 'd')
2834               && (data[++index] == 's')
2835               && (data[++index] == 'w')
2836               && (data[++index] == 'i')
2837               && (data[++index] == 't')
2838               && (data[++index] == 'c')
2839               && (data[++index] == 'h'))
2840               return TokenNameendswitch;
2841             else
2842               return TokenNameIdentifier;
2843           case 10 : // endforeach
2844             if ((data[++index] == 'n')
2845               && (data[++index] == 'd')
2846               && (data[++index] == 'f')
2847               && (data[++index] == 'o')
2848               && (data[++index] == 'r')
2849               && (data[++index] == 'e')
2850               && (data[++index] == 'a')
2851               && (data[++index] == 'c')
2852               && (data[++index] == 'h'))
2853               return TokenNameendforeach;
2854             else
2855               return TokenNameIdentifier;
2856
2857           default :
2858             return TokenNameIdentifier;
2859         }
2860
2861       case 'f' : //for false function
2862         switch (length) {
2863           case 3 :
2864             if ((data[++index] == 'o') && (data[++index] == 'r'))
2865               return TokenNamefor;
2866             else
2867               return TokenNameIdentifier;
2868           case 5 :
2869             if ((data[++index] == 'a')
2870               && (data[++index] == 'l')
2871               && (data[++index] == 's')
2872               && (data[++index] == 'e'))
2873               return TokenNamefalse;
2874             else
2875               return TokenNameIdentifier;
2876           case 7 : // function
2877             if ((data[++index] == 'o')
2878               && (data[++index] == 'r')
2879               && (data[++index] == 'e')
2880               && (data[++index] == 'a')
2881               && (data[++index] == 'c')
2882               && (data[++index] == 'h'))
2883               return TokenNameforeach;
2884             else
2885               return TokenNameIdentifier;
2886           case 8 : // function
2887             if ((data[++index] == 'u')
2888               && (data[++index] == 'n')
2889               && (data[++index] == 'c')
2890               && (data[++index] == 't')
2891               && (data[++index] == 'i')
2892               && (data[++index] == 'o')
2893               && (data[++index] == 'n'))
2894               return TokenNamefunction;
2895             else
2896               return TokenNameIdentifier;
2897           default :
2898             return TokenNameIdentifier;
2899         }
2900       case 'g' : //global
2901         if (length == 6) {
2902           if ((data[++index] == 'l')
2903             && (data[++index] == 'o')
2904             && (data[++index] == 'b')
2905             && (data[++index] == 'a')
2906             && (data[++index] == 'l')) {
2907             return TokenNameglobal;
2908           }
2909         }
2910         return TokenNameIdentifier;
2911
2912       case 'i' : //if int 
2913         switch (length) {
2914           case 2 :
2915             if (data[++index] == 'f')
2916               return TokenNameif;
2917             else
2918               return TokenNameIdentifier;
2919             //          case 3 :
2920             //            if ((data[++index] == 'n') && (data[++index] == 't'))
2921             //              return TokenNameint;
2922             //            else
2923             //              return TokenNameIdentifier;
2924           case 7 :
2925             if ((data[++index] == 'n')
2926               && (data[++index] == 'c')
2927               && (data[++index] == 'l')
2928               && (data[++index] == 'u')
2929               && (data[++index] == 'd')
2930               && (data[++index] == 'e'))
2931               return TokenNameinclude;
2932             else
2933               return TokenNameIdentifier;
2934           case 12 :
2935             if ((data[++index] == 'n')
2936               && (data[++index] == 'c')
2937               && (data[++index] == 'l')
2938               && (data[++index] == 'u')
2939               && (data[++index] == 'd')
2940               && (data[++index] == 'e')
2941               && (data[++index] == '_')
2942               && (data[++index] == 'o')
2943               && (data[++index] == 'n')
2944               && (data[++index] == 'c')
2945               && (data[++index] == 'e'))
2946               return TokenNameinclude_once;
2947             else
2948               return TokenNameIdentifier;
2949           default :
2950             return TokenNameIdentifier;
2951         }
2952
2953       case 'l' : //list
2954         if (length == 4) {
2955           if ((data[++index] == 'i')
2956             && (data[++index] == 's')
2957             && (data[++index] == 't')) {
2958             return TokenNamelist;
2959           }
2960         }
2961         return TokenNameIdentifier;
2962
2963       case 'n' : // new null
2964         switch (length) {
2965           case 3 :
2966             if ((data[++index] == 'e') && (data[++index] == 'w'))
2967               return TokenNamenew;
2968             else
2969               return TokenNameIdentifier;
2970           case 4 :
2971             if ((data[++index] == 'u')
2972               && (data[++index] == 'l')
2973               && (data[++index] == 'l'))
2974               return TokenNamenull;
2975             else
2976               return TokenNameIdentifier;
2977
2978           default :
2979             return TokenNameIdentifier;
2980         }
2981       case 'o' : // or old_function
2982         if (length == 2) {
2983           if (data[++index] == 'r') {
2984             return TokenNameOR;
2985           }
2986         }
2987         //        if (length == 12) {
2988         //          if ((data[++index] == 'l')
2989         //            && (data[++index] == 'd')
2990         //            && (data[++index] == '_')
2991         //            && (data[++index] == 'f')
2992         //            && (data[++index] == 'u')
2993         //            && (data[++index] == 'n')
2994         //            && (data[++index] == 'c')
2995         //            && (data[++index] == 't')
2996         //            && (data[++index] == 'i')
2997         //            && (data[++index] == 'o')
2998         //            && (data[++index] == 'n')) {
2999         //            return TokenNameold_function;
3000         //          }
3001         //        }
3002         return TokenNameIdentifier;
3003
3004       case 'p' : // print
3005         if (length == 5) {
3006           if ((data[++index] == 'r')
3007             && (data[++index] == 'i')
3008             && (data[++index] == 'n')
3009             && (data[++index] == 't')) {
3010             return TokenNameprint;
3011           }
3012         }
3013         return TokenNameIdentifier;
3014       case 'r' : //return require require_once
3015         if (length == 6) {
3016           if ((data[++index] == 'e')
3017             && (data[++index] == 't')
3018             && (data[++index] == 'u')
3019             && (data[++index] == 'r')
3020             && (data[++index] == 'n')) {
3021             return TokenNamereturn;
3022           }
3023         } else if (length == 7) {
3024           if ((data[++index] == 'e')
3025             && (data[++index] == 'q')
3026             && (data[++index] == 'u')
3027             && (data[++index] == 'i')
3028             && (data[++index] == 'r')
3029             && (data[++index] == 'e')) {
3030             return TokenNamerequire;
3031           }
3032         } else if (length == 12) {
3033           if ((data[++index] == 'e')
3034             && (data[++index] == 'q')
3035             && (data[++index] == 'u')
3036             && (data[++index] == 'i')
3037             && (data[++index] == 'r')
3038             && (data[++index] == 'e')
3039             && (data[++index] == '_')
3040             && (data[++index] == 'o')
3041             && (data[++index] == 'n')
3042             && (data[++index] == 'c')
3043             && (data[++index] == 'e')) {
3044             return TokenNamerequire_once;
3045           }
3046         } else
3047           return TokenNameIdentifier;
3048
3049       case 's' : //static switch 
3050         switch (length) {
3051           case 6 :
3052             if (data[++index] == 't')
3053               if ((data[++index] == 'a')
3054                 && (data[++index] == 't')
3055                 && (data[++index] == 'i')
3056                 && (data[++index] == 'c')) {
3057                 return TokenNamestatic;
3058               } else
3059                 return TokenNameIdentifier;
3060             else if (
3061               (data[index] == 'w')
3062                 && (data[++index] == 'i')
3063                 && (data[++index] == 't')
3064                 && (data[++index] == 'c')
3065                 && (data[++index] == 'h'))
3066               return TokenNameswitch;
3067             else
3068               return TokenNameIdentifier;
3069           default :
3070             return TokenNameIdentifier;
3071         }
3072
3073       case 't' : // true
3074         switch (length) {
3075
3076           case 4 :
3077             if ((data[++index] == 'r')
3078               && (data[++index] == 'u')
3079               && (data[++index] == 'e'))
3080               return TokenNametrue;
3081             else
3082               return TokenNameIdentifier;
3083             //            if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
3084             //              return TokenNamethis;
3085
3086           default :
3087             return TokenNameIdentifier;
3088         }
3089
3090       case 'v' : //var 
3091         switch (length) {
3092           case 3 :
3093             if ((data[++index] == 'a') && (data[++index] == 'r'))
3094               return TokenNamevar;
3095             else
3096               return TokenNameIdentifier;
3097
3098           default :
3099             return TokenNameIdentifier;
3100         }
3101
3102       case 'w' : //while 
3103         switch (length) {
3104           case 5 :
3105             if ((data[++index] == 'h')
3106               && (data[++index] == 'i')
3107               && (data[++index] == 'l')
3108               && (data[++index] == 'e'))
3109               return TokenNamewhile;
3110             else
3111               return TokenNameIdentifier;
3112             //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
3113             //return TokenNamewidefp ;
3114             //else
3115             //return TokenNameIdentifier;
3116           default :
3117             return TokenNameIdentifier;
3118         }
3119
3120       case 'x' : //xor
3121         switch (length) {
3122           case 3 :
3123             if ((data[++index] == 'o') && (data[++index] == 'r'))
3124               return TokenNameXOR;
3125             else
3126               return TokenNameIdentifier;
3127
3128           default :
3129             return TokenNameIdentifier;
3130         }
3131       default :
3132         return TokenNameIdentifier;
3133     }
3134   }
3135   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
3136
3137     //when entering this method the currentCharacter is the firt
3138     //digit of the number , i.e. it may be preceeded by a . when
3139     //dotPrefix is true
3140
3141     boolean floating = dotPrefix;
3142     if ((!dotPrefix) && (currentCharacter == '0')) {
3143       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------
3144         //force the first char of the hexa number do exist...
3145         // consume next character
3146         unicodeAsBackSlash = false;
3147         currentCharacter = source[currentPosition++];
3148 //        if (((currentCharacter = source[currentPosition++]) == '\\')
3149 //          && (source[currentPosition] == 'u')) {
3150 //          getNextUnicodeChar();
3151 //        } else {
3152 //          if (withoutUnicodePtr != 0) {
3153 //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3154 //          }
3155 //        }
3156         if (Character.digit(currentCharacter, 16) == -1)
3157           throw new InvalidInputException(INVALID_HEXA);
3158         //---end forcing--
3159         while (getNextCharAsDigit(16)) {
3160         };
3161         //        if (getNextChar('l', 'L') >= 0)
3162         //          return TokenNameLongLiteral;
3163         //        else
3164         return TokenNameIntegerLiteral;
3165       }
3166
3167       //there is x or X in the number
3168       //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language
3169       if (getNextCharAsDigit()) {
3170         //-------------potential octal-----------------
3171         while (getNextCharAsDigit()) {
3172         };
3173
3174         //        if (getNextChar('l', 'L') >= 0) {
3175         //          return TokenNameLongLiteral;
3176         //        }
3177         //
3178         //        if (getNextChar('f', 'F') >= 0) {
3179         //          return TokenNameFloatingPointLiteral;
3180         //        }
3181
3182         if (getNextChar('d', 'D') >= 0) {
3183           return TokenNameDoubleLiteral;
3184         } else { //make the distinction between octal and float ....
3185           if (getNextChar('.')) { //bingo ! ....
3186             while (getNextCharAsDigit()) {
3187             };
3188             if (getNextChar('e', 'E') >= 0) {
3189               // consume next character
3190               unicodeAsBackSlash = false;
3191               currentCharacter = source[currentPosition++];
3192 //              if (((currentCharacter = source[currentPosition++]) == '\\')
3193 //                && (source[currentPosition] == 'u')) {
3194 //                getNextUnicodeChar();
3195 //              } else {
3196 //                if (withoutUnicodePtr != 0) {
3197 //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3198 //                }
3199 //              }
3200
3201               if ((currentCharacter == '-') || (currentCharacter == '+')) {
3202                 // consume next character
3203                 unicodeAsBackSlash = false;
3204                 currentCharacter = source[currentPosition++];
3205 //                if (((currentCharacter = source[currentPosition++]) == '\\')
3206 //                  && (source[currentPosition] == 'u')) {
3207 //                  getNextUnicodeChar();
3208 //                } else {
3209 //                  if (withoutUnicodePtr != 0) {
3210 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
3211 //                      currentCharacter;
3212 //                  }
3213 //                }
3214               }
3215               if (!Character.isDigit(currentCharacter))
3216                 throw new InvalidInputException(INVALID_FLOAT);
3217               while (getNextCharAsDigit()) {
3218               };
3219             }
3220             //            if (getNextChar('f', 'F') >= 0)
3221             //              return TokenNameFloatingPointLiteral;
3222             getNextChar('d', 'D'); //jump over potential d or D
3223             return TokenNameDoubleLiteral;
3224           } else {
3225             return TokenNameIntegerLiteral;
3226           }
3227         }
3228       } else {
3229         /* carry on */
3230       }
3231     }
3232
3233     while (getNextCharAsDigit()) {
3234     };
3235
3236     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))
3237     //      return TokenNameLongLiteral;
3238
3239     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty
3240       while (getNextCharAsDigit()) {
3241       };
3242       floating = true;
3243     }
3244
3245     //if floating is true both exponant and suffix may be optional
3246
3247     if (getNextChar('e', 'E') >= 0) {
3248       floating = true;
3249       // consume next character
3250       unicodeAsBackSlash = false;
3251       currentCharacter = source[currentPosition++];
3252 //      if (((currentCharacter = source[currentPosition++]) == '\\')
3253 //        && (source[currentPosition] == 'u')) {
3254 //        getNextUnicodeChar();
3255 //      } else {
3256 //        if (withoutUnicodePtr != 0) {
3257 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3258 //        }
3259 //      }
3260
3261       if ((currentCharacter == '-')
3262         || (currentCharacter == '+')) { // consume next character
3263         unicodeAsBackSlash = false;
3264         currentCharacter = source[currentPosition++];
3265 //        if (((currentCharacter = source[currentPosition++]) == '\\')
3266 //          && (source[currentPosition] == 'u')) {
3267 //          getNextUnicodeChar();
3268 //        } else {
3269 //          if (withoutUnicodePtr != 0) {
3270 //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
3271 //          }
3272 //        }
3273       }
3274       if (!Character.isDigit(currentCharacter))
3275         throw new InvalidInputException(INVALID_FLOAT);
3276       while (getNextCharAsDigit()) {
3277       };
3278     }
3279
3280     if (getNextChar('d', 'D') >= 0)
3281       return TokenNameDoubleLiteral;
3282     //    if (getNextChar('f', 'F') >= 0)
3283     //      return TokenNameFloatingPointLiteral;
3284
3285     //the long flag has been tested before
3286
3287     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
3288   }
3289   /**
3290    * Search the line number corresponding to a specific position
3291    *
3292    */
3293   public final int getLineNumber(int position) {
3294
3295     if (lineEnds == null)
3296       return 1;
3297     int length = linePtr + 1;
3298     if (length == 0)
3299       return 1;
3300     int g = 0, d = length - 1;
3301     int m = 0;
3302     while (g <= d) {
3303       m = (g + d) / 2;
3304       if (position < lineEnds[m]) {
3305         d = m - 1;
3306       } else if (position > lineEnds[m]) {
3307         g = m + 1;
3308       } else {
3309         return m + 1;
3310       }
3311     }
3312     if (position < lineEnds[m]) {
3313       return m + 1;
3314     }
3315     return m + 2;
3316   }
3317
3318   public void setPHPMode(boolean mode) {
3319     phpMode = mode;
3320   }
3321
3322   public final void setSource(char[] source) {
3323     //the source-buffer is set to sourceString
3324
3325     if (source == null) {
3326       this.source = new char[0];
3327     } else {
3328       this.source = source;
3329     }
3330     startPosition = -1;
3331     initialPosition = currentPosition = 0;
3332     containsAssertKeyword = false;
3333     withoutUnicodeBuffer = new char[this.source.length];
3334
3335   }
3336
3337   public String toString() {
3338     if (startPosition == source.length)
3339       return "EOF\n\n" + new String(source); //$NON-NLS-1$
3340     if (currentPosition > source.length)
3341       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$
3342
3343     char front[] = new char[startPosition];
3344     System.arraycopy(source, 0, front, 0, startPosition);
3345
3346     int middleLength = (currentPosition - 1) - startPosition + 1;
3347     char middle[];
3348     if (middleLength > -1) {
3349       middle = new char[middleLength];
3350       System.arraycopy(source, startPosition, middle, 0, middleLength);
3351     } else {
3352       middle = new char[0];
3353     }
3354
3355     char end[] = new char[source.length - (currentPosition - 1)];
3356     System.arraycopy(
3357       source,
3358       (currentPosition - 1) + 1,
3359       end,
3360       0,
3361       source.length - (currentPosition - 1) - 1);
3362
3363     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$
3364     + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
3365     + new String(end);
3366   }
3367   public final String toStringAction(int act) {
3368     switch (act) {
3369       case TokenNameERROR :
3370         return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3371       case TokenNameStopPHP :
3372         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3373       case TokenNameIdentifier :
3374         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3375       case TokenNameVariable :
3376         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3377       case TokenNameas :
3378         return "as"; //$NON-NLS-1$
3379       case TokenNamebreak :
3380         return "break"; //$NON-NLS-1$
3381       case TokenNamecase :
3382         return "case"; //$NON-NLS-1$
3383       case TokenNameclass :
3384         return "class"; //$NON-NLS-1$
3385       case TokenNamecontinue :
3386         return "continue"; //$NON-NLS-1$
3387       case TokenNamedefault :
3388         return "default"; //$NON-NLS-1$
3389       case TokenNamedefine :
3390         return "define"; //$NON-NLS-1$
3391       case TokenNamedo :
3392         return "do"; //$NON-NLS-1$
3393       case TokenNameecho :
3394         return "echo"; //$NON-NLS-1$
3395       case TokenNameelse :
3396         return "else"; //$NON-NLS-1$
3397       case TokenNameelseif :
3398         return "elseif"; //$NON-NLS-1$
3399       case TokenNameendfor :
3400         return "endfor"; //$NON-NLS-1$
3401       case TokenNameendforeach :
3402         return "endforeach"; //$NON-NLS-1$
3403       case TokenNameendif :
3404         return "endif"; //$NON-NLS-1$
3405       case TokenNameendswitch :
3406         return "endswitch"; //$NON-NLS-1$
3407       case TokenNameendwhile :
3408         return "endwhile"; //$NON-NLS-1$
3409       case TokenNameextends :
3410         return "extends"; //$NON-NLS-1$
3411       case TokenNamefalse :
3412         return "false"; //$NON-NLS-1$
3413       case TokenNamefor :
3414         return "for"; //$NON-NLS-1$
3415       case TokenNameforeach :
3416         return "foreach"; //$NON-NLS-1$
3417       case TokenNamefunction :
3418         return "function"; //$NON-NLS-1$
3419       case TokenNameglobal :
3420         return "global"; //$NON-NLS-1$
3421       case TokenNameif :
3422         return "if"; //$NON-NLS-1$
3423       case TokenNameinclude :
3424         return "include"; //$NON-NLS-1$
3425       case TokenNameinclude_once :
3426         return "include_once"; //$NON-NLS-1$
3427       case TokenNamelist :
3428         return "list"; //$NON-NLS-1$
3429       case TokenNamenew :
3430         return "new"; //$NON-NLS-1$
3431       case TokenNamenull :
3432         return "null"; //$NON-NLS-1$
3433       case TokenNameprint :
3434         return "print"; //$NON-NLS-1$
3435       case TokenNamerequire :
3436         return "require"; //$NON-NLS-1$
3437       case TokenNamerequire_once :
3438         return "require_once"; //$NON-NLS-1$
3439       case TokenNamereturn :
3440         return "return"; //$NON-NLS-1$
3441       case TokenNamestatic :
3442         return "static"; //$NON-NLS-1$
3443       case TokenNameswitch :
3444         return "switch"; //$NON-NLS-1$
3445       case TokenNametrue :
3446         return "true"; //$NON-NLS-1$
3447       case TokenNamevar :
3448         return "var"; //$NON-NLS-1$
3449       case TokenNamewhile :
3450         return "while"; //$NON-NLS-1$
3451       case TokenNameIntegerLiteral :
3452         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3453       case TokenNameDoubleLiteral :
3454         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3455       case TokenNameStringLiteral :
3456         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3457       case TokenNameStringConstant :
3458         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3459       case TokenNameStringInterpolated :
3460         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$
3461       case TokenNameHEREDOC :
3462         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$
3463
3464       case TokenNamePLUS_PLUS :
3465         return "++"; //$NON-NLS-1$
3466       case TokenNameMINUS_MINUS :
3467         return "--"; //$NON-NLS-1$
3468       case TokenNameEQUAL_EQUAL :
3469         return "=="; //$NON-NLS-1$
3470       case TokenNameEQUAL_GREATER :
3471         return "=>"; //$NON-NLS-1$
3472       case TokenNameLESS_EQUAL :
3473         return "<="; //$NON-NLS-1$
3474       case TokenNameGREATER_EQUAL :
3475         return ">="; //$NON-NLS-1$
3476       case TokenNameNOT_EQUAL :
3477         return "!="; //$NON-NLS-1$
3478       case TokenNameLEFT_SHIFT :
3479         return "<<"; //$NON-NLS-1$
3480       case TokenNameRIGHT_SHIFT :
3481         return ">>"; //$NON-NLS-1$
3482       case TokenNamePLUS_EQUAL :
3483         return "+="; //$NON-NLS-1$
3484       case TokenNameMINUS_EQUAL :
3485         return "-="; //$NON-NLS-1$
3486       case TokenNameMULTIPLY_EQUAL :
3487         return "*="; //$NON-NLS-1$
3488       case TokenNameDIVIDE_EQUAL :
3489         return "/="; //$NON-NLS-1$
3490       case TokenNameAND_EQUAL :
3491         return "&="; //$NON-NLS-1$
3492       case TokenNameOR_EQUAL :
3493         return "|="; //$NON-NLS-1$
3494       case TokenNameXOR_EQUAL :
3495         return "^="; //$NON-NLS-1$
3496       case TokenNameREMAINDER_EQUAL :
3497         return "%="; //$NON-NLS-1$
3498       case TokenNameLEFT_SHIFT_EQUAL :
3499         return "<<="; //$NON-NLS-1$
3500       case TokenNameRIGHT_SHIFT_EQUAL :
3501         return ">>="; //$NON-NLS-1$
3502       case TokenNameOR_OR :
3503         return "||"; //$NON-NLS-1$
3504       case TokenNameAND_AND :
3505         return "&&"; //$NON-NLS-1$
3506       case TokenNamePLUS :
3507         return "+"; //$NON-NLS-1$
3508       case TokenNameMINUS :
3509         return "-"; //$NON-NLS-1$
3510       case TokenNameMINUS_GREATER :
3511         return "->";
3512       case TokenNameNOT :
3513         return "!"; //$NON-NLS-1$
3514       case TokenNameREMAINDER :
3515         return "%"; //$NON-NLS-1$
3516       case TokenNameXOR :
3517         return "^"; //$NON-NLS-1$
3518       case TokenNameAND :
3519         return "&"; //$NON-NLS-1$
3520       case TokenNameMULTIPLY :
3521         return "*"; //$NON-NLS-1$
3522       case TokenNameOR :
3523         return "|"; //$NON-NLS-1$
3524       case TokenNameTWIDDLE :
3525         return "~"; //$NON-NLS-1$
3526       case TokenNameTWIDDLE_EQUAL :
3527         return "~="; //$NON-NLS-1$
3528       case TokenNameDIVIDE :
3529         return "/"; //$NON-NLS-1$
3530       case TokenNameGREATER :
3531         return ">"; //$NON-NLS-1$
3532       case TokenNameLESS :
3533         return "<"; //$NON-NLS-1$
3534       case TokenNameLPAREN :
3535         return "("; //$NON-NLS-1$
3536       case TokenNameRPAREN :
3537         return ")"; //$NON-NLS-1$
3538       case TokenNameLBRACE :
3539         return "{"; //$NON-NLS-1$
3540       case TokenNameRBRACE :
3541         return "}"; //$NON-NLS-1$
3542       case TokenNameLBRACKET :
3543         return "["; //$NON-NLS-1$
3544       case TokenNameRBRACKET :
3545         return "]"; //$NON-NLS-1$
3546       case TokenNameSEMICOLON :
3547         return ";"; //$NON-NLS-1$
3548       case TokenNameQUESTION :
3549         return "?"; //$NON-NLS-1$
3550       case TokenNameCOLON :
3551         return ":"; //$NON-NLS-1$
3552       case TokenNameCOMMA :
3553         return ","; //$NON-NLS-1$
3554       case TokenNameDOT :
3555         return "."; //$NON-NLS-1$
3556       case TokenNameEQUAL :
3557         return "="; //$NON-NLS-1$
3558       case TokenNameAT :
3559         return "@";
3560       case TokenNameDOLLAR_LBRACE :
3561         return "${";
3562       case TokenNameEOF :
3563         return "EOF"; //$NON-NLS-1$
3564       default :
3565         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$
3566     }
3567   }
3568
/**
 * Creates a scanner with <code>assertMode</code> switched off; all other
 * settings are passed on to the four-argument constructor unchanged.
 *
 * @param tokenizeComments value for the field of the same name
 * @param tokenizeWhiteSpace value for the field of the same name
 * @param checkNonExternalizedStringLiterals value for the field of the same name
 */
public Scanner(
  boolean tokenizeComments,
  boolean tokenizeWhiteSpace,
  boolean checkNonExternalizedStringLiterals) {
  this(tokenizeComments, tokenizeWhiteSpace, checkNonExternalizedStringLiterals, false);
}
3579
/**
 * Creates a scanner, storing each argument in the field of the same name
 * and setting <code>eofPosition</code> to <code>Integer.MAX_VALUE</code>.
 *
 * @param tokenizeComments value for the field of the same name
 * @param tokenizeWhiteSpace value for the field of the same name
 * @param checkNonExternalizedStringLiterals value for the field of the same name
 * @param assertMode value for the field of the same name
 */
public Scanner(
  boolean tokenizeComments,
  boolean tokenizeWhiteSpace,
  boolean checkNonExternalizedStringLiterals,
  boolean assertMode) {
  this.assertMode = assertMode;
  this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
  this.tokenizeWhiteSpace = tokenizeWhiteSpace;
  this.tokenizeComments = tokenizeComments;
  this.eofPosition = Integer.MAX_VALUE;
}
3592
3593   private void checkNonExternalizeString() throws InvalidInputException {
3594     if (currentLine == null)
3595       return;
3596     parseTags(currentLine);
3597   }
3598
3599   private void parseTags(NLSLine line) throws InvalidInputException {
3600     String s = new String(getCurrentTokenSource());
3601     int pos = s.indexOf(TAG_PREFIX);
3602     int lineLength = line.size();
3603     while (pos != -1) {
3604       int start = pos + TAG_PREFIX_LENGTH;
3605       int end = s.indexOf(TAG_POSTFIX, start);
3606       String index = s.substring(start, end);
3607       int i = 0;
3608       try {
3609         i = Integer.parseInt(index) - 1;
3610         // Tags are one based not zero based.
3611       } catch (NumberFormatException e) {
3612         i = -1; // we don't want to consider this as a valid NLS tag
3613       }
3614       if (line.exists(i)) {
3615         line.set(i, null);
3616       }
3617       pos = s.indexOf(TAG_PREFIX, start);
3618     }
3619
3620     this.nonNLSStrings = new StringLiteral[lineLength];
3621     int nonNLSCounter = 0;
3622     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
3623       StringLiteral literal = (StringLiteral) iterator.next();
3624       if (literal != null) {
3625         this.nonNLSStrings[nonNLSCounter++] = literal;
3626       }
3627     }
3628     if (nonNLSCounter == 0) {
3629       this.nonNLSStrings = null;
3630       currentLine = null;
3631       return;
3632     }
3633     this.wasNonExternalizedStringLiteral = true;
3634     if (nonNLSCounter != lineLength) {
3635       System.arraycopy(
3636         this.nonNLSStrings,
3637         0,
3638         (this.nonNLSStrings = new StringLiteral[nonNLSCounter]),
3639         0,
3640         nonNLSCounter);
3641     }
3642     currentLine = null;
3643   }
3644 }