1 /*******************************************************************************
 
   2  * Copyright (c) 2000, 2001, 2002 International Business Machines Corp. and others.
 
   3  * All rights reserved. This program and the accompanying materials 
 
   4  * are made available under the terms of the Common Public License v0.5 
 
   5  * which accompanies this distribution, and is available at
 
   6  * http://www.eclipse.org/legal/cpl-v05.html
 
   9  *     IBM Corporation - initial API and implementation
 
  10  ******************************************************************************/
 
  11 package net.sourceforge.phpdt.internal.compiler.parser;
 
  13 import java.util.ArrayList;
 
  14 import java.util.Iterator;
 
  15 import java.util.List;
 
  17 import net.sourceforge.phpdt.core.compiler.*;
 
  18 import net.sourceforge.phpdt.internal.compiler.ast.StringLiteral;
 
  20 public class Scanner implements IScanner, ITerminalSymbols {
 
  23    - getNextToken() which returns the current type of the token
 
  24      (this value is not memorized by the scanner)
 
  25    - getCurrentTokenSource() which provides the token's "REAL" source
 
  26      (aka all unicode have been transformed into a correct char)
 
  27    - sourceStart gives the position into the stream
 
  28    - currentPosition-1 gives the sourceEnd position into the stream 
 
  32   private boolean assertMode;
 
  33   public boolean useAssertAsAnIndentifier = false;
 
  34   //flag indicating if processed source contains occurrences of keyword assert 
 
  35   public boolean containsAssertKeyword = false;
 
  37   public boolean recordLineSeparator;
 
  38   public boolean phpMode = false;
 
  40   public char currentCharacter;
 
  41   public int startPosition;
 
  42   public int currentPosition;
 
  43   public int initialPosition, eofPosition;
 
  44   // after this position, EOF tokens are generated instead of real tokens from the source
 
  46   public boolean tokenizeComments;
 
  47   public boolean tokenizeWhiteSpace;
 
  49   //source should be viewed as a window (aka a part)
 
  50   //of an entire, very large stream
 
  54   public char[] withoutUnicodeBuffer;
 
  55   public int withoutUnicodePtr;
 
  56   //when == 0 ==> no unicode in the current token
 
  57   public boolean unicodeAsBackSlash = false;
 
  59   public boolean scanningFloatLiteral = false;
 
  61   //support for /** comments
 
  62   //public char[][] comments = new char[10][];
 
  63   public int[] commentStops = new int[10];
 
  64   public int[] commentStarts = new int[10];
 
  65   public int commentPtr = -1; // no comment test with commentPtr value -1
 
  67   //diet parsing support - jump over some method body when requested
 
  68   public boolean diet = false;
 
  70   //support for the  poor-line-debuggers ....
 
  71   //remember the position of the cr/lf
 
  72   public int[] lineEnds = new int[250];
 
  73   public int linePtr = -1;
 
  74   public boolean wasAcr = false;
 
  76   public static final String END_OF_SOURCE = "End_Of_Source"; //$NON-NLS-1$
 
  78   public static final String INVALID_HEXA = "Invalid_Hexa_Literal"; //$NON-NLS-1$
 
  79   public static final String INVALID_OCTAL = "Invalid_Octal_Literal"; //$NON-NLS-1$
 
  80   public static final String INVALID_CHARACTER_CONSTANT = "Invalid_Character_Constant"; //$NON-NLS-1$
 
  81   public static final String INVALID_ESCAPE = "Invalid_Escape"; //$NON-NLS-1$
 
  82   public static final String INVALID_INPUT = "Invalid_Input"; //$NON-NLS-1$
 
  83   public static final String INVALID_UNICODE_ESCAPE = "Invalid_Unicode_Escape"; //$NON-NLS-1$
 
  84   public static final String INVALID_FLOAT = "Invalid_Float_Literal"; //$NON-NLS-1$
 
  86   public static final String NULL_SOURCE_STRING = "Null_Source_String"; //$NON-NLS-1$
 
  87   public static final String UNTERMINATED_STRING = "Unterminated_String"; //$NON-NLS-1$
 
  88   public static final String UNTERMINATED_COMMENT = "Unterminated_Comment"; //$NON-NLS-1$
 
  89   public static final String INVALID_CHAR_IN_STRING = "Invalid_Char_In_String"; //$NON-NLS-1$
 
  91   //----------------optimized identifier management------------------
 
  92   static final char[] charArray_a = new char[] { 'a' },
 
  93     charArray_b = new char[] { 'b' },
 
  94     charArray_c = new char[] { 'c' },
 
  95     charArray_d = new char[] { 'd' },
 
  96     charArray_e = new char[] { 'e' },
 
  97     charArray_f = new char[] { 'f' },
 
  98     charArray_g = new char[] { 'g' },
 
  99     charArray_h = new char[] { 'h' },
 
 100     charArray_i = new char[] { 'i' },
 
 101     charArray_j = new char[] { 'j' },
 
 102     charArray_k = new char[] { 'k' },
 
 103     charArray_l = new char[] { 'l' },
 
 104     charArray_m = new char[] { 'm' },
 
 105     charArray_n = new char[] { 'n' },
 
 106     charArray_o = new char[] { 'o' },
 
 107     charArray_p = new char[] { 'p' },
 
 108     charArray_q = new char[] { 'q' },
 
 109     charArray_r = new char[] { 'r' },
 
 110     charArray_s = new char[] { 's' },
 
 111     charArray_t = new char[] { 't' },
 
 112     charArray_u = new char[] { 'u' },
 
 113     charArray_v = new char[] { 'v' },
 
 114     charArray_w = new char[] { 'w' },
 
 115     charArray_x = new char[] { 'x' },
 
 116     charArray_y = new char[] { 'y' },
 
 117     charArray_z = new char[] { 'z' };
 
 119   static final char[] initCharArray =
 
 120     new char[] { '\u0000', '\u0000', '\u0000', '\u0000', '\u0000', '\u0000' };
 
 121   static final int TableSize = 30, InternalTableSize = 6;
 
 123   public static final int OptimizedLength = 6;
 
 125   final char[][][][] charArray_length =
 
 126     new char[OptimizedLength][TableSize][InternalTableSize][];
 
 127   // support for detecting non-externalized string literals
 
 128   int currentLineNr = -1;
 
 129   int previousLineNr = -1;
 
 130   NLSLine currentLine = null;
 
 131   List lines = new ArrayList();
 
 132   public static final String TAG_PREFIX = "//$NON-NLS-"; //$NON-NLS-1$
 
 133   public static final int TAG_PREFIX_LENGTH = TAG_PREFIX.length();
 
 134   public static final String TAG_POSTFIX = "$"; //$NON-NLS-1$
 
 135   public static final int TAG_POSTFIX_LENGTH = TAG_POSTFIX.length();
 
 136   public StringLiteral[] nonNLSStrings = null;
 
 137   public boolean checkNonExternalizedStringLiterals = true;
 
 138   public boolean wasNonExternalizedStringLiteral = false;
 
 141     for (int i = 0; i < 6; i++) {
 
 142       for (int j = 0; j < TableSize; j++) {
 
 143         for (int k = 0; k < InternalTableSize; k++) {
 
 144           charArray_length[i][j][k] = initCharArray;
 
 149   static int newEntry2 = 0,
 
 155   public static final int RoundBracket = 0;
 
 156   public static final int SquareBracket = 1;
 
 157   public static final int CurlyBracket = 2;
 
 158   public static final int BracketKinds = 3;
 
 160   public static final boolean DEBUG = false;
 
 164   public Scanner(boolean tokenizeComments, boolean tokenizeWhiteSpace) {
 
 165     this(tokenizeComments, tokenizeWhiteSpace, false);
 
 169    * Determines if the specified character is
 
 170    * permissible as the first character in a PHP identifier
 
 172   public static boolean isPHPIdentifierStart(char ch) {
 
 173     return Character.isLetter(ch)
 
 175       || (0x7F <= ch && ch <= 0xFF);
 
 179    * Determines if the specified character may be part of a PHP identifier as
 
 180    * other than the first character
 
 182   public static boolean isPHPIdentifierPart(char ch) {
 
 183     return Character.isLetterOrDigit(ch)
 
 185       || (0x7F <= ch && ch <= 0xFF);
 
 188   public final boolean atEnd() {
 
 189     // This code is not relevant if source is 
 
 190     // Only a part of the real stream input
 
 192     return source.length == currentPosition;
 
 194   public char[] getCurrentIdentifierSource() {
 
 195     //return the token REAL source (aka unicodes are precomputed)
 
 198 //    if (withoutUnicodePtr != 0)
 
 199 //      //0 is used as a fast test flag so the real first char is in position 1
 
 201 //        withoutUnicodeBuffer,
 
 203 //        result = new char[withoutUnicodePtr],
 
 205 //        withoutUnicodePtr);
 
 207       int length = currentPosition - startPosition;
 
 208       switch (length) { // see OptimizedLength
 
 210           return optimizedCurrentTokenSource1();
 
 212           return optimizedCurrentTokenSource2();
 
 214           return optimizedCurrentTokenSource3();
 
 216           return optimizedCurrentTokenSource4();
 
 218           return optimizedCurrentTokenSource5();
 
 220           return optimizedCurrentTokenSource6();
 
 226         result = new char[length],
 
 232   public int getCurrentTokenEndPosition() {
 
 233     return this.currentPosition - 1;
 
 235   public final char[] getCurrentTokenSource() {
 
 236     // Return the token REAL source (aka unicodes are precomputed)
 
 239 //    if (withoutUnicodePtr != 0)
 
 240 //      // 0 is used as a fast test flag so the real first char is in position 1
 
 242 //        withoutUnicodeBuffer,
 
 244 //        result = new char[withoutUnicodePtr],
 
 246 //        withoutUnicodePtr);
 
 252         result = new char[length = currentPosition - startPosition],
 
 259   public final char[] getCurrentTokenSource(int startPos) {
 
 260     // Return the token REAL source (aka unicodes are precomputed)
 
 263 //    if (withoutUnicodePtr != 0)
 
 264 //      // 0 is used as a fast test flag so the real first char is in position 1
 
 266 //        withoutUnicodeBuffer,
 
 268 //        result = new char[withoutUnicodePtr],
 
 270 //        withoutUnicodePtr);
 
 276         result = new char[length = currentPosition - startPos],
 
 283   public final char[] getCurrentTokenSourceString() {
 
 284     //return the token REAL source (aka unicodes are precomputed).
 
 285     //REMOVE the two " that are at the beginning and the end.
 
 288     if (withoutUnicodePtr != 0)
 
 289       //0 is used as a fast test flag so the real first char is in position 1
 
 290       System.arraycopy(withoutUnicodeBuffer, 2,
 
 291       //2 is 1 (real start) + 1 (to jump over the ")
 
 292       result = new char[withoutUnicodePtr - 2], 0, withoutUnicodePtr - 2);
 
 298         result = new char[length = currentPosition - startPosition - 2],
 
 304   public int getCurrentTokenStartPosition() {
 
 305     return this.startPosition;
 
 308    * Search the source position corresponding to the end of a given line number
 
 310    * Line numbers are 1-based, and relative to the scanner initialPosition. 
 
 311    * Character positions are 0-based.
 
 313    * In case the given line number is inconsistent, answers -1.
 
 315   public final int getLineEnd(int lineNumber) {
 
 317     if (lineEnds == null)
 
 319     if (lineNumber >= lineEnds.length)
 
 324     if (lineNumber == lineEnds.length - 1)
 
 326     return lineEnds[lineNumber - 1];
 
 327     // the next line starts one character after the lineEnd of the previous line
 
 330    * Search the source position corresponding to the beginning of a given line number
 
 332    * Line numbers are 1-based, and relative to the scanner initialPosition. 
 
 333    * Character positions are 0-based.
 
 335    * e.g.       getLineStart(1) --> 0   i.e. first line starts at character 0.
 
 337    * In case the given line number is inconsistent, answers -1.
 
 339   public final int getLineStart(int lineNumber) {
 
 341     if (lineEnds == null)
 
 343     if (lineNumber >= lineEnds.length)
 
 349       return initialPosition;
 
 350     return lineEnds[lineNumber - 2] + 1;
 
 351     // the next line starts one character after the lineEnd of the previous line
 
 353   public final boolean getNextChar(char testedChar) {
 
 355     //handle the case of unicode.
 
 356     //when a unicode appears then we must use a buffer that holds char internal values
 
 357     //At the end of this method currentCharacter holds the new visited char
 
 358     //and currentPosition points right next after it
 
 359     //Both previous lines are true if the currentCharacter is == to the testedChar
 
 360     //On false, no side effect has occurred.
 
 362     //ALL getNextChar.... ARE OPTIMIZED COPIES 
 
 364     int temp = currentPosition;
 
 366       currentCharacter = source[currentPosition++];
 
 367 //      if (((currentCharacter = source[currentPosition++]) == '\\')
 
 368 //        && (source[currentPosition] == 'u')) {
 
 369 //        //-------------unicode traitement ------------
 
 370 //        int c1, c2, c3, c4;
 
 371 //        int unicodeSize = 6;
 
 372 //        currentPosition++;
 
 373 //        while (source[currentPosition] == 'u') {
 
 374 //          currentPosition++;
 
 378 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 
 380 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 
 382 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 
 384 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 
 386 //          currentPosition = temp;
 
 390 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
 391 //        if (currentCharacter != testedChar) {
 
 392 //          currentPosition = temp;
 
 395 //        unicodeAsBackSlash = currentCharacter == '\\';
 
 397 //        //need the unicode buffer
 
 398 //        if (withoutUnicodePtr == 0) {
 
 399 //          //buffer all the entries that have been left aside....
 
 400 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 
 404 //            withoutUnicodeBuffer,
 
 406 //            withoutUnicodePtr);
 
 408 //        //fill the buffer with the char
 
 409 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 412 //      } //-------------end unicode traitement--------------
 
 414         if (currentCharacter != testedChar) {
 
 415           currentPosition = temp;
 
 418         unicodeAsBackSlash = false;
 
 419 //        if (withoutUnicodePtr != 0)
 
 420 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 423     } catch (IndexOutOfBoundsException e) {
 
 424       unicodeAsBackSlash = false;
 
 425       currentPosition = temp;
 
 429   public final int getNextChar(char testedChar1, char testedChar2) {
 
 430     //INT 0 : testedChar1 \\\\///\\\\ 1 : testedChar2 \\\\///\\\\ -1 : others
 
 431     //test can be done with (x==0) for the first and (x>0) for the second
 
 432     //handle the case of unicode.
 
 433     //when a unicode appears then we must use a buffer that holds char internal values
 
 434     //At the end of this method currentCharacter holds the new visited char
 
 435     //and currentPosition points right next after it
 
 436     //Both previous lines are true if the currentCharacter is == to the testedChar1/2
 
 437     //On false, no side effect has occurred.
 
 439     //ALL getNextChar.... ARE OPTIMIZED COPIES 
 
 441     int temp = currentPosition;
 
 444       currentCharacter = source[currentPosition++];
 
 445 //      if (((currentCharacter = source[currentPosition++]) == '\\')
 
 446 //        && (source[currentPosition] == 'u')) {
 
 447 //        //-------------unicode traitement ------------
 
 448 //        int c1, c2, c3, c4;
 
 449 //        int unicodeSize = 6;
 
 450 //        currentPosition++;
 
 451 //        while (source[currentPosition] == 'u') {
 
 452 //          currentPosition++;
 
 456 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 
 458 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 
 460 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 
 462 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 
 464 //          currentPosition = temp;
 
 468 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
 469 //        if (currentCharacter == testedChar1)
 
 471 //        else if (currentCharacter == testedChar2)
 
 474 //          currentPosition = temp;
 
 478 //        //need the unicode buffer
 
 479 //        if (withoutUnicodePtr == 0) {
 
 480 //          //buffer all the entries that have been left aside....
 
 481 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 
 485 //            withoutUnicodeBuffer,
 
 487 //            withoutUnicodePtr);
 
 489 //        //fill the buffer with the char
 
 490 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 492 //      } //-------------end unicode traitement--------------
 
 494         if (currentCharacter == testedChar1)
 
 496         else if (currentCharacter == testedChar2)
 
 499           currentPosition = temp;
 
 503 //        if (withoutUnicodePtr != 0)
 
 504 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 507     } catch (IndexOutOfBoundsException e) {
 
 508       currentPosition = temp;
 
 512   public final boolean getNextCharAsDigit() {
 
 514     //handle the case of unicode.
 
 515     //when a unicode appears then we must use a buffer that holds char internal values
 
 516     //At the end of this method currentCharacter holds the new visited char
 
 517     //and currentPosition points right next after it
 
 518     //Both previous lines are true if the currentCharacter is a digit
 
 519     //On false, no side effect has occurred.
 
 521     //ALL getNextChar.... ARE OPTIMIZED COPIES 
 
 523     int temp = currentPosition;
 
 525       currentCharacter = source[currentPosition++];
 
 526 //      if (((currentCharacter = source[currentPosition++]) == '\\')
 
 527 //        && (source[currentPosition] == 'u')) {
 
 528 //        //-------------unicode traitement ------------
 
 529 //        int c1, c2, c3, c4;
 
 530 //        int unicodeSize = 6;
 
 531 //        currentPosition++;
 
 532 //        while (source[currentPosition] == 'u') {
 
 533 //          currentPosition++;
 
 537 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 
 539 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 
 541 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 
 543 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 
 545 //          currentPosition = temp;
 
 549 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
 550 //        if (!Character.isDigit(currentCharacter)) {
 
 551 //          currentPosition = temp;
 
 555 //        //need the unicode buffer
 
 556 //        if (withoutUnicodePtr == 0) {
 
 557 //          //buffer all the entries that have been left aside....
 
 558 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 
 562 //            withoutUnicodeBuffer,
 
 564 //            withoutUnicodePtr);
 
 566 //        //fill the buffer with the char
 
 567 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 569 //      } //-------------end unicode traitement--------------
 
 571         if (!Character.isDigit(currentCharacter)) {
 
 572           currentPosition = temp;
 
 575 //        if (withoutUnicodePtr != 0)
 
 576 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 579     } catch (IndexOutOfBoundsException e) {
 
 580       currentPosition = temp;
 
 584   public final boolean getNextCharAsDigit(int radix) {
 
 586     //handle the case of unicode.
 
 587     //when a unicode appears then we must use a buffer that holds char internal values
 
 588     //At the end of this method currentCharacter holds the new visited char
 
 589     //and currentPosition points right next after it
 
 590     //Both previous lines are true if the currentCharacter is a digit based on radix
 
 591     //On false, no side effect has occurred.
 
 593     //ALL getNextChar.... ARE OPTIMIZED COPIES 
 
 595     int temp = currentPosition;
 
 597       currentCharacter = source[currentPosition++];
 
 598 //      if (((currentCharacter = source[currentPosition++]) == '\\')
 
 599 //        && (source[currentPosition] == 'u')) {
 
 600 //        //-------------unicode traitement ------------
 
 601 //        int c1, c2, c3, c4;
 
 602 //        int unicodeSize = 6;
 
 603 //        currentPosition++;
 
 604 //        while (source[currentPosition] == 'u') {
 
 605 //          currentPosition++;
 
 609 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 
 611 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 
 613 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 
 615 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 
 617 //          currentPosition = temp;
 
 621 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
 622 //        if (Character.digit(currentCharacter, radix) == -1) {
 
 623 //          currentPosition = temp;
 
 627 //        //need the unicode buffer
 
 628 //        if (withoutUnicodePtr == 0) {
 
 629 //          //buffer all the entries that have been left aside....
 
 630 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 
 634 //            withoutUnicodeBuffer,
 
 636 //            withoutUnicodePtr);
 
 638 //        //fill the buffer with the char
 
 639 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 641 //      } //-------------end unicode traitement--------------
 
 643         if (Character.digit(currentCharacter, radix) == -1) {
 
 644           currentPosition = temp;
 
 647 //        if (withoutUnicodePtr != 0)
 
 648 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 651     } catch (IndexOutOfBoundsException e) {
 
 652       currentPosition = temp;
 
 656   public boolean getNextCharAsJavaIdentifierPart() {
 
 658     //handle the case of unicode.
 
 659     //when a unicode appears then we must use a buffer that holds char internal values
 
 660     //At the end of this method currentCharacter holds the new visited char
 
 661     //and currentPosition points right next after it
 
 662     //Both previous lines are true if the currentCharacter is a PHP identifier part (see isPHPIdentifierPart)
 
 663     //On false, no side effect has occurred.
 
 665     //ALL getNextChar.... ARE OPTIMIZED COPIES 
 
 667     int temp = currentPosition;
 
 669       currentCharacter = source[currentPosition++];
 
 670 //      if (((currentCharacter = source[currentPosition++]) == '\\')
 
 671 //        && (source[currentPosition] == 'u')) {
 
 672 //        //-------------unicode traitement ------------
 
 673 //        int c1, c2, c3, c4;
 
 674 //        int unicodeSize = 6;
 
 675 //        currentPosition++;
 
 676 //        while (source[currentPosition] == 'u') {
 
 677 //          currentPosition++;
 
 681 //        if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 
 683 //          || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 
 685 //          || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 
 687 //          || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 
 689 //          currentPosition = temp;
 
 693 //        currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
 694 //        if (!isPHPIdentifierPart(currentCharacter)) {
 
 695 //          currentPosition = temp;
 
 699 //        //need the unicode buffer
 
 700 //        if (withoutUnicodePtr == 0) {
 
 701 //          //buffer all the entries that have been left aside....
 
 702 //          withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 
 706 //            withoutUnicodeBuffer,
 
 708 //            withoutUnicodePtr);
 
 710 //        //fill the buffer with the char
 
 711 //        withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 713 //      } //-------------end unicode traitement--------------
 
 715         if (!isPHPIdentifierPart(currentCharacter)) {
 
 716           currentPosition = temp;
 
 720 //        if (withoutUnicodePtr != 0)
 
 721 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
 724     } catch (IndexOutOfBoundsException e) {
 
 725       currentPosition = temp;
 
 730   public int getNextToken() throws InvalidInputException {
 
 731     int htmlPosition = currentPosition;
 
 734         currentCharacter = source[currentPosition++];
 
 735         if (currentCharacter == '<') {
 
 736           if (getNextChar('?')) {
 
 737             currentCharacter = source[currentPosition++];
 
 738             if ((currentCharacter == ' ')
 
 739               || Character.isWhitespace(currentCharacter)) {
 
 741               startPosition = currentPosition;
 
 743               if (tokenizeWhiteSpace) {
 
 744                 // && (whiteStart != currentPosition - 1)) {
 
 745                 // reposition scanner in case we are interested in spaces as tokens
 
 746                 startPosition = htmlPosition;
 
 747                 return TokenNameHTML;
 
 751                 (currentCharacter == 'P') || (currentCharacter == 'p');
 
 753                 int test = getNextChar('H', 'h');
 
 755                   test = getNextChar('P', 'p');
 
 758                     startPosition = currentPosition;
 
 761                     if (tokenizeWhiteSpace) {
 
 762                       // && (whiteStart != currentPosition - 1)) {
 
 763                       // reposition scanner in case we are interested in spaces as tokens
 
 764                       startPosition = htmlPosition;
 
 765                       return TokenNameHTML;
 
 774         if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 
 775           if (recordLineSeparator) {
 
 782     } //-----------------end switch while try--------------------
 
 783     catch (IndexOutOfBoundsException e) {
 
 784       if (tokenizeWhiteSpace) {
 
 785         // && (whiteStart != currentPosition - 1)) {
 
 786         // reposition scanner in case we are interested in spaces as tokens
 
 787         startPosition = htmlPosition;
 
 795         jumpOverMethodBody();
 
 797         return currentPosition > source.length ? TokenNameEOF : TokenNameRBRACE;
 
 800         while (true) { //loop for jumping over comments
 
 801           withoutUnicodePtr = 0;
 
 802           //start with a new token (even comment written with unicode )
 
 804           // ---------Consume white space and handles startPosition---------
 
 805           int whiteStart = currentPosition;
 
 806           boolean isWhiteSpace;
 
 808             startPosition = currentPosition;
 
 809             currentCharacter = source[currentPosition++];
 
 810 //            if (((currentCharacter = source[currentPosition++]) == '\\')
 
 811 //              && (source[currentPosition] == 'u')) {
 
 812 //              isWhiteSpace = jumpOverUnicodeWhiteSpace();
 
 814               if ((currentCharacter == '\r') || (currentCharacter == '\n')) {
 
 815                 checkNonExternalizeString();
 
 816                 if (recordLineSeparator) {
 
 823                 (currentCharacter == ' ')
 
 824                   || Character.isWhitespace(currentCharacter);
 
 826           } while (isWhiteSpace);
 
 827           if (tokenizeWhiteSpace && (whiteStart != currentPosition - 1)) {
 
 828             // reposition scanner in case we are interested in spaces as tokens
 
 830             startPosition = whiteStart;
 
 831             return TokenNameWHITESPACE;
 
 833           //little trick to get out in the middle of a source computation
 
 834           if (currentPosition > eofPosition)
 
 837           // ---------Identify the next token-------------
 
 839           switch (currentCharacter) {
 
 841               return TokenNameLPAREN;
 
 843               return TokenNameRPAREN;
 
 845               return TokenNameLBRACE;
 
 847               return TokenNameRBRACE;
 
 849               return TokenNameLBRACKET;
 
 851               return TokenNameRBRACKET;
 
 853               return TokenNameSEMICOLON;
 
 855               return TokenNameCOMMA;
 
 858               if (getNextCharAsDigit())
 
 859                 return scanNumber(true);
 
 864                 if ((test = getNextChar('+', '=')) == 0)
 
 865                   return TokenNamePLUS_PLUS;
 
 867                   return TokenNamePLUS_EQUAL;
 
 868                 return TokenNamePLUS;
 
 873                 if ((test = getNextChar('-', '=')) == 0)
 
 874                   return TokenNameMINUS_MINUS;
 
 876                   return TokenNameMINUS_EQUAL;
 
 877                 if (getNextChar('>'))
 
 878                   return TokenNameMINUS_GREATER;
 
 880                 return TokenNameMINUS;
 
 883               if (getNextChar('='))
 
 884                 return TokenNameTWIDDLE_EQUAL;
 
 885               return TokenNameTWIDDLE;
 
 887               if (getNextChar('='))
 
 888                 return TokenNameNOT_EQUAL;
 
 891               if (getNextChar('='))
 
 892                 return TokenNameMULTIPLY_EQUAL;
 
 893               return TokenNameMULTIPLY;
 
 895               if (getNextChar('='))
 
 896                 return TokenNameREMAINDER_EQUAL;
 
 897               return TokenNameREMAINDER;
 
 901                 if ((test = getNextChar('=', '<')) == 0)
 
 902                   return TokenNameLESS_EQUAL;
 
 904                   if (getNextChar('='))
 
 905                     return TokenNameLEFT_SHIFT_EQUAL;
 
 906                   if (getNextChar('<')) {
 
 907                     int heredocStart = currentPosition;
 
 908                     int heredocLength = 0;
 
 909                     currentCharacter = source[currentPosition++];
 
 910                     if (isPHPIdentifierStart(currentCharacter)) {
 
 911                       currentCharacter = source[currentPosition++];
 
 913                       return TokenNameERROR;
 
 915                     while (isPHPIdentifierPart(currentCharacter)) {
 
 916                       currentCharacter = source[currentPosition++];
 
 919                     heredocLength = currentPosition - heredocStart - 1;
 
 921                     // heredoc end-tag determination
 
 922                     boolean endTag = true;
 
 925                       ch = source[currentPosition++];
 
 926                       if (ch == '\r' || ch == '\n') {
 
 927                         if (recordLineSeparator) {
 
 932                         for (int i = 0; i < heredocLength; i++) {
 
 933                           if (source[currentPosition + i]
 
 934                             != source[heredocStart + i]) {
 
 940                           currentPosition += heredocLength - 1;
 
 941                           currentCharacter = source[currentPosition++];
 
 942                           break; // do...while loop
 
 950                     return TokenNameHEREDOC;
 
 952                   return TokenNameLEFT_SHIFT;
 
 954                 return TokenNameLESS;
 
 959                 if ((test = getNextChar('=', '>')) == 0)
 
 960                   return TokenNameGREATER_EQUAL;
 
 962                   if ((test = getNextChar('=', '>')) == 0)
 
 963                     return TokenNameRIGHT_SHIFT_EQUAL;
 
 964                   return TokenNameRIGHT_SHIFT;
 
 966                 return TokenNameGREATER;
 
 969               if (getNextChar('='))
 
 970                 return TokenNameEQUAL_EQUAL;
 
 971               if (getNextChar('>'))
 
 972                 return TokenNameEQUAL_GREATER;
 
 973               return TokenNameEQUAL;
 
 977                 if ((test = getNextChar('&', '=')) == 0)
 
 978                   return TokenNameAND_AND;
 
 980                   return TokenNameAND_EQUAL;
 
 986                 if ((test = getNextChar('|', '=')) == 0)
 
 987                   return TokenNameOR_OR;
 
 989                   return TokenNameOR_EQUAL;
 
 993               if (getNextChar('='))
 
 994                 return TokenNameXOR_EQUAL;
 
 997               if (getNextChar('>')) {
 
 999                 return TokenNameStopPHP;
 
1001               return TokenNameQUESTION;
 
1003               if (getNextChar(':'))
 
1004                 return TokenNameCOLON_COLON;
 
1005               return TokenNameCOLON;
 
1011               //                                                        if ((test = getNextChar('\n', '\r')) == 0) {
 
1012               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
 
1015               //                                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 
1016               //                                                                for (int lookAhead = 0;
 
1019               //                                                                        if (currentPosition + lookAhead
 
1020               //                                                                                == source.length)
 
1022               //                                                                        if (source[currentPosition + lookAhead]
 
1025               //                                                                        if (source[currentPosition + lookAhead]
 
1027               //                                                                                currentPosition += lookAhead + 1;
 
1031               //                                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
 
1034               //                                                if (getNextChar('\'')) {
 
1035               //                                                        // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 
1036               //                                                        for (int lookAhead = 0;
 
1039               //                                                                if (currentPosition + lookAhead
 
1040               //                                                                        == source.length)
 
1042               //                                                                if (source[currentPosition + lookAhead]
 
1045               //                                                                if (source[currentPosition + lookAhead]
 
1047               //                                                                        currentPosition += lookAhead + 1;
 
1051               //                                                        throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
 
1053               //                                                if (getNextChar('\\'))
 
1054               //                                                        scanEscapeCharacter();
 
1055               //                                                else { // consume next character
 
1056               //                                                        unicodeAsBackSlash = false;
 
1057               //                                                        if (((currentCharacter = source[currentPosition++])
 
1059               //                                                                && (source[currentPosition] == 'u')) {
 
1060               //                                                                getNextUnicodeChar();
 
1062               //                                                                if (withoutUnicodePtr != 0) {
 
1063               //                                                                        withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1064               //                                                                                currentCharacter;
 
1068               //                                                //            if (getNextChar('\''))
 
1069               //                                                //              return TokenNameCharacterLiteral;
 
1070               //                                                // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 
1071               //                                                for (int lookAhead = 0; lookAhead < 20; lookAhead++) {
 
1072               //                                                        if (currentPosition + lookAhead == source.length)
 
1074               //                                                        if (source[currentPosition + lookAhead] == '\n')
 
1076               //                                                        if (source[currentPosition + lookAhead] == '\'') {
 
1077               //                                                                currentPosition += lookAhead + 1;
 
1081               //                                                throw new InvalidInputException(INVALID_CHARACTER_CONSTANT);
 
1084                 // consume next character
 
1085                 unicodeAsBackSlash = false;
 
1086                 currentCharacter = source[currentPosition++];
 
1087 //                if (((currentCharacter = source[currentPosition++]) == '\\')
 
1088 //                  && (source[currentPosition] == 'u')) {
 
1089 //                  getNextUnicodeChar();
 
1091 //                  if (withoutUnicodePtr != 0) {
 
1092 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1093 //                      currentCharacter;
 
1097                 while (currentCharacter != '\'') {
 
1099                   /**** in PHP \r and \n are valid in string literals ****/
 
1100                   //                  if ((currentCharacter == '\n')
 
1101                   //                    || (currentCharacter == '\r')) {
 
1102                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 
1103                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 
1104                   //                      if (currentPosition + lookAhead == source.length)
 
1106                   //                      if (source[currentPosition + lookAhead] == '\n')
 
1108                   //                      if (source[currentPosition + lookAhead] == '\"') {
 
1109                   //                        currentPosition += lookAhead + 1;
 
1113                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 
1115                   if (currentCharacter == '\\') {
 
1116                     int escapeSize = currentPosition;
 
1117                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 
1118                     //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
 
1119                     scanSingleQuotedEscapeCharacter();
 
1120                     escapeSize = currentPosition - escapeSize;
 
1121                     if (withoutUnicodePtr == 0) {
 
1122                       //buffer all the entries that have been left aside....
 
1124                         currentPosition - escapeSize - 1 - startPosition;
 
1128                         withoutUnicodeBuffer,
 
1131                       withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1133                     } else { //overwrite the / in the buffer
 
1134                       withoutUnicodeBuffer[withoutUnicodePtr] =
 
1136                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
 
1137                         withoutUnicodePtr--;
 
1141                   // consume next character
 
1142                   unicodeAsBackSlash = false;
 
1143                   currentCharacter = source[currentPosition++];
 
1144 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 
1145 //                    && (source[currentPosition] == 'u')) {
 
1146 //                    getNextUnicodeChar();
 
1148                     if (withoutUnicodePtr != 0) {
 
1149                       withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1155               } catch (IndexOutOfBoundsException e) {
 
1156                 throw new InvalidInputException(UNTERMINATED_STRING);
 
1157               } catch (InvalidInputException e) {
 
1158                 if (e.getMessage().equals(INVALID_ESCAPE)) {
 
1159                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 
1160                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 
1161                     if (currentPosition + lookAhead == source.length)
 
1163                     if (source[currentPosition + lookAhead] == '\n')
 
1165                     if (source[currentPosition + lookAhead] == '\'') {
 
1166                       currentPosition += lookAhead + 1;
 
1174               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
 
1175                 if (currentLine == null) {
 
1176                   currentLine = new NLSLine();
 
1177                   lines.add(currentLine);
 
1181                     getCurrentTokenSourceString(),
 
1183                     currentPosition - 1));
 
1185               return TokenNameStringConstant;
 
1188                 // consume next character
 
1189                 unicodeAsBackSlash = false;
 
1190                 currentCharacter = source[currentPosition++];
 
1191 //                if (((currentCharacter = source[currentPosition++]) == '\\')
 
1192 //                  && (source[currentPosition] == 'u')) {
 
1193 //                  getNextUnicodeChar();
 
1195 //                  if (withoutUnicodePtr != 0) {
 
1196 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1197 //                      currentCharacter;
 
1201                 while (currentCharacter != '"') {
 
1203                   /**** in PHP \r and \n are valid in string literals ****/
 
1204                   //                  if ((currentCharacter == '\n')
 
1205                   //                    || (currentCharacter == '\r')) {
 
1206                   //                    // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 
1207                   //                    for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 
1208                   //                      if (currentPosition + lookAhead == source.length)
 
1210                   //                      if (source[currentPosition + lookAhead] == '\n')
 
1212                   //                      if (source[currentPosition + lookAhead] == '\"') {
 
1213                   //                        currentPosition += lookAhead + 1;
 
1217                   //                    throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 
1219                   if (currentCharacter == '\\') {
 
1220                     int escapeSize = currentPosition;
 
1221                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 
1222                     //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
 
1223                     scanDoubleQuotedEscapeCharacter();
 
1224                     escapeSize = currentPosition - escapeSize;
 
1225                     if (withoutUnicodePtr == 0) {
 
1226                       //buffer all the entries that have been left aside....
 
1228                         currentPosition - escapeSize - 1 - startPosition;
 
1232                         withoutUnicodeBuffer,
 
1235                       withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1237                     } else { //overwrite the / in the buffer
 
1238                       withoutUnicodeBuffer[withoutUnicodePtr] =
 
1240                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
 
1241                         withoutUnicodePtr--;
 
1245                   // consume next character
 
1246                   unicodeAsBackSlash = false;
 
1247                   currentCharacter = source[currentPosition++];
 
1248 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 
1249 //                    && (source[currentPosition] == 'u')) {
 
1250 //                    getNextUnicodeChar();
 
1252                     if (withoutUnicodePtr != 0) {
 
1253                       withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1259               } catch (IndexOutOfBoundsException e) {
 
1260                 throw new InvalidInputException(UNTERMINATED_STRING);
 
1261               } catch (InvalidInputException e) {
 
1262                 if (e.getMessage().equals(INVALID_ESCAPE)) {
 
1263                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 
1264                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 
1265                     if (currentPosition + lookAhead == source.length)
 
1267                     if (source[currentPosition + lookAhead] == '\n')
 
1269                     if (source[currentPosition + lookAhead] == '\"') {
 
1270                       currentPosition += lookAhead + 1;
 
1278               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
 
1279                 if (currentLine == null) {
 
1280                   currentLine = new NLSLine();
 
1281                   lines.add(currentLine);
 
1285                     getCurrentTokenSourceString(),
 
1287                     currentPosition - 1));
 
1289               return TokenNameStringLiteral;
 
1292                 // consume next character
 
1293                 unicodeAsBackSlash = false;
 
1294                 currentCharacter = source[currentPosition++];
 
1295 //                if (((currentCharacter = source[currentPosition++]) == '\\')
 
1296 //                  && (source[currentPosition] == 'u')) {
 
1297 //                  getNextUnicodeChar();
 
1299 //                  if (withoutUnicodePtr != 0) {
 
1300 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1301 //                      currentCharacter;
 
1305                 while (currentCharacter != '`') {
 
1307                   /**** in PHP \r and \n are valid in string literals ****/
 
1308                   //                if ((currentCharacter == '\n')
 
1309                   //                  || (currentCharacter == '\r')) {
 
1310                   //                  // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 
1311                   //                  for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 
1312                   //                    if (currentPosition + lookAhead == source.length)
 
1314                   //                    if (source[currentPosition + lookAhead] == '\n')
 
1316                   //                    if (source[currentPosition + lookAhead] == '\"') {
 
1317                   //                      currentPosition += lookAhead + 1;
 
1321                   //                  throw new InvalidInputException(INVALID_CHAR_IN_STRING);
 
1323                   if (currentCharacter == '\\') {
 
1324                     int escapeSize = currentPosition;
 
1325                     boolean backSlashAsUnicodeInString = unicodeAsBackSlash;
 
1326                     //scanEscapeCharacter make a side effect on this value and we need the previous value few lines down this one
 
1327                     scanDoubleQuotedEscapeCharacter();
 
1328                     escapeSize = currentPosition - escapeSize;
 
1329                     if (withoutUnicodePtr == 0) {
 
1330                       //buffer all the entries that have been left aside....
 
1332                         currentPosition - escapeSize - 1 - startPosition;
 
1336                         withoutUnicodeBuffer,
 
1339                       withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1341                     } else { //overwrite the / in the buffer
 
1342                       withoutUnicodeBuffer[withoutUnicodePtr] =
 
1344                       if (backSlashAsUnicodeInString) { //there are TWO \ in the stream where only one is correct
 
1345                         withoutUnicodePtr--;
 
1349                   // consume next character
 
1350                   unicodeAsBackSlash = false;
 
1351                   currentCharacter = source[currentPosition++];
 
1352 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 
1353 //                    && (source[currentPosition] == 'u')) {
 
1354 //                    getNextUnicodeChar();
 
1356                     if (withoutUnicodePtr != 0) {
 
1357                       withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1363               } catch (IndexOutOfBoundsException e) {
 
1364                 throw new InvalidInputException(UNTERMINATED_STRING);
 
1365               } catch (InvalidInputException e) {
 
1366                 if (e.getMessage().equals(INVALID_ESCAPE)) {
 
1367                   // relocate if finding another quote fairly close: thus unicode '/u000D' will be fully consumed
 
1368                   for (int lookAhead = 0; lookAhead < 50; lookAhead++) {
 
1369                     if (currentPosition + lookAhead == source.length)
 
1371                     if (source[currentPosition + lookAhead] == '\n')
 
1373                     if (source[currentPosition + lookAhead] == '`') {
 
1374                       currentPosition += lookAhead + 1;
 
1382               if (checkNonExternalizedStringLiterals) { // check for presence of        NLS tags //$NON-NLS-?$ where ? is an int.
 
1383                 if (currentLine == null) {
 
1384                   currentLine = new NLSLine();
 
1385                   lines.add(currentLine);
 
1389                     getCurrentTokenSourceString(),
 
1391                     currentPosition - 1));
 
1393               return TokenNameStringInterpolated;
 
1398                 if ((currentCharacter == '#')
 
1399                   || (test = getNextChar('/', '*')) == 0) {
 
1401                   int endPositionForLineComment = 0;
 
1402                   try { //get the next char 
 
1403                     currentCharacter = source[currentPosition++];
 
1404 //                    if (((currentCharacter = source[currentPosition++])
 
1406 //                      && (source[currentPosition] == 'u')) {
 
1407 //                      //-------------unicode traitement ------------
 
1408 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
 
1409 //                      currentPosition++;
 
1410 //                      while (source[currentPosition] == 'u') {
 
1411 //                        currentPosition++;
 
1414 //                        Character.getNumericValue(source[currentPosition++]))
 
1418 //                          Character.getNumericValue(source[currentPosition++]))
 
1422 //                          Character.getNumericValue(source[currentPosition++]))
 
1426 //                          Character.getNumericValue(source[currentPosition++]))
 
1429 //                        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
 
1431 //                        currentCharacter =
 
1432 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
1436                     //handle the \\u case manually into comment
 
1437 //                    if (currentCharacter == '\\') {
 
1438 //                      if (source[currentPosition] == '\\')
 
1439 //                        currentPosition++;
 
1440 //                    } //jump over the \\
 
1441                     boolean isUnicode = false;
 
1442                     while (currentCharacter != '\r'
 
1443                       && currentCharacter != '\n') {
 
1444                       if (currentCharacter == '?') {
 
1445                         if (getNextChar('>')) {
 
1446                           startPosition = currentPosition - 2;
 
1448                           return TokenNameStopPHP;
 
1454                       currentCharacter = source[currentPosition++];
 
1455 //                      if (((currentCharacter = source[currentPosition++])
 
1457 //                        && (source[currentPosition] == 'u')) {
 
1458 //                        isUnicode = true;
 
1459 //                        //-------------unicode traitement ------------
 
1460 //                        int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
 
1461 //                        currentPosition++;
 
1462 //                        while (source[currentPosition] == 'u') {
 
1463 //                          currentPosition++;
 
1466 //                          Character.getNumericValue(source[currentPosition++]))
 
1470 //                            Character.getNumericValue(
 
1471 //                              source[currentPosition++]))
 
1475 //                            Character.getNumericValue(
 
1476 //                              source[currentPosition++]))
 
1480 //                            Character.getNumericValue(
 
1481 //                              source[currentPosition++]))
 
1484 //                          throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
 
1486 //                          currentCharacter =
 
1487 //                            (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
1490                       //handle the \\u case manually into comment
 
1491 //                      if (currentCharacter == '\\') {
 
1492 //                        if (source[currentPosition] == '\\')
 
1493 //                          currentPosition++;
 
1494 //                      } //jump over the \\
 
1497                       endPositionForLineComment = currentPosition - 6;
 
1499                       endPositionForLineComment = currentPosition - 1;
 
1501                     recordComment(false);
 
1502                     if ((currentCharacter == '\r')
 
1503                       || (currentCharacter == '\n')) {
 
1504                       checkNonExternalizeString();
 
1505                       if (recordLineSeparator) {
 
1507                           pushUnicodeLineSeparator();
 
1509                           pushLineSeparator();
 
1515                     if (tokenizeComments) {
 
1517                         currentPosition = endPositionForLineComment;
 
1518                         // reset one character behind
 
1520                       return TokenNameCOMMENT_LINE;
 
1522                   } catch (IndexOutOfBoundsException e) { //an eof will them be generated
 
1523                     if (tokenizeComments) {
 
1525                       // reset one character behind
 
1526                       return TokenNameCOMMENT_LINE;
 
1532                   //traditional and annotation comment
 
1533                   boolean isJavadoc = false, star = false;
 
1534                   // consume next character
 
1535                   unicodeAsBackSlash = false;
 
1536                   currentCharacter = source[currentPosition++];
 
1537 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 
1538 //                    && (source[currentPosition] == 'u')) {
 
1539 //                    getNextUnicodeChar();
 
1541 //                    if (withoutUnicodePtr != 0) {
 
1542 //                      withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1543 //                        currentCharacter;
 
1547                   if (currentCharacter == '*') {
 
1551                   if ((currentCharacter == '\r')
 
1552                     || (currentCharacter == '\n')) {
 
1553                     checkNonExternalizeString();
 
1554                     if (recordLineSeparator) {
 
1555                       pushLineSeparator();
 
1560                   try { //get the next char 
 
1561                     currentCharacter = source[currentPosition++];
 
1562 //                    if (((currentCharacter = source[currentPosition++])
 
1564 //                      && (source[currentPosition] == 'u')) {
 
1565 //                      //-------------unicode traitement ------------
 
1566 //                      getNextUnicodeChar();
 
1568                     //handle the \\u case manually into comment
 
1569 //                    if (currentCharacter == '\\') {
 
1570 //                      if (source[currentPosition] == '\\')
 
1571 //                        currentPosition++;
 
1572 //                      //jump over the \\
 
1574                     // empty comment is not a javadoc /**/
 
1575                     if (currentCharacter == '/') {
 
1578                     //loop until end of comment */
 
1579                     while ((currentCharacter != '/') || (!star)) {
 
1580                       if ((currentCharacter == '\r')
 
1581                         || (currentCharacter == '\n')) {
 
1582                         checkNonExternalizeString();
 
1583                         if (recordLineSeparator) {
 
1584                           pushLineSeparator();
 
1589                       star = currentCharacter == '*';
 
1591                       currentCharacter = source[currentPosition++];
 
1592 //                      if (((currentCharacter = source[currentPosition++])
 
1594 //                        && (source[currentPosition] == 'u')) {
 
1595 //                        //-------------unicode traitement ------------
 
1596 //                        getNextUnicodeChar();
 
1598                       //handle the \\u case manually into comment
 
1599 //                      if (currentCharacter == '\\') {
 
1600 //                        if (source[currentPosition] == '\\')
 
1601 //                          currentPosition++;
 
1602 //                      } //jump over the \\
 
1604                     recordComment(isJavadoc);
 
1605                     if (tokenizeComments) {
 
1607                         return TokenNameCOMMENT_PHPDOC;
 
1608                       return TokenNameCOMMENT_BLOCK;
 
1610                   } catch (IndexOutOfBoundsException e) {
 
1611                     throw new InvalidInputException(UNTERMINATED_COMMENT);
 
1615                 if (getNextChar('='))
 
1616                   return TokenNameDIVIDE_EQUAL;
 
1617                 return TokenNameDIVIDE;
 
1621                 return TokenNameEOF;
 
1622               //the atEnd may not be <currentPosition == source.length> if source is only some part of a real (external) stream
 
1623               throw new InvalidInputException("Ctrl-Z"); //$NON-NLS-1$
 
1626               if (currentCharacter == '$') {
 
1627                 while ((currentCharacter = source[currentPosition++]) == '$') {
 
1629                 if (currentCharacter == '{')
 
1630                   return TokenNameDOLLAR_LBRACE;
 
1631                 if (isPHPIdentifierStart(currentCharacter))
 
1632                   return scanIdentifierOrKeyword(true);
 
1633                 return TokenNameERROR;
 
1635               if (isPHPIdentifierStart(currentCharacter))
 
1636                 return scanIdentifierOrKeyword(false);
 
1637               if (Character.isDigit(currentCharacter))
 
1638                 return scanNumber(false);
 
1639               return TokenNameERROR;
 
1642       } //-----------------end switch while try--------------------
 
1643       catch (IndexOutOfBoundsException e) {
 
1646     return TokenNameEOF;
 
1649 //  public final void getNextUnicodeChar()
 
1650 //    throws IndexOutOfBoundsException, InvalidInputException {
 
1652 //    //handle the case of unicode.
 
1653 //    //when a unicode escape appears we must use a buffer that holds the decoded char values
 
1654 //    //At the end of this method currentCharacter holds the newly visited char
 
1655 //    //and currentPosition points just past it
 
1657 //    //ALL getNextChar.... ARE OPTIMIZED COPIES 
 
1659 //    int c1 = 0, c2 = 0, c3 = 0, c4 = 0, unicodeSize = 6;
 
1660 //    currentPosition++;
 
1661 //    while (source[currentPosition] == 'u') {
 
1662 //      currentPosition++;
 
1666 //    if ((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 
1668 //      || (c2 = Character.getNumericValue(source[currentPosition++])) > 15
 
1670 //      || (c3 = Character.getNumericValue(source[currentPosition++])) > 15
 
1672 //      || (c4 = Character.getNumericValue(source[currentPosition++])) > 15
 
1674 //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
 
1676 //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
1677 //      //need the unicode buffer
 
1678 //      if (withoutUnicodePtr == 0) {
 
1679 //        //buffer all the entries that have been left aside....
 
1680 //        withoutUnicodePtr = currentPosition - unicodeSize - startPosition;
 
1681 //        System.arraycopy(
 
1684 //          withoutUnicodeBuffer,
 
1686 //          withoutUnicodePtr);
 
1688 //      //fill the buffer with the char
 
1689 //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
1691 //    unicodeAsBackSlash = currentCharacter == '\\';
 
1693   /* Tokenize a method body, assuming that curly brackets are properly balanced.
 
1695   public final void jumpOverMethodBody() {
 
1697     this.wasAcr = false;
 
1700       while (true) { //loop for jumping over comments
 
1701         // ---------Consume white space and handles startPosition---------
 
1702         boolean isWhiteSpace;
 
1704           startPosition = currentPosition;
 
1705           currentCharacter = source[currentPosition++];
 
1706 //          if (((currentCharacter = source[currentPosition++]) == '\\')
 
1707 //            && (source[currentPosition] == 'u')) {
 
1708 //            isWhiteSpace = jumpOverUnicodeWhiteSpace();
 
1710             if (recordLineSeparator
 
1711               && ((currentCharacter == '\r') || (currentCharacter == '\n')))
 
1712               pushLineSeparator();
 
1713             isWhiteSpace = Character.isWhitespace(currentCharacter);
 
1715         } while (isWhiteSpace);
 
1717         // -------consume token until } is found---------
 
1718         switch (currentCharacter) {
 
1730               test = getNextChar('\\');
 
1733                   scanDoubleQuotedEscapeCharacter();
 
1734                 } catch (InvalidInputException ex) {
 
1737 //                try { // consume next character
 
1738                   unicodeAsBackSlash = false;
 
1739                   currentCharacter = source[currentPosition++];
 
1740 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 
1741 //                    && (source[currentPosition] == 'u')) {
 
1742 //                    getNextUnicodeChar();
 
1744                     if (withoutUnicodePtr != 0) {
 
1745                       withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1749 //                } catch (InvalidInputException ex) {
 
1757 //              try { // consume next character
 
1758                 unicodeAsBackSlash = false;
 
1759                 currentCharacter = source[currentPosition++];
 
1760 //                if (((currentCharacter = source[currentPosition++]) == '\\')
 
1761 //                  && (source[currentPosition] == 'u')) {
 
1762 //                  getNextUnicodeChar();
 
1764                   if (withoutUnicodePtr != 0) {
 
1765                     withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1769 //              } catch (InvalidInputException ex) {
 
1771               while (currentCharacter != '"') {
 
1772                 if (currentCharacter == '\r') {
 
1773                   if (source[currentPosition] == '\n')
 
1776                   // the string cannot go further than the line
 
1778                 if (currentCharacter == '\n') {
 
1780                   // the string cannot go further than the line
 
1782                 if (currentCharacter == '\\') {
 
1784                     scanDoubleQuotedEscapeCharacter();
 
1785                   } catch (InvalidInputException ex) {
 
1788 //                try { // consume next character
 
1789                   unicodeAsBackSlash = false;
 
1790                   currentCharacter = source[currentPosition++];
 
1791 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 
1792 //                    && (source[currentPosition] == 'u')) {
 
1793 //                    getNextUnicodeChar();
 
1795                     if (withoutUnicodePtr != 0) {
 
1796                       withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1800 //                } catch (InvalidInputException ex) {
 
1803             } catch (IndexOutOfBoundsException e) {
 
1810               if ((test = getNextChar('/', '*')) == 0) {
 
1814                   currentCharacter = source[currentPosition++];
 
1815 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 
1816 //                    && (source[currentPosition] == 'u')) {
 
1817 //                    //-------------unicode traitement ------------
 
1818 //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
 
1819 //                    currentPosition++;
 
1820 //                    while (source[currentPosition] == 'u') {
 
1821 //                      currentPosition++;
 
1824 //                      Character.getNumericValue(source[currentPosition++]))
 
1828 //                        Character.getNumericValue(source[currentPosition++]))
 
1832 //                        Character.getNumericValue(source[currentPosition++]))
 
1836 //                        Character.getNumericValue(source[currentPosition++]))
 
1839 //                      //error don't care of the value
 
1840 //                      currentCharacter = 'A';
 
1841 //                    } //something different from \n and \r
 
1843 //                      currentCharacter =
 
1844 //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
1848                   while (currentCharacter != '\r'
 
1849                     && currentCharacter != '\n') {
 
1851                     currentCharacter = source[currentPosition++];
 
1852 //                    if (((currentCharacter = source[currentPosition++])
 
1854 //                      && (source[currentPosition] == 'u')) {
 
1855 //                      //-------------unicode traitement ------------
 
1856 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
 
1857 //                      currentPosition++;
 
1858 //                      while (source[currentPosition] == 'u') {
 
1859 //                        currentPosition++;
 
1862 //                        Character.getNumericValue(source[currentPosition++]))
 
1866 //                          Character.getNumericValue(source[currentPosition++]))
 
1870 //                          Character.getNumericValue(source[currentPosition++]))
 
1874 //                          Character.getNumericValue(source[currentPosition++]))
 
1877 //                        //error don't care of the value
 
1878 //                        currentCharacter = 'A';
 
1879 //                      } //something different from \n and \r
 
1881 //                        currentCharacter =
 
1882 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
1886                   if (recordLineSeparator
 
1887                     && ((currentCharacter == '\r') || (currentCharacter == '\n')))
 
1888                     pushLineSeparator();
 
1889                 } catch (IndexOutOfBoundsException e) {
 
1890                 } //an eof will them be generated
 
1894                 //traditional and annotation comment
 
1895                 boolean star = false;
 
1896 //                try { // consume next character
 
1897                   unicodeAsBackSlash = false;
 
1898                   currentCharacter = source[currentPosition++];
 
1899 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 
1900 //                    && (source[currentPosition] == 'u')) {
 
1901 //                    getNextUnicodeChar();
 
1903                     if (withoutUnicodePtr != 0) {
 
1904                       withoutUnicodeBuffer[++withoutUnicodePtr] =
 
1908 //                } catch (InvalidInputException ex) {
 
1910                 if (currentCharacter == '*') {
 
1913                 if (recordLineSeparator
 
1914                   && ((currentCharacter == '\r') || (currentCharacter == '\n')))
 
1915                   pushLineSeparator();
 
1916                 try { //get the next char 
 
1917                   currentCharacter = source[currentPosition++];
 
1918 //                  if (((currentCharacter = source[currentPosition++]) == '\\')
 
1919 //                    && (source[currentPosition] == 'u')) {
 
1920 //                    //-------------unicode traitement ------------
 
1921 //                    int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
 
1922 //                    currentPosition++;
 
1923 //                    while (source[currentPosition] == 'u') {
 
1924 //                      currentPosition++;
 
1927 //                      Character.getNumericValue(source[currentPosition++]))
 
1931 //                        Character.getNumericValue(source[currentPosition++]))
 
1935 //                        Character.getNumericValue(source[currentPosition++]))
 
1939 //                        Character.getNumericValue(source[currentPosition++]))
 
1942 //                      //error don't care of the value
 
1943 //                      currentCharacter = 'A';
 
1944 //                    } //something different from * and /
 
1946 //                      currentCharacter =
 
1947 //                        (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
1950                   //loop until end of comment */ 
 
1951                   while ((currentCharacter != '/') || (!star)) {
 
1952                     if (recordLineSeparator
 
1953                       && ((currentCharacter == '\r')
 
1954                         || (currentCharacter == '\n')))
 
1955                       pushLineSeparator();
 
1956                     star = currentCharacter == '*';
 
1958                     currentCharacter = source[currentPosition++];
 
1959 //                    if (((currentCharacter = source[currentPosition++])
 
1961 //                      && (source[currentPosition] == 'u')) {
 
1962 //                      //-------------unicode traitement ------------
 
1963 //                      int c1 = 0, c2 = 0, c3 = 0, c4 = 0;
 
1964 //                      currentPosition++;
 
1965 //                      while (source[currentPosition] == 'u') {
 
1966 //                        currentPosition++;
 
1969 //                        Character.getNumericValue(source[currentPosition++]))
 
1973 //                          Character.getNumericValue(source[currentPosition++]))
 
1977 //                          Character.getNumericValue(source[currentPosition++]))
 
1981 //                          Character.getNumericValue(source[currentPosition++]))
 
1984 //                        //error don't care of the value
 
1985 //                        currentCharacter = 'A';
 
1986 //                      } //something different from * and /
 
1988 //                        currentCharacter =
 
1989 //                          (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
1993                 } catch (IndexOutOfBoundsException e) {
 
2002             if (isPHPIdentifierStart(currentCharacter)
 
2003               || currentCharacter == '$') {
 
2005                 scanIdentifierOrKeyword((currentCharacter == '$'));
 
2006               } catch (InvalidInputException ex) {
 
2010             if (Character.isDigit(currentCharacter)) {
 
2013               } catch (InvalidInputException ex) {
 
2019       //-----------------end switch while try--------------------
 
2020     } catch (IndexOutOfBoundsException e) {
 
2021     } catch (InvalidInputException e) {
 
2025 //  public final boolean jumpOverUnicodeWhiteSpace()
 
2026 //    throws InvalidInputException {
 
2028 //    //handle the case of unicode. Jump over the next whiteSpace
 
2029 //    //making startPosition pointing on the next available char
 
2030 //    //On false, the currentCharacter is filled up with a potential
 
2034 //      this.wasAcr = false;
 
2035 //      int c1, c2, c3, c4;
 
2036 //      int unicodeSize = 6;
 
2037 //      currentPosition++;
 
2038 //      while (source[currentPosition] == 'u') {
 
2039 //        currentPosition++;
 
2043 //      if (((c1 = Character.getNumericValue(source[currentPosition++])) > 15
 
2045 //        || ((c2 = Character.getNumericValue(source[currentPosition++])) > 15
 
2047 //        || ((c3 = Character.getNumericValue(source[currentPosition++])) > 15
 
2049 //        || ((c4 = Character.getNumericValue(source[currentPosition++])) > 15
 
2051 //        throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
 
2054 //      currentCharacter = (char) (((c1 * 16 + c2) * 16 + c3) * 16 + c4);
 
2055 //      if (recordLineSeparator
 
2056 //        && ((currentCharacter == '\r') || (currentCharacter == '\n')))
 
2057 //        pushLineSeparator();
 
2058 //      if (Character.isWhitespace(currentCharacter))
 
2061 //      //buffer the new char which is not a white space
 
2062 //      withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
2063 //      //withoutUnicodePtr == 1 is true here
 
2065 //    } catch (IndexOutOfBoundsException e) {
 
2066 //      throw new InvalidInputException(INVALID_UNICODE_ESCAPE);
 
2069   public final int[] getLineEnds() {
         // Snapshot of the recorded line-end positions: only the first
         // linePtr + 1 slots of lineEnds are copied, into an array allocated
         // here, so the copy is a separate array from the scanner's own buffer.
         // NOTE(review): the declaration of 'copy' and the return statement are
         // not visible in this excerpt; presumably 'copy' is what gets returned.
 
2070     //return a bounded copy of this.lineEnds 
 
2073     System.arraycopy(lineEnds, 0, copy = new int[linePtr + 1], 0, linePtr + 1);
 
2077   public char[] getSource() {
         // NOTE(review): the body is not visible in this excerpt; presumably it
         // hands back the char[] held in the 'source' field - confirm before
         // relying on whether callers receive the live buffer or a copy.
 
2080   final char[] optimizedCurrentTokenSource1() {
         // Token source for a one-character token that begins at startPosition.
 
2081     //return always the same char[] build only once
 
2083     //optimization at no speed cost of 99.5 % of the singleCharIdentifier
 
         // The token's only character is read straight from the source buffer.
2084     char charOne = source[startPosition];
 
         // Allocating path: wraps charOne in a new one-element array.
         // NOTE(review): the selection logic between the read above and this
         // return is not visible here; presumably it returns shared arrays for
         // the common characters - confirm.
2139         return new char[] { charOne };
 
2143   final char[] optimizedCurrentTokenSource2() {
         // Looks in charArray_length[0] for a cached two-character array equal to
         // the token starting at startPosition; when neither scan below finds
         // one, a new array is created and stored in the table.
 
2144     //try to return the same char[] build only once
 
         // c0 and c1 are captured as a side effect of computing the bucket hash.
         // NOTE(review): the start of this expression (and any reduction of the
         // hash to the table size) is not visible in this excerpt.
2148       (((c0 = source[startPosition]) << 6) + (c1 = source[startPosition + 1]))
 
2150     char[][] table = charArray_length[0][hash];
 
         // First scan: advance i while it stays below InternalTableSize and
         // compare both characters with the cached entry.
2152     while (++i < InternalTableSize) {
 
2153       char[] charArray = table[i];
 
2154       if ((c0 == charArray[0]) && (c1 == charArray[1]))
 
2157     //---------other side---------
 
         // Second scan: runs up to and including index newEntry2.
         // NOTE(review): how i is re-initialised between the scans is not
         // visible here.
2159     int max = newEntry2;
 
2160     while (++i <= max) {
 
2161       char[] charArray = table[i];
 
2162       if ((c0 == charArray[0]) && (c1 == charArray[1]))
 
2165     //--------add the entry-------
 
         // The insertion index is bumped and checked against the table size
         // before the slot is written (the action taken on overflow is not
         // visible in this excerpt).
2166     if (++max >= InternalTableSize)
 
         // The new array is cached in the table and also kept in r.
2169     table[max] = (r = new char[] { c0, c1 });
 
2174   final char[] optimizedCurrentTokenSource3() {
         // Three-character variant of the cached token-source lookup: searches
         // charArray_length[1] for an array matching the token at startPosition
         // and caches a new array when no match is found.
 
2175     //try to return the same char[] build only once
 
         // The three characters are stored in c0..c2 while the hash is built
         // from them with 12- and 6-bit shifts.
         // NOTE(review): the head of this expression is not visible here.
2179       (((c0 = source[startPosition]) << 12)
 
2180         + ((c1 = source[startPosition + 1]) << 6)
 
2181         + (c2 = source[startPosition + 2]))
 
2183     char[][] table = charArray_length[1][hash];
 
         // First scan, bounded by InternalTableSize; a hit requires all three
         // characters to match.
2185     while (++i < InternalTableSize) {
 
2186       char[] charArray = table[i];
 
2187       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
 
2190     //---------other side---------
 
         // Second scan, bounded (inclusively) by newEntry3.
2192     int max = newEntry3;
 
2193     while (++i <= max) {
 
2194       char[] charArray = table[i];
 
2195       if ((c0 == charArray[0]) && (c1 == charArray[1]) && (c2 == charArray[2]))
 
2198     //--------add the entry-------
 
         // Overflow check on the insertion index; the statement it guards is
         // not visible in this excerpt.
2199     if (++max >= InternalTableSize)
 
         // Cache the freshly built array and remember it in r.
2202     table[max] = (r = new char[] { c0, c1, c2 });
 
2207   final char[] optimizedCurrentTokenSource4() {
         // Four-character variant of the cached token-source lookup, backed by
         // charArray_length[2].
 
2208     //try to return the same char[] build only once
 
2210     char c0, c1, c2, c3;
 
         // The leading character is widened to long before the 18-bit shift, so
         // the sum is computed as a long; c0..c3 are captured along the way.
         // NOTE(review): the head of this expression is not visible here.
2212       ((((long) (c0 = source[startPosition])) << 18)
 
2213         + ((c1 = source[startPosition + 1]) << 12)
 
2214         + ((c2 = source[startPosition + 2]) << 6)
 
2215         + (c3 = source[startPosition + 3]))
 
         // hash is narrowed back to int to index the bucket array.
2217     char[][] table = charArray_length[2][(int) hash];
 
         // First scan, bounded by InternalTableSize; all four characters must
         // match for a hit.
2219     while (++i < InternalTableSize) {
 
2220       char[] charArray = table[i];
 
2221       if ((c0 == charArray[0])
 
2222         && (c1 == charArray[1])
 
2223         && (c2 == charArray[2])
 
2224         && (c3 == charArray[3]))
 
2227     //---------other side---------
 
         // Second scan, bounded (inclusively) by newEntry4.
2229     int max = newEntry4;
 
2230     while (++i <= max) {
 
2231       char[] charArray = table[i];
 
2232       if ((c0 == charArray[0])
 
2233         && (c1 == charArray[1])
 
2234         && (c2 == charArray[2])
 
2235         && (c3 == charArray[3]))
 
2238     //--------add the entry-------
 
         // Overflow check on the insertion index; the guarded statement is not
         // visible in this excerpt.
2239     if (++max >= InternalTableSize)
 
         // Cache the new array in the table and keep it in r.
2242     table[max] = (r = new char[] { c0, c1, c2, c3 });
 
2248   final char[] optimizedCurrentTokenSource5() {
         // Five-character variant of the cached token-source lookup, backed by
         // charArray_length[3].
 
2249     //try to return the same char[] build only once
 
2251     char c0, c1, c2, c3, c4;
 
         // The two leading characters are widened to long before being shifted
         // by 24 and 18 bits; c0..c4 are captured while the hash is built.
         // NOTE(review): the head of this expression is not visible here.
2253       ((((long) (c0 = source[startPosition])) << 24)
 
2254         + (((long) (c1 = source[startPosition + 1])) << 18)
 
2255         + ((c2 = source[startPosition + 2]) << 12)
 
2256         + ((c3 = source[startPosition + 3]) << 6)
 
2257         + (c4 = source[startPosition + 4]))
 
         // hash is narrowed back to int to index the bucket array.
2259     char[][] table = charArray_length[3][(int) hash];
 
         // First scan, bounded by InternalTableSize; all five characters must
         // match for a hit.
2261     while (++i < InternalTableSize) {
 
2262       char[] charArray = table[i];
 
2263       if ((c0 == charArray[0])
 
2264         && (c1 == charArray[1])
 
2265         && (c2 == charArray[2])
 
2266         && (c3 == charArray[3])
 
2267         && (c4 == charArray[4]))
 
2270     //---------other side---------
 
         // Second scan, bounded (inclusively) by newEntry5.
2272     int max = newEntry5;
 
2273     while (++i <= max) {
 
2274       char[] charArray = table[i];
 
2275       if ((c0 == charArray[0])
 
2276         && (c1 == charArray[1])
 
2277         && (c2 == charArray[2])
 
2278         && (c3 == charArray[3])
 
2279         && (c4 == charArray[4]))
 
2282     //--------add the entry-------
 
         // Overflow check on the insertion index; the guarded statement is not
         // visible in this excerpt.
2283     if (++max >= InternalTableSize)
 
         // Cache the new array in the table and keep it in r.
2286     table[max] = (r = new char[] { c0, c1, c2, c3, c4 });
 
2292   final char[] optimizedCurrentTokenSource6() {
         // Six-character variant of the cached token-source lookup, backed by
         // charArray_length[4].
 
2293     //try to return the same char[] build only once
 
2295     char c0, c1, c2, c3, c4, c5;
 
         // The three leading characters are widened to long before shifting;
         // c0..c5 are captured while the hash is built. The shift amounts here
         // are 32, 24, 18, 12 and 6, i.e. not evenly spaced - the value only
         // selects a bucket, and the character-by-character comparisons below
         // decide whether an entry really matches.
         // NOTE(review): the head of this expression is not visible here.
2297       ((((long) (c0 = source[startPosition])) << 32)
 
2298         + (((long) (c1 = source[startPosition + 1])) << 24)
 
2299         + (((long) (c2 = source[startPosition + 2])) << 18)
 
2300         + ((c3 = source[startPosition + 3]) << 12)
 
2301         + ((c4 = source[startPosition + 4]) << 6)
 
2302         + (c5 = source[startPosition + 5]))
 
         // hash is narrowed back to int to index the bucket array.
2304     char[][] table = charArray_length[4][(int) hash];
 
         // First scan, bounded by InternalTableSize; all six characters must
         // match for a hit.
2306     while (++i < InternalTableSize) {
 
2307       char[] charArray = table[i];
 
2308       if ((c0 == charArray[0])
 
2309         && (c1 == charArray[1])
 
2310         && (c2 == charArray[2])
 
2311         && (c3 == charArray[3])
 
2312         && (c4 == charArray[4])
 
2313         && (c5 == charArray[5]))
 
2316     //---------other side---------
 
         // Second scan, bounded (inclusively) by newEntry6.
2318     int max = newEntry6;
 
2319     while (++i <= max) {
 
2320       char[] charArray = table[i];
 
2321       if ((c0 == charArray[0])
 
2322         && (c1 == charArray[1])
 
2323         && (c2 == charArray[2])
 
2324         && (c3 == charArray[3])
 
2325         && (c4 == charArray[4])
 
2326         && (c5 == charArray[5]))
 
2329     //--------add the entry-------
 
         // Overflow check on the insertion index; the guarded statement is not
         // visible in this excerpt.
2330     if (++max >= InternalTableSize)
 
         // Cache the new array in the table and keep it in r.
2333     table[max] = (r = new char[] { c0, c1, c2, c3, c4, c5 });
 
2338   public final void pushLineSeparator() throws InvalidInputException {
         // Records the position of the line separator held in currentCharacter
         // ('\r' or '\n') in lineEnds, treating a CR immediately followed by LF
         // as a single line end.
 
2339     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
 
         // Number of slots added to lineEnds each time it runs out of room.
2340     final int INCREMENT = 250;
 
2342     if (this.checkNonExternalizedStringLiterals) {
 
2343       // reinitialize the current line for non externalize strings purpose
 
2346     //currentCharacter is at position currentPosition-1
 
2349     if (currentCharacter == '\r') {
 
2350       int separatorPos = currentPosition - 1;
 
           // Guard against a position at or before the last recorded line end.
           // NOTE(review): the guarded statement is not visible in this excerpt;
           // presumably the method returns without recording anything.
2351       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
 
2353       //System.out.println("CR-" + separatorPos);
 
           // Optimistic store: an out-of-range index is handled by the catch
           // below instead of being tested up front.
2355         lineEnds[++linePtr] = separatorPos;
 
2356       } catch (IndexOutOfBoundsException e) {
 
             // ++linePtr already took effect before the failed store, so the
             // retry after growing the array uses linePtr unchanged.
2357         //linePtr value is correct
 
2358         int oldLength = lineEnds.length;
 
2359         int[] old = lineEnds;
 
2360         lineEnds = new int[oldLength + INCREMENT];
 
2361         System.arraycopy(old, 0, lineEnds, 0, oldLength);
 
2362         lineEnds[linePtr] = separatorPos;
 
2364       // look-ahead for merged cr+lf
 
             // When the next source character is LF, the entry just recorded is
             // moved onto the LF so that CR+LF counts as one line end.
2366         if (source[currentPosition] == '\n') {
 
2367           //System.out.println("look-ahead LF-" + currentPosition);                     
 
2368           lineEnds[linePtr] = currentPosition;
 
           // Reading source[currentPosition] can run past the end of the buffer;
           // the handler body is not visible in this excerpt.
2374       } catch (IndexOutOfBoundsException e) {
 
2379       if (currentCharacter == '\n') {
 
2380         //must merge eventual cr followed by lf
 
             // If the previous character was a CR whose position is the last
             // recorded line end, that entry is moved onto this LF.
2381         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 2))) {
 
2382           //System.out.println("merge LF-" + (currentPosition - 1));                                                    
 
2383           lineEnds[linePtr] = currentPosition - 1;
 
               // Otherwise the LF is recorded on its own, with the same guard
               // and grow-on-overflow handling as the CR branch above.
2385           int separatorPos = currentPosition - 1;
 
2386           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
 
2388           // System.out.println("LF-" + separatorPos);                                                  
 
2390             lineEnds[++linePtr] = separatorPos;
 
2391           } catch (IndexOutOfBoundsException e) {
 
2392             //linePtr value is correct
 
2393             int oldLength = lineEnds.length;
 
2394             int[] old = lineEnds;
 
2395             lineEnds = new int[oldLength + INCREMENT];
 
2396             System.arraycopy(old, 0, lineEnds, 0, oldLength);
 
2397             lineEnds[linePtr] = separatorPos;
 
2404   public final void pushUnicodeLineSeparator() {
         // Variant of the line-separator bookkeeping that uses offsets of 6 and
         // 7 characters behind currentPosition instead of 1 and 2.
         // NOTE(review): presumably 6 is the length of a \\uXXXX escape in the
         // source text - confirm against the callers.
 
2405     // isUnicode means that the \r or \n has been read as a unicode character
 
2407     //see comment on isLineDelimiter(char) for the use of '\n' and '\r'
 
         // Number of slots added to lineEnds each time it runs out of room.
2409     final int INCREMENT = 250;
 
2410     //currentCharacter is at position currentPosition-1
 
2412     if (this.checkNonExternalizedStringLiterals) {
 
2413       // reinitialize the current line for non externalize strings purpose
 
2418     if (currentCharacter == '\r') {
 
2419       int separatorPos = currentPosition - 6;
 
           // Guard against a position at or before the last recorded line end
           // (the guarded statement is not visible in this excerpt).
2420       if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
 
2422       //System.out.println("CR-" + separatorPos);
 
           // Optimistic store; the catch below grows lineEnds when it is full.
2424         lineEnds[++linePtr] = separatorPos;
 
2425       } catch (IndexOutOfBoundsException e) {
 
             // ++linePtr already took effect before the failed store, so the
             // retry after growing the array uses linePtr unchanged.
2426         //linePtr value is correct
 
2427         int oldLength = lineEnds.length;
 
2428         int[] old = lineEnds;
 
2429         lineEnds = new int[oldLength + INCREMENT];
 
2430         System.arraycopy(old, 0, lineEnds, 0, oldLength);
 
2431         lineEnds[linePtr] = separatorPos;
 
2433       // look-ahead for merged cr+lf
 
           // NOTE(review): unlike pushLineSeparator, no catch for
           // IndexOutOfBoundsException is visible around this look-ahead, so a
           // CR at the very end of the buffer may throw here - confirm.
2434       if (source[currentPosition] == '\n') {
 
2435         //System.out.println("look-ahead LF-" + currentPosition);                       
 
2436         lineEnds[linePtr] = currentPosition;
 
2444       if (currentCharacter == '\n') {
 
2445         //must merge eventual cr followed by lf
 
             // If the previous character was a CR recorded seven characters
             // back, that entry is moved to this separator's position.
2446         if (wasAcr && (lineEnds[linePtr] == (currentPosition - 7))) {
 
2447           //System.out.println("merge LF-" + (currentPosition - 1));                                                    
 
2448           lineEnds[linePtr] = currentPosition - 6;
 
               // Otherwise the LF is recorded on its own, with the same guard
               // and grow-on-overflow handling as the CR branch above.
2450           int separatorPos = currentPosition - 6;
 
2451           if ((linePtr > 0) && (lineEnds[linePtr] >= separatorPos))
 
2453           // System.out.println("LF-" + separatorPos);                                                  
 
2455             lineEnds[++linePtr] = separatorPos;
 
2456           } catch (IndexOutOfBoundsException e) {
 
2457             //linePtr value is correct
 
2458             int oldLength = lineEnds.length;
 
2459             int[] old = lineEnds;
 
2460             lineEnds = new int[oldLength + INCREMENT];
 
2461             System.arraycopy(old, 0, lineEnds, 0, oldLength);
 
2462             lineEnds[linePtr] = separatorPos;
 
2469   public final void recordComment(boolean isJavadoc) {
         // Pushes the comment that was just scanned onto the comment stacks:
         // its end goes into commentStops and its start into commentStarts, both
         // at the same index commentPtr.
 
2471     // a new annotation comment is recorded
 
         // The stop position is stored as currentPosition for a javadoc comment
         // and as its negation otherwise, so the sign carries the comment kind.
2473       commentStops[++commentPtr] =
 
2474         isJavadoc ? currentPosition : -currentPosition;
 
2475     } catch (IndexOutOfBoundsException e) {
 
           // commentStops was full: grow it by 30 slots and redo the store.
           // commentPtr was already incremented by the failed attempt.
2476       int oldStackLength = commentStops.length;
 
2477       int[] oldStack = commentStops;
 
2478       commentStops = new int[oldStackLength + 30];
 
2479       System.arraycopy(oldStack, 0, commentStops, 0, oldStackLength);
 
2480       commentStops[commentPtr] = isJavadoc ? currentPosition : -currentPosition;
 
2481       //grows the positions buffers too
 
           // commentStarts is resized to the same new length so the two arrays
           // stay index-compatible.
2482       int[] old = commentStarts;
 
2483       commentStarts = new int[oldStackLength + 30];
 
2484       System.arraycopy(old, 0, commentStarts, 0, oldStackLength);
 
2487     //the buffer is of a correct size here
 
2488     commentStarts[commentPtr] = startPosition;
 
2490   public void resetTo(int begin, int end) {
         // begin: position assigned to initialPosition, startPosition and
         //        currentPosition.
         // end:   last position to scan; eofPosition is placed one past it.
 
2491     //reset the scanner to a given position where it may rescan again
 
2494     initialPosition = startPosition = currentPosition = begin;
 
         // end + 1 would overflow when end is Integer.MAX_VALUE, so in that case
         // end is used unchanged.
2495     eofPosition = end < Integer.MAX_VALUE ? end + 1 : end;
 
2496     commentPtr = -1; // reset comment stack
 
2499   public final void scanSingleQuotedEscapeCharacter()
 
2500     throws InvalidInputException {
         // Reads the next source character, advances currentPosition, and then
         // rewrites currentCharacter in a switch on that character. The visible
         // branches produce either a single quote or a backslash.
 
2501     // the string with "\\u" is a legal string of two chars \ and u
 
2502     //thus we use a direct access to the source (for regular cases).
 
2504 //    if (unicodeAsBackSlash) {
 
2505 //      // consume next character
 
2506 //      unicodeAsBackSlash = false;
 
2507 //      if (((currentCharacter = source[currentPosition++]) == '\\')
 
2508 //        && (source[currentPosition] == 'u')) {
 
2509 //        getNextUnicodeChar();
 
2511 //        if (withoutUnicodePtr != 0) {
 
2512 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
           // Direct read from the source buffer; the unicode-escape handling
           // above is commented out.
2516       currentCharacter = source[currentPosition++];
 
         // NOTE(review): the case labels are not visible in this excerpt, so
         // which input character selects each assignment below cannot be told
         // from here.
2517     switch (currentCharacter) {
 
2519         currentCharacter = '\'';
 
2522         currentCharacter = '\\';
 
2525         currentCharacter = '\\';
 
2530   public final void scanDoubleQuotedEscapeCharacter()
 
2531     throws InvalidInputException {
         // Reads the next source character, advances currentPosition, and
         // replaces currentCharacter with the character the escape denotes. The
         // visible branches produce tab, line feed, carriage return, double
         // quote, single quote, backslash and '$'; a further branch parses an
         // octal escape of up to three digits.
         // Throws InvalidInputException(INVALID_ESCAPE) from the octal branch.
 
2532     // the string with "\\u" is a legal string of two chars \ and u
 
2533     //thus we use a direct access to the source (for regular cases).
 
2535 //    if (unicodeAsBackSlash) {
 
2536 //      // consume next character
 
2537 //      unicodeAsBackSlash = false;
 
2538 //      if (((currentCharacter = source[currentPosition++]) == '\\')
 
2539 //        && (source[currentPosition] == 'u')) {
 
2540 //        getNextUnicodeChar();
 
2542 //        if (withoutUnicodePtr != 0) {
 
2543 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;
 
           // Direct read from the source buffer; the unicode-escape handling
           // above is commented out.
2547       currentCharacter = source[currentPosition++];
 
         // NOTE(review): the case labels are not visible in this excerpt, so
         // which escape letter selects each assignment below cannot be told
         // from here. The '\b' and '\f' assignments are commented out.
2548     switch (currentCharacter) {
 
2550       //        currentCharacter = '\b';
 
2553         currentCharacter = '\t';
 
2556         currentCharacter = '\n';
 
2559         //        currentCharacter = '\f';
 
2562         currentCharacter = '\r';
 
2565         currentCharacter = '\"';
 
2568         currentCharacter = '\'';
 
2571         currentCharacter = '\\';
 
2574         currentCharacter = '$';
 
2577         // -----------octal escape--------------
 
2579         // OctalDigit OctalDigit
 
2580         // ZeroToThree OctalDigit OctalDigit
 
             // Value of the first digit; only 0..7 enters the octal path.
2582         int number = Character.getNumericValue(currentCharacter);
 
2583         if (number >= 0 && number <= 7) {
 
               // True when the first digit is 4..7, in which case a third
               // digit is not folded into the value (see the branch below).
2584           boolean zeroToThreeNot = number > 3;
 
               // Second character: consumed from the source and tested with
               // isDigit in the same expression.
2586             .isDigit(currentCharacter = source[currentPosition++])) {
 
2587             int digit = Character.getNumericValue(currentCharacter);
 
2588             if (digit >= 0 && digit <= 7) {
 
2589               number = (number * 8) + digit;
 
                   // Third character: consumed and tested the same way.
2591                 .isDigit(currentCharacter = source[currentPosition++])) {
 
2592                 if (zeroToThreeNot) { // has read \NotZeroToThree OctalDigit Digit --> ignore last character
 
2595                   digit = Character.getNumericValue(currentCharacter);
 
2596                   if (digit >= 0 && digit <= 7) {
 
2597                     // has read \ZeroToThree OctalDigit OctalDigit
 
2598                     number = (number * 8) + digit;
 
2599                   } else { // has read \ZeroToThree OctalDigit NonOctalDigit --> ignore last character
 
2603               } else { // has read \OctalDigit NonDigit--> ignore last character
 
2606             } else { // has read \OctalDigit NonOctalDigit--> ignore last character                                             
 
2609           } else { // has read \OctalDigit --> ignore last character
 
                 // NOTE(review): the condition guarding this throw is not
                 // visible in this excerpt.
2613             throw new InvalidInputException(INVALID_ESCAPE);
 
               // The accumulated octal value becomes the resulting character.
2614           currentCharacter = (char) number;
 
2617         //     throw new InvalidInputException(INVALID_ESCAPE);
 
2621   //  public int scanIdentifierOrKeyword() throws InvalidInputException {
 
2622   //    return scanIdentifierOrKeyword( false );
 
2625   public int scanIdentifierOrKeyword(boolean isVariable) 
 
2626     throws InvalidInputException {
 
2629     //first dispatch on the first char.
 
2630     //then the length. If there are several
 
2631     //keywords with the same length AND the same first char, then do another
 
2632     //dispatch on the second char :-)...cool....but fast !
 
2634     useAssertAsAnIndentifier = false;
 
2636     while (getNextCharAsJavaIdentifierPart()) {
 
2640       return TokenNameVariable;
 
2645 //    if (withoutUnicodePtr == 0)
 
2647       //quick test on length == 1 but not on length > 12 while most identifier
 
2648       //have a length which is <= 12...but there are lots of identifier with
 
2652       if ((length = currentPosition - startPosition) == 1)
 
2653         return TokenNameIdentifier;
 
2655       data = new char[length];
 
2656       index = startPosition;
 
2657       for (int i = 0; i < length; i++) {
 
2658         data[i] = Character.toLowerCase(source[index + i]);
 
2662 //      if ((length = withoutUnicodePtr) == 1)
 
2663 //        return TokenNameIdentifier;
 
2664 //      // data = withoutUnicodeBuffer;
 
2665 //      data = new char[withoutUnicodeBuffer.length];
 
2666 //      for (int i = 0; i < withoutUnicodeBuffer.length; i++) {
 
2667 //        data[i] = Character.toLowerCase(withoutUnicodeBuffer[i]);
 
2672     firstLetter = data[index];
 
2673     switch (firstLetter) {
 
2675       case 'a' : // as and array
 
2678             if ((data[++index] == 's')) {
 
2681               return TokenNameIdentifier;
 
2684             if ((data[++index] == 'n') && (data[++index] == 'd')) {
 
2685               return TokenNameAND;
 
2687               return TokenNameIdentifier;
 
2690             //            if ((data[++index] == 'r') && (data[++index] == 'r') && (data[++index] == 'a') && (data[++index] == 'y'))
 
2691             //              return TokenNamearray;
 
2693             //              return TokenNameIdentifier;
 
2695             return TokenNameIdentifier;
 
2700             if ((data[++index] == 'r')
 
2701               && (data[++index] == 'e')
 
2702               && (data[++index] == 'a')
 
2703               && (data[++index] == 'k'))
 
2704               return TokenNamebreak;
 
2706               return TokenNameIdentifier;
 
2708             return TokenNameIdentifier;
 
2711       case 'c' : //case class continue
 
2714             if ((data[++index] == 'a')
 
2715               && (data[++index] == 's')
 
2716               && (data[++index] == 'e'))
 
2717               return TokenNamecase;
 
2719               return TokenNameIdentifier;
 
2721             if ((data[++index] == 'l')
 
2722               && (data[++index] == 'a')
 
2723               && (data[++index] == 's')
 
2724               && (data[++index] == 's'))
 
2725               return TokenNameclass;
 
2727               return TokenNameIdentifier;
 
2729             if ((data[++index] == 'o')
 
2730               && (data[++index] == 'n')
 
2731               && (data[++index] == 't')
 
2732               && (data[++index] == 'i')
 
2733               && (data[++index] == 'n')
 
2734               && (data[++index] == 'u')
 
2735               && (data[++index] == 'e'))
 
2736               return TokenNamecontinue;
 
2738               return TokenNameIdentifier;
 
2740             return TokenNameIdentifier;
 
2743       case 'd' : //define default do 
 
2746             if ((data[++index] == 'o'))
 
2749               return TokenNameIdentifier;
 
2751             if ((data[++index] == 'e')
 
2752               && (data[++index] == 'f')
 
2753               && (data[++index] == 'i')
 
2754               && (data[++index] == 'n')
 
2755               && (data[++index] == 'e'))
 
2756               return TokenNamedefine;
 
2758               return TokenNameIdentifier;
 
2760             if ((data[++index] == 'e')
 
2761               && (data[++index] == 'f')
 
2762               && (data[++index] == 'a')
 
2763               && (data[++index] == 'u')
 
2764               && (data[++index] == 'l')
 
2765               && (data[++index] == 't'))
 
2766               return TokenNamedefault;
 
2768               return TokenNameIdentifier;
 
2770             return TokenNameIdentifier;
 
2772       case 'e' : //echo else elseif extends
 
2775             if ((data[++index] == 'c')
 
2776               && (data[++index] == 'h')
 
2777               && (data[++index] == 'o'))
 
2778               return TokenNameecho;
 
2780               (data[index] == 'l')
 
2781                 && (data[++index] == 's')
 
2782                 && (data[++index] == 'e'))
 
2783               return TokenNameelse;
 
2785               return TokenNameIdentifier;
 
2787             if ((data[++index] == 'n')
 
2788               && (data[++index] == 'd')
 
2789               && (data[++index] == 'i')
 
2790               && (data[++index] == 'f'))
 
2791               return TokenNameendif;
 
2793               return TokenNameIdentifier;
 
2795             if ((data[++index] == 'n')
 
2796               && (data[++index] == 'd')
 
2797               && (data[++index] == 'f')
 
2798               && (data[++index] == 'o')
 
2799               && (data[++index] == 'r'))
 
2800               return TokenNameendfor;
 
2802               (data[index] == 'l')
 
2803                 && (data[++index] == 's')
 
2804                 && (data[++index] == 'e')
 
2805                 && (data[++index] == 'i')
 
2806                 && (data[++index] == 'f'))
 
2807               return TokenNameelseif;
 
2809               return TokenNameIdentifier;
 
2811             if ((data[++index] == 'x')
 
2812               && (data[++index] == 't')
 
2813               && (data[++index] == 'e')
 
2814               && (data[++index] == 'n')
 
2815               && (data[++index] == 'd')
 
2816               && (data[++index] == 's'))
 
2817               return TokenNameextends;
 
2819               return TokenNameIdentifier;
 
2820           case 8 : // endwhile
 
2821             if ((data[++index] == 'n')
 
2822               && (data[++index] == 'd')
 
2823               && (data[++index] == 'w')
 
2824               && (data[++index] == 'h')
 
2825               && (data[++index] == 'i')
 
2826               && (data[++index] == 'l')
 
2827               && (data[++index] == 'e'))
 
2828               return TokenNameendwhile;
 
2830               return TokenNameIdentifier;
 
2831           case 9 : // endswitch
 
2832             if ((data[++index] == 'n')
 
2833               && (data[++index] == 'd')
 
2834               && (data[++index] == 's')
 
2835               && (data[++index] == 'w')
 
2836               && (data[++index] == 'i')
 
2837               && (data[++index] == 't')
 
2838               && (data[++index] == 'c')
 
2839               && (data[++index] == 'h'))
 
2840               return TokenNameendswitch;
 
2842               return TokenNameIdentifier;
 
2843           case 10 : // endforeach
 
2844             if ((data[++index] == 'n')
 
2845               && (data[++index] == 'd')
 
2846               && (data[++index] == 'f')
 
2847               && (data[++index] == 'o')
 
2848               && (data[++index] == 'r')
 
2849               && (data[++index] == 'e')
 
2850               && (data[++index] == 'a')
 
2851               && (data[++index] == 'c')
 
2852               && (data[++index] == 'h'))
 
2853               return TokenNameendforeach;
 
2855               return TokenNameIdentifier;
 
2858             return TokenNameIdentifier;
 
2861       case 'f' : //for false foreach function
 
2864             if ((data[++index] == 'o') && (data[++index] == 'r'))
 
2865               return TokenNamefor;
 
2867               return TokenNameIdentifier;
 
2869             if ((data[++index] == 'a')
 
2870               && (data[++index] == 'l')
 
2871               && (data[++index] == 's')
 
2872               && (data[++index] == 'e'))
 
2873               return TokenNamefalse;
 
2875               return TokenNameIdentifier;
 
2876           case 7 : // foreach
 
2877             if ((data[++index] == 'o')
 
2878               && (data[++index] == 'r')
 
2879               && (data[++index] == 'e')
 
2880               && (data[++index] == 'a')
 
2881               && (data[++index] == 'c')
 
2882               && (data[++index] == 'h'))
 
2883               return TokenNameforeach;
 
2885               return TokenNameIdentifier;
 
2886           case 8 : // function
 
2887             if ((data[++index] == 'u')
 
2888               && (data[++index] == 'n')
 
2889               && (data[++index] == 'c')
 
2890               && (data[++index] == 't')
 
2891               && (data[++index] == 'i')
 
2892               && (data[++index] == 'o')
 
2893               && (data[++index] == 'n'))
 
2894               return TokenNamefunction;
 
2896               return TokenNameIdentifier;
 
2898             return TokenNameIdentifier;
 
2902           if ((data[++index] == 'l')
 
2903             && (data[++index] == 'o')
 
2904             && (data[++index] == 'b')
 
2905             && (data[++index] == 'a')
 
2906             && (data[++index] == 'l')) {
 
2907             return TokenNameglobal;
 
2910         return TokenNameIdentifier;
 
2915             if (data[++index] == 'f')
 
2918               return TokenNameIdentifier;
 
2920             //            if ((data[++index] == 'n') && (data[++index] == 't'))
 
2921             //              return TokenNameint;
 
2923             //              return TokenNameIdentifier;
 
2925             if ((data[++index] == 'n')
 
2926               && (data[++index] == 'c')
 
2927               && (data[++index] == 'l')
 
2928               && (data[++index] == 'u')
 
2929               && (data[++index] == 'd')
 
2930               && (data[++index] == 'e'))
 
2931               return TokenNameinclude;
 
2933               return TokenNameIdentifier;
 
2935             if ((data[++index] == 'n')
 
2936               && (data[++index] == 'c')
 
2937               && (data[++index] == 'l')
 
2938               && (data[++index] == 'u')
 
2939               && (data[++index] == 'd')
 
2940               && (data[++index] == 'e')
 
2941               && (data[++index] == '_')
 
2942               && (data[++index] == 'o')
 
2943               && (data[++index] == 'n')
 
2944               && (data[++index] == 'c')
 
2945               && (data[++index] == 'e'))
 
2946               return TokenNameinclude_once;
 
2948               return TokenNameIdentifier;
 
2950             return TokenNameIdentifier;
 
2955           if ((data[++index] == 'i')
 
2956             && (data[++index] == 's')
 
2957             && (data[++index] == 't')) {
 
2958             return TokenNamelist;
 
2961         return TokenNameIdentifier;
 
2963       case 'n' : // new null
 
2966             if ((data[++index] == 'e') && (data[++index] == 'w'))
 
2967               return TokenNamenew;
 
2969               return TokenNameIdentifier;
 
2971             if ((data[++index] == 'u')
 
2972               && (data[++index] == 'l')
 
2973               && (data[++index] == 'l'))
 
2974               return TokenNamenull;
 
2976               return TokenNameIdentifier;
 
2979             return TokenNameIdentifier;
 
2981       case 'o' : // or old_function
 
2983           if (data[++index] == 'r') {
 
2987         //        if (length == 12) {
 
2988         //          if ((data[++index] == 'l')
 
2989         //            && (data[++index] == 'd')
 
2990         //            && (data[++index] == '_')
 
2991         //            && (data[++index] == 'f')
 
2992         //            && (data[++index] == 'u')
 
2993         //            && (data[++index] == 'n')
 
2994         //            && (data[++index] == 'c')
 
2995         //            && (data[++index] == 't')
 
2996         //            && (data[++index] == 'i')
 
2997         //            && (data[++index] == 'o')
 
2998         //            && (data[++index] == 'n')) {
 
2999         //            return TokenNameold_function;
 
3002         return TokenNameIdentifier;
 
3006           if ((data[++index] == 'r')
 
3007             && (data[++index] == 'i')
 
3008             && (data[++index] == 'n')
 
3009             && (data[++index] == 't')) {
 
3010             return TokenNameprint;
 
3013         return TokenNameIdentifier;
 
3014       case 'r' : //return require require_once
 
3016           if ((data[++index] == 'e')
 
3017             && (data[++index] == 't')
 
3018             && (data[++index] == 'u')
 
3019             && (data[++index] == 'r')
 
3020             && (data[++index] == 'n')) {
 
3021             return TokenNamereturn;
 
3023         } else if (length == 7) {
 
3024           if ((data[++index] == 'e')
 
3025             && (data[++index] == 'q')
 
3026             && (data[++index] == 'u')
 
3027             && (data[++index] == 'i')
 
3028             && (data[++index] == 'r')
 
3029             && (data[++index] == 'e')) {
 
3030             return TokenNamerequire;
 
3032         } else if (length == 12) {
 
3033           if ((data[++index] == 'e')
 
3034             && (data[++index] == 'q')
 
3035             && (data[++index] == 'u')
 
3036             && (data[++index] == 'i')
 
3037             && (data[++index] == 'r')
 
3038             && (data[++index] == 'e')
 
3039             && (data[++index] == '_')
 
3040             && (data[++index] == 'o')
 
3041             && (data[++index] == 'n')
 
3042             && (data[++index] == 'c')
 
3043             && (data[++index] == 'e')) {
 
3044             return TokenNamerequire_once;
 
3047           return TokenNameIdentifier;
 
3049       case 's' : //static switch 
 
3052             if (data[++index] == 't')
 
3053               if ((data[++index] == 'a')
 
3054                 && (data[++index] == 't')
 
3055                 && (data[++index] == 'i')
 
3056                 && (data[++index] == 'c')) {
 
3057                 return TokenNamestatic;
 
3059                 return TokenNameIdentifier;
 
3061               (data[index] == 'w')
 
3062                 && (data[++index] == 'i')
 
3063                 && (data[++index] == 't')
 
3064                 && (data[++index] == 'c')
 
3065                 && (data[++index] == 'h'))
 
3066               return TokenNameswitch;
 
3068               return TokenNameIdentifier;
 
3070             return TokenNameIdentifier;
 
3077             if ((data[++index] == 'r')
 
3078               && (data[++index] == 'u')
 
3079               && (data[++index] == 'e'))
 
3080               return TokenNametrue;
 
3082               return TokenNameIdentifier;
 
3083             //            if ((data[++index] == 'h') && (data[++index] == 'i') && (data[++index] == 's'))
 
3084             //              return TokenNamethis;
 
3087             return TokenNameIdentifier;
 
3093             if ((data[++index] == 'a') && (data[++index] == 'r'))
 
3094               return TokenNamevar;
 
3096               return TokenNameIdentifier;
 
3099             return TokenNameIdentifier;
 
3105             if ((data[++index] == 'h')
 
3106               && (data[++index] == 'i')
 
3107               && (data[++index] == 'l')
 
3108               && (data[++index] == 'e'))
 
3109               return TokenNamewhile;
 
3111               return TokenNameIdentifier;
 
3112             //case 6:if ( (data[++index] =='i') && (data[++index]=='d') && (data[++index]=='e') && (data[++index]=='f')&& (data[++index]=='p'))
 
3113             //return TokenNamewidefp ;
 
3115             //return TokenNameIdentifier;
 
3117             return TokenNameIdentifier;
 
3123             if ((data[++index] == 'o') && (data[++index] == 'r'))
 
3124               return TokenNameXOR;
 
3126               return TokenNameIdentifier;
 
3129             return TokenNameIdentifier;
 
3132         return TokenNameIdentifier;
 
3135   public int scanNumber(boolean dotPrefix) throws InvalidInputException {
         // Scans a numeric literal and returns its token kind, either
         // TokenNameIntegerLiteral or TokenNameDoubleLiteral.
         //
         // dotPrefix seeds the local 'floating' flag; when it is true the leading-zero
         // (hexadecimal / potential octal) handling and the decimal-point handling are
         // skipped, because both are guarded by !dotPrefix.
         //
         // Throws InvalidInputException(INVALID_HEXA) when "0x"/"0X" is not followed by a
         // hexadecimal digit, and InvalidInputException(INVALID_FLOAT) when an exponent
         // marker (with optional sign) is not followed by a digit.
         //
         // NOTE(review): several lines of this method (closing braces, and presumably the
         // statements that set 'floating' to true) are not visible in this view - confirm
         // against the complete file before relying on the details below.

3137     //when entering this method the currentCharacter is the first

3138     //digit of the number, i.e. it may be preceded by a '.' when

3141     boolean floating = dotPrefix;

3142     if ((!dotPrefix) && (currentCharacter == '0')) {

3143       if (getNextChar('x', 'X') >= 0) { //----------hexa-----------------

3144         //force the first char of the hexa number to exist...

3145         // consume next character

3146         unicodeAsBackSlash = false;

3147         currentCharacter = source[currentPosition++];

3148 //        if (((currentCharacter = source[currentPosition++]) == '\\')

3149 //          && (source[currentPosition] == 'u')) {

3150 //          getNextUnicodeChar();

3152 //          if (withoutUnicodePtr != 0) {

3153 //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;

             // at least one hexadecimal digit is required after the 0x / 0X prefix
3156         if (Character.digit(currentCharacter, 16) == -1)

3157           throw new InvalidInputException(INVALID_HEXA);

             // swallow the remaining hexadecimal digits
3159         while (getNextCharAsDigit(16)) {

3161         //        if (getNextChar('l', 'L') >= 0)

3162         //          return TokenNameLongLiteral;

3164         return TokenNameIntegerLiteral;

3167       //there is no x or X in the number (the hexadecimal case has returned above)

3168       //potential octal ! ... some one may write 000099.0 ! thus 00100 < 00078.0 is true !!!!! crazy language

3169       if (getNextCharAsDigit()) {

3170         //-------------potential octal-----------------

3171         while (getNextCharAsDigit()) {

3174         //        if (getNextChar('l', 'L') >= 0) {

3175         //          return TokenNameLongLiteral;

3178         //        if (getNextChar('f', 'F') >= 0) {

3179         //          return TokenNameFloatingPointLiteral;

             // a trailing d / D suffix turns the leading-zero number into a double literal
3182         if (getNextChar('d', 'D') >= 0) {

3183           return TokenNameDoubleLiteral;

3184         } else { //make the distinction between octal and float ....

3185           if (getNextChar('.')) { //bingo ! ....

3186             while (getNextCharAsDigit()) {

3188             if (getNextChar('e', 'E') >= 0) {

3189               // consume next character

3190               unicodeAsBackSlash = false;

3191               currentCharacter = source[currentPosition++];

3192 //              if (((currentCharacter = source[currentPosition++]) == '\\')

3193 //                && (source[currentPosition] == 'u')) {

3194 //                getNextUnicodeChar();

3196 //                if (withoutUnicodePtr != 0) {

3197 //                  withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;

                   // optional sign of the exponent
3201               if ((currentCharacter == '-') || (currentCharacter == '+')) {

3202                 // consume next character

3203                 unicodeAsBackSlash = false;

3204                 currentCharacter = source[currentPosition++];

3205 //                if (((currentCharacter = source[currentPosition++]) == '\\')

3206 //                  && (source[currentPosition] == 'u')) {

3207 //                  getNextUnicodeChar();

3209 //                  if (withoutUnicodePtr != 0) {

3210 //                    withoutUnicodeBuffer[++withoutUnicodePtr] =

3211 //                      currentCharacter;

                   // the exponent needs at least one digit
3215               if (!Character.isDigit(currentCharacter))

3216                 throw new InvalidInputException(INVALID_FLOAT);

3217               while (getNextCharAsDigit()) {

3220             //            if (getNextChar('f', 'F') >= 0)

3221             //              return TokenNameFloatingPointLiteral;

3222             getNextChar('d', 'D'); //jump over potential d or D

3223             return TokenNameDoubleLiteral;

3225             return TokenNameIntegerLiteral;

         // plain decimal number: swallow the digits of the integer part
3233     while (getNextCharAsDigit()) {

3236     //    if ((!dotPrefix) && (getNextChar('l', 'L') >= 0))

3237     //      return TokenNameLongLiteral;

3239     if ((!dotPrefix) && (getNextChar('.'))) { //decimal part that can be empty

3240       while (getNextCharAsDigit()) {

3245     //if floating is true both exponent and suffix may be optional

3247     if (getNextChar('e', 'E') >= 0) {

3249       // consume next character

3250       unicodeAsBackSlash = false;

3251       currentCharacter = source[currentPosition++];

3252 //      if (((currentCharacter = source[currentPosition++]) == '\\')

3253 //        && (source[currentPosition] == 'u')) {

3254 //        getNextUnicodeChar();

3256 //        if (withoutUnicodePtr != 0) {

3257 //          withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;

3261       if ((currentCharacter == '-')

3262         || (currentCharacter == '+')) { // consume next character

3263         unicodeAsBackSlash = false;

3264         currentCharacter = source[currentPosition++];

3265 //        if (((currentCharacter = source[currentPosition++]) == '\\')

3266 //          && (source[currentPosition] == 'u')) {

3267 //          getNextUnicodeChar();

3269 //          if (withoutUnicodePtr != 0) {

3270 //            withoutUnicodeBuffer[++withoutUnicodePtr] = currentCharacter;

           // the exponent needs at least one digit
3274       if (!Character.isDigit(currentCharacter))

3275         throw new InvalidInputException(INVALID_FLOAT);

3276       while (getNextCharAsDigit()) {

         // an explicit d / D suffix always yields a double literal
3280     if (getNextChar('d', 'D') >= 0)

3281       return TokenNameDoubleLiteral;

3282     //    if (getNextChar('f', 'F') >= 0)

3283     //      return TokenNameFloatingPointLiteral;

3285     //the long flag has been tested before

3287     return floating ? TokenNameDoubleLiteral : TokenNameIntegerLiteral;
 
3290    * Search the line number corresponding to a specific position
 
3293   public final int getLineNumber(int position) {
         // Looks up the line that contains the given source position by comparing the
         // position against the entries of lineEnds; only the first linePtr + 1 entries
         // are considered.
         // NOTE(review): the loop header, the declaration of 'm' and all return
         // statements are not visible in this view. g and d look like the lower and upper
         // bounds of a search over lineEnds (presumably a binary search) - confirm against
         // the complete file, including what is returned when lineEnds is null.

3295     if (lineEnds == null)

3297     int length = linePtr + 1;

3300     int g = 0, d = length - 1;

3304       if (position < lineEnds[m]) {

3306       } else if (position > lineEnds[m]) {

3312     if (position < lineEnds[m]) {
 
3318   public void setPHPMode(boolean mode) {
         // NOTE(review): the body of this method is not visible in this view. Presumably
         // it stores 'mode' in the public phpMode field - confirm against the full file.
 
3322   public final void setSource(char[] source) {
         // Installs the character buffer to scan and resets the scanning state that
         // depends on it. The array is stored by reference, not copied.

3323     //the source buffer is set to the given char array

         // a null argument is replaced by an empty buffer so this.source is never null
3325     if (source == null) {

3326       this.source = new char[0];

3328       this.source = source;

         // restart scanning from the beginning of the new buffer
3331     initialPosition = currentPosition = 0;

3332     containsAssertKeyword = false;

         // scratch buffer sized like the source
3333     withoutUnicodeBuffer = new char[this.source.length];
 
3337   public String toString() {
         // Debug rendering of the scanner state: the whole source with the current token
         // (startPosition up to currentPosition - 1) framed by "Starts here -->" and
         // "<-- Ends here" markers. Two special cases return early: the start position
         // sits exactly at the end of the source, or the current position is past it.

3338     if (startPosition == source.length)

3339       return "EOF\n\n" + new String(source); //$NON-NLS-1$

3340     if (currentPosition > source.length)

3341       return "behind the EOF :-( ....\n\n" + new String(source); //$NON-NLS-1$

         // everything before the current token
3343     char front[] = new char[startPosition];

3344     System.arraycopy(source, 0, front, 0, startPosition);

         // length of the current token, i.e. of the inclusive range
         // [startPosition, currentPosition - 1]
3346     int middleLength = (currentPosition - 1) - startPosition + 1;

3348     if (middleLength > -1) {

3349       middle = new char[middleLength];

3350       System.arraycopy(source, startPosition, middle, 0, middleLength);

3352       middle = new char[0];

         // everything after the current token
         // NOTE(review): 'end' is allocated with source.length - (currentPosition - 1)
         // chars, while the copy arguments visible below start at currentPosition and
         // cover source.length - (currentPosition - 1) - 1 chars, so the last element of
         // 'end' appears to keep its default value. The arraycopy call line itself is not
         // visible in this view - confirm.
3355     char end[] = new char[source.length - (currentPosition - 1)];

3358       (currentPosition - 1) + 1,

3361       source.length - (currentPosition - 1) - 1);

3363     return new String(front) + "\n===============================\nStarts here -->" //$NON-NLS-1$

3364     + new String(middle) + "<-- Ends here\n===============================\n" //$NON-NLS-1$
 
3367   public final String toStringAction(int act) {
         // Returns a readable description of the token kind 'act'.
         // Keywords and operators are rendered as their spelling; token kinds that carry
         // text (identifiers, variables, literals, errors, ...) are rendered as
         // Kind(<current token source>); the last return renders anything else as
         // "not-a-token(<act>) <current token source>".
         // NOTE(review): the switch header and a number of 'case' labels / returns are
         // not visible in this view, so some labels below appear next to a return that
         // belongs to a different (hidden) label - check the complete file.

3369       case TokenNameERROR :

3370         return "ScannerError(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$

3371       case TokenNameStopPHP :

3372         return "StopPHP(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$

3373       case TokenNameIdentifier :

3374         return "Identifier(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$

3375       case TokenNameVariable :

3376         return "Variable(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$

3378         return "as"; //$NON-NLS-1$

3379       case TokenNamebreak :

3380         return "break"; //$NON-NLS-1$

3381       case TokenNamecase :

3382         return "case"; //$NON-NLS-1$

3383       case TokenNameclass :

3384         return "class"; //$NON-NLS-1$

3385       case TokenNamecontinue :

3386         return "continue"; //$NON-NLS-1$

3387       case TokenNamedefault :

3388         return "default"; //$NON-NLS-1$

3389       case TokenNamedefine :

3390         return "define"; //$NON-NLS-1$

3392         return "do"; //$NON-NLS-1$

3393       case TokenNameecho :

3394         return "echo"; //$NON-NLS-1$

3395       case TokenNameelse :

3396         return "else"; //$NON-NLS-1$

3397       case TokenNameelseif :

3398         return "elseif"; //$NON-NLS-1$

3399       case TokenNameendfor :

3400         return "endfor"; //$NON-NLS-1$

3401       case TokenNameendforeach :

3402         return "endforeach"; //$NON-NLS-1$

3403       case TokenNameendif :

3404         return "endif"; //$NON-NLS-1$

3405       case TokenNameendswitch :

3406         return "endswitch"; //$NON-NLS-1$

3407       case TokenNameendwhile :

3408         return "endwhile"; //$NON-NLS-1$

3409       case TokenNameextends :

3410         return "extends"; //$NON-NLS-1$

3411       case TokenNamefalse :

3412         return "false"; //$NON-NLS-1$

3414         return "for"; //$NON-NLS-1$

3415       case TokenNameforeach :

3416         return "foreach"; //$NON-NLS-1$

3417       case TokenNamefunction :

3418         return "function"; //$NON-NLS-1$

3419       case TokenNameglobal :

3420         return "global"; //$NON-NLS-1$

3422         return "if"; //$NON-NLS-1$

3423       case TokenNameinclude :

3424         return "include"; //$NON-NLS-1$

3425       case TokenNameinclude_once :

3426         return "include_once"; //$NON-NLS-1$

3427       case TokenNamelist :

3428         return "list"; //$NON-NLS-1$

3430         return "new"; //$NON-NLS-1$

3431       case TokenNamenull :

3432         return "null"; //$NON-NLS-1$

3433       case TokenNameprint :

3434         return "print"; //$NON-NLS-1$

3435       case TokenNamerequire :

3436         return "require"; //$NON-NLS-1$

3437       case TokenNamerequire_once :

3438         return "require_once"; //$NON-NLS-1$

3439       case TokenNamereturn :

3440         return "return"; //$NON-NLS-1$

3441       case TokenNamestatic :

3442         return "static"; //$NON-NLS-1$

3443       case TokenNameswitch :

3444         return "switch"; //$NON-NLS-1$

3445       case TokenNametrue :

3446         return "true"; //$NON-NLS-1$

3448         return "var"; //$NON-NLS-1$

3449       case TokenNamewhile :

3450         return "while"; //$NON-NLS-1$

3451       case TokenNameIntegerLiteral :

3452         return "Integer(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$

3453       case TokenNameDoubleLiteral :

3454         return "Double(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$

3455       case TokenNameStringLiteral :

3456         return "String(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$

3457       case TokenNameStringConstant :

3458         return "StringConstant(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$

3459       case TokenNameStringInterpolated :

3460         return "StringInterpolated(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$

3461       case TokenNameHEREDOC :

3462         return "HEREDOC(" + new String(getCurrentTokenSource()) + ")"; //$NON-NLS-1$ //$NON-NLS-2$

3464       case TokenNamePLUS_PLUS :

3465         return "++"; //$NON-NLS-1$

3466       case TokenNameMINUS_MINUS :

3467         return "--"; //$NON-NLS-1$

3468       case TokenNameEQUAL_EQUAL :

3469         return "=="; //$NON-NLS-1$

3470       case TokenNameEQUAL_GREATER :

3471         return "=>"; //$NON-NLS-1$

3472       case TokenNameLESS_EQUAL :

3473         return "<="; //$NON-NLS-1$

3474       case TokenNameGREATER_EQUAL :

3475         return ">="; //$NON-NLS-1$

3476       case TokenNameNOT_EQUAL :

3477         return "!="; //$NON-NLS-1$

3478       case TokenNameLEFT_SHIFT :

3479         return "<<"; //$NON-NLS-1$

3480       case TokenNameRIGHT_SHIFT :

3481         return ">>"; //$NON-NLS-1$

3482       case TokenNamePLUS_EQUAL :

3483         return "+="; //$NON-NLS-1$

3484       case TokenNameMINUS_EQUAL :

3485         return "-="; //$NON-NLS-1$

3486       case TokenNameMULTIPLY_EQUAL :

3487         return "*="; //$NON-NLS-1$

3488       case TokenNameDIVIDE_EQUAL :

3489         return "/="; //$NON-NLS-1$

3490       case TokenNameAND_EQUAL :

3491         return "&="; //$NON-NLS-1$

3492       case TokenNameOR_EQUAL :

3493         return "|="; //$NON-NLS-1$

3494       case TokenNameXOR_EQUAL :

3495         return "^="; //$NON-NLS-1$

3496       case TokenNameREMAINDER_EQUAL :

3497         return "%="; //$NON-NLS-1$

3498       case TokenNameLEFT_SHIFT_EQUAL :

3499         return "<<="; //$NON-NLS-1$

3500       case TokenNameRIGHT_SHIFT_EQUAL :

3501         return ">>="; //$NON-NLS-1$

3502       case TokenNameOR_OR :

3503         return "||"; //$NON-NLS-1$

3504       case TokenNameAND_AND :

3505         return "&&"; //$NON-NLS-1$

3506       case TokenNamePLUS :

3507         return "+"; //$NON-NLS-1$

3508       case TokenNameMINUS :

3509         return "-"; //$NON-NLS-1$

3510       case TokenNameMINUS_GREATER :

3513         return "!"; //$NON-NLS-1$

3514       case TokenNameREMAINDER :

3515         return "%"; //$NON-NLS-1$

3517         return "^"; //$NON-NLS-1$

3519         return "&"; //$NON-NLS-1$

3520       case TokenNameMULTIPLY :

3521         return "*"; //$NON-NLS-1$

3523         return "|"; //$NON-NLS-1$

3524       case TokenNameTWIDDLE :

3525         return "~"; //$NON-NLS-1$

3526       case TokenNameTWIDDLE_EQUAL :

3527         return "~="; //$NON-NLS-1$

3528       case TokenNameDIVIDE :

3529         return "/"; //$NON-NLS-1$

3530       case TokenNameGREATER :

3531         return ">"; //$NON-NLS-1$

3532       case TokenNameLESS :

3533         return "<"; //$NON-NLS-1$

3534       case TokenNameLPAREN :

3535         return "("; //$NON-NLS-1$

3536       case TokenNameRPAREN :

3537         return ")"; //$NON-NLS-1$

3538       case TokenNameLBRACE :

3539         return "{"; //$NON-NLS-1$

3540       case TokenNameRBRACE :

3541         return "}"; //$NON-NLS-1$

3542       case TokenNameLBRACKET :

3543         return "["; //$NON-NLS-1$

3544       case TokenNameRBRACKET :

3545         return "]"; //$NON-NLS-1$

3546       case TokenNameSEMICOLON :

3547         return ";"; //$NON-NLS-1$

3548       case TokenNameQUESTION :

3549         return "?"; //$NON-NLS-1$

3550       case TokenNameCOLON :

3551         return ":"; //$NON-NLS-1$

3552       case TokenNameCOMMA :

3553         return ","; //$NON-NLS-1$

3555         return "."; //$NON-NLS-1$

3556       case TokenNameEQUAL :

3557         return "="; //$NON-NLS-1$

3560       case TokenNameDOLLAR_LBRACE :

3563         return "EOF"; //$NON-NLS-1$

3565         return "not-a-token(" + (new Integer(act)) + ") " + new String(getCurrentTokenSource()); //$NON-NLS-1$ //$NON-NLS-2$
 
3570     boolean tokenizeComments,
 
3571     boolean tokenizeWhiteSpace,
 
3572     boolean checkNonExternalizedStringLiterals) {
 
3576       checkNonExternalizedStringLiterals,
 
3581     boolean tokenizeComments,

3582     boolean tokenizeWhiteSpace,

3583     boolean checkNonExternalizedStringLiterals,

3584     boolean assertMode) {
         // Stores each of the four option flags in the field of the same name.
         // NOTE(review): the declaration line that precedes this parameter list is not
         // visible in this view; the assignments below suggest a constructor - confirm.

         // start with the largest possible eofPosition, so no early end-of-file is
         // reported before a caller sets a smaller limit
3585     this.eofPosition = Integer.MAX_VALUE;

3586     this.tokenizeComments = tokenizeComments;

3587     this.tokenizeWhiteSpace = tokenizeWhiteSpace;

3588     this.checkNonExternalizedStringLiterals =

3589       checkNonExternalizedStringLiterals;

3590     this.assertMode = assertMode;
 
3593   private void checkNonExternalizeString() throws InvalidInputException {
         // Hands the current line (currentLine) to parseTags.
         // NOTE(review): the statement guarded by the null check is not visible in this
         // view; presumably the method returns early when there is no current line -
         // confirm against the complete file.

3594     if (currentLine == null)

3596     parseTags(currentLine);
 
3599   private void parseTags(NLSLine line) throws InvalidInputException {
 
3600     String s = new String(getCurrentTokenSource());
 
3601     int pos = s.indexOf(TAG_PREFIX);
 
3602     int lineLength = line.size();
 
3604       int start = pos + TAG_PREFIX_LENGTH;
 
3605       int end = s.indexOf(TAG_POSTFIX, start);
 
3606       String index = s.substring(start, end);
 
3609         i = Integer.parseInt(index) - 1;
 
3610         // Tags are one based not zero based.
 
3611       } catch (NumberFormatException e) {
 
3612         i = -1; // we don't want to consider this as a valid NLS tag
 
3614       if (line.exists(i)) {
 
3617       pos = s.indexOf(TAG_PREFIX, start);
 
3620     this.nonNLSStrings = new StringLiteral[lineLength];
 
3621     int nonNLSCounter = 0;
 
3622     for (Iterator iterator = line.iterator(); iterator.hasNext();) {
 
3623       StringLiteral literal = (StringLiteral) iterator.next();
 
3624       if (literal != null) {
 
3625         this.nonNLSStrings[nonNLSCounter++] = literal;
 
3628     if (nonNLSCounter == 0) {
 
3629       this.nonNLSStrings = null;
 
3633     this.wasNonExternalizedStringLiteral = true;
 
3634     if (nonNLSCounter != lineLength) {
 
3638         (this.nonNLSStrings = new StringLiteral[nonNLSCounter]),